From 3f97531b2c319fe4a460e5d2317fcedf3b0b058a Mon Sep 17 00:00:00 2001 From: Anton Date: Fri, 1 May 2026 12:29:49 +0200 Subject: [PATCH 1/5] feat: live UI server, vendored C library, marketing README MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three changes that work together: 1. Visualization is now a live HTTP server, not a static file. The `<meta http-equiv="refresh">` tag is gone — interactive state (filter selections, scroll, click highlights) survives across data updates. The `visualize` subcommand starts a tokio + axum server with GET / (page), GET /data (JSON), GET /events (SSE). A background task re-scans every --interval seconds and only emits a `data-changed` event when the new content hash differs from the previous one. The page subscribes to /events and reloads only on real change. 2. The C library is now vendored under crates/rayforce-sys/vendor/. build.rs compiles it via `cc::Build` on every fresh build — no external checkout, no submodule, no env var. RAYFORCE_DIR still works as an override for C-side development. 3. README rewritten as a product page (~100 lines). Drops the wall of `cargo run -q -p ... -- ...` commands and example outputs that made it look like an internal scratchpad. Leads with what raysense does, why it matters, and the three commands you actually need. 
--- README.md | 372 +- crates/rayforce-sys/Cargo.toml | 1 + crates/rayforce-sys/build.rs | 127 +- crates/rayforce-sys/vendor/rayforce/LICENSE | 21 + .../vendor/rayforce/include/rayforce.h | 418 ++ .../vendor/rayforce/src/core/block.c | 82 + .../vendor/rayforce/src/core/block.h | 45 + .../vendor/rayforce/src/core/epoll.c | 250 + .../vendor/rayforce/src/core/iocp.c | 60 + .../vendor/rayforce/src/core/ipc.c | 1117 +++ .../vendor/rayforce/src/core/ipc.h | 96 + .../vendor/rayforce/src/core/kqueue.c | 248 + .../vendor/rayforce/src/core/morsel.c | 122 + .../vendor/rayforce/src/core/morsel.h | 41 + .../vendor/rayforce/src/core/numparse.c | 452 ++ .../vendor/rayforce/src/core/numparse.h | 77 + .../vendor/rayforce/src/core/platform.c | 464 ++ .../vendor/rayforce/src/core/platform.h | 178 + .../vendor/rayforce/src/core/poll.c | 122 + .../vendor/rayforce/src/core/poll.h | 115 + .../vendor/rayforce/src/core/pool.c | 504 ++ .../vendor/rayforce/src/core/pool.h | 95 + .../vendor/rayforce/src/core/profile.h | 161 + .../vendor/rayforce/src/core/progress.c | 170 + .../vendor/rayforce/src/core/runtime.c | 367 + .../vendor/rayforce/src/core/runtime.h | 136 + .../vendor/rayforce/src/core/sock.c | 201 + .../vendor/rayforce/src/core/sock.h | 47 + .../vendor/rayforce/src/core/types.c | 57 + .../vendor/rayforce/src/core/types.h | 45 + .../rayforce-sys/vendor/rayforce/src/io/csv.c | 1821 +++++ .../rayforce-sys/vendor/rayforce/src/io/csv.h | 34 + .../vendor/rayforce/src/lang/cal.h | 84 + .../vendor/rayforce/src/lang/compile.c | 518 ++ .../vendor/rayforce/src/lang/env.c | 658 ++ .../vendor/rayforce/src/lang/env.h | 118 + .../vendor/rayforce/src/lang/eval.c | 2626 +++++++ .../vendor/rayforce/src/lang/eval.h | 298 + .../vendor/rayforce/src/lang/format.c | 1074 +++ .../vendor/rayforce/src/lang/format.h | 50 + .../vendor/rayforce/src/lang/internal.h | 514 ++ .../vendor/rayforce/src/lang/nfo.c | 100 + .../vendor/rayforce/src/lang/nfo.h | 69 + .../vendor/rayforce/src/lang/parse.c | 881 +++ 
.../vendor/rayforce/src/lang/parse.h | 39 + .../vendor/rayforce/src/lang/syscmd.c | 359 + .../vendor/rayforce/src/lang/syscmd.h | 103 + .../vendor/rayforce/src/mem/arena.c | 160 + .../vendor/rayforce/src/mem/arena.h | 60 + .../vendor/rayforce/src/mem/cow.c | 79 + .../vendor/rayforce/src/mem/cow.h | 43 + .../vendor/rayforce/src/mem/heap.c | 1601 +++++ .../vendor/rayforce/src/mem/heap.h | 404 ++ .../vendor/rayforce/src/mem/sys.c | 122 + .../vendor/rayforce/src/mem/sys.h | 49 + .../vendor/rayforce/src/ops/agg.c | 509 ++ .../vendor/rayforce/src/ops/arith.c | 422 ++ .../vendor/rayforce/src/ops/builtins.c | 2681 +++++++ .../vendor/rayforce/src/ops/cmp.c | 330 + .../vendor/rayforce/src/ops/collection.c | 2040 ++++++ .../vendor/rayforce/src/ops/datalog.c | 4325 +++++++++++ .../vendor/rayforce/src/ops/datalog.h | 344 + .../vendor/rayforce/src/ops/dump.c | 254 + .../vendor/rayforce/src/ops/embedding.c | 870 +++ .../vendor/rayforce/src/ops/exec.c | 2272 ++++++ .../vendor/rayforce/src/ops/exec.h | 29 + .../vendor/rayforce/src/ops/expr.c | 1776 +++++ .../vendor/rayforce/src/ops/filter.c | 685 ++ .../vendor/rayforce/src/ops/fuse.c | 210 + .../vendor/rayforce/src/ops/fuse.h | 29 + .../vendor/rayforce/src/ops/fvec.c | 101 + .../vendor/rayforce/src/ops/fvec.h | 52 + .../vendor/rayforce/src/ops/glob.c | 102 + .../vendor/rayforce/src/ops/glob.h | 43 + .../vendor/rayforce/src/ops/graph.c | 1822 +++++ .../vendor/rayforce/src/ops/graph.h | 29 + .../vendor/rayforce/src/ops/group.c | 4392 ++++++++++++ .../vendor/rayforce/src/ops/hash.h | 252 + .../vendor/rayforce/src/ops/idxop.c | 734 ++ .../vendor/rayforce/src/ops/idxop.h | 171 + .../vendor/rayforce/src/ops/internal.h | 992 +++ .../vendor/rayforce/src/ops/join.c | 1972 +++++ .../vendor/rayforce/src/ops/journal.c | 191 + .../vendor/rayforce/src/ops/journal.h | 64 + .../vendor/rayforce/src/ops/lftj.c | 258 + .../vendor/rayforce/src/ops/lftj.h | 136 + .../vendor/rayforce/src/ops/linkop.c | 328 + .../vendor/rayforce/src/ops/linkop.h | 105 + 
.../vendor/rayforce/src/ops/ops.h | 726 ++ .../vendor/rayforce/src/ops/opt.c | 2031 ++++++ .../vendor/rayforce/src/ops/opt.h | 29 + .../vendor/rayforce/src/ops/pipe.c | 63 + .../vendor/rayforce/src/ops/pipe.h | 43 + .../vendor/rayforce/src/ops/pivot.c | 666 ++ .../vendor/rayforce/src/ops/plan.c | 31 + .../vendor/rayforce/src/ops/plan.h | 29 + .../vendor/rayforce/src/ops/query.c | 6329 +++++++++++++++++ .../vendor/rayforce/src/ops/rerank.c | 546 ++ .../vendor/rayforce/src/ops/rowsel.c | 445 ++ .../vendor/rayforce/src/ops/rowsel.h | 187 + .../vendor/rayforce/src/ops/sort.c | 3682 ++++++++++ .../vendor/rayforce/src/ops/string.c | 604 ++ .../vendor/rayforce/src/ops/strop.c | 281 + .../vendor/rayforce/src/ops/system.c | 827 +++ .../vendor/rayforce/src/ops/tblop.c | 948 +++ .../vendor/rayforce/src/ops/temporal.c | 665 ++ .../vendor/rayforce/src/ops/temporal.h | 84 + .../vendor/rayforce/src/ops/traverse.c | 2641 +++++++ .../vendor/rayforce/src/ops/window.c | 1223 ++++ .../vendor/rayforce/src/store/col.c | 954 +++ .../vendor/rayforce/src/store/col.h | 34 + .../vendor/rayforce/src/store/csr.c | 529 ++ .../vendor/rayforce/src/store/csr.h | 79 + .../vendor/rayforce/src/store/fileio.c | 270 + .../vendor/rayforce/src/store/fileio.h | 54 + .../vendor/rayforce/src/store/hnsw.c | 972 +++ .../vendor/rayforce/src/store/hnsw.h | 133 + .../vendor/rayforce/src/store/journal.c | 656 ++ .../vendor/rayforce/src/store/journal.h | 123 + .../vendor/rayforce/src/store/meta.c | 43 + .../vendor/rayforce/src/store/meta.h | 33 + .../vendor/rayforce/src/store/part.c | 503 ++ .../vendor/rayforce/src/store/part.h | 33 + .../vendor/rayforce/src/store/serde.c | 984 +++ .../vendor/rayforce/src/store/serde.h | 81 + .../vendor/rayforce/src/store/splay.c | 229 + .../vendor/rayforce/src/store/splay.h | 34 + .../vendor/rayforce/src/table/dict.c | 609 ++ .../vendor/rayforce/src/table/dict.h | 68 + .../vendor/rayforce/src/table/sym.c | 1251 ++++ .../vendor/rayforce/src/table/sym.h | 139 + 
.../vendor/rayforce/src/table/table.c | 238 + .../vendor/rayforce/src/table/table.h | 40 + .../vendor/rayforce/src/vec/atom.c | 208 + .../vendor/rayforce/src/vec/atom.h | 36 + .../vendor/rayforce/src/vec/embedding.h | 38 + .../vendor/rayforce/src/vec/list.c | 299 + .../vendor/rayforce/src/vec/list.h | 36 + .../vendor/rayforce/src/vec/sel.c | 190 + .../vendor/rayforce/src/vec/str.c | 90 + .../vendor/rayforce/src/vec/str.h | 103 + .../vendor/rayforce/src/vec/vec.c | 1361 ++++ .../vendor/rayforce/src/vec/vec.h | 58 + crates/raysense-cli/Cargo.toml | 4 + crates/raysense-cli/src/lib.rs | 204 +- 145 files changed, 79003 insertions(+), 368 deletions(-) create mode 100644 crates/rayforce-sys/vendor/rayforce/LICENSE create mode 100644 crates/rayforce-sys/vendor/rayforce/include/rayforce.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/block.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/block.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/epoll.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/iocp.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/ipc.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/ipc.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/kqueue.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/morsel.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/morsel.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/numparse.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/numparse.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/platform.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/platform.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/poll.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/poll.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/pool.c create mode 100644 
crates/rayforce-sys/vendor/rayforce/src/core/pool.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/profile.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/progress.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/runtime.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/runtime.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/sock.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/sock.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/types.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/core/types.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/io/csv.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/io/csv.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/lang/cal.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/lang/compile.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/lang/env.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/lang/env.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/lang/eval.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/lang/eval.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/lang/format.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/lang/format.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/lang/internal.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/lang/nfo.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/lang/nfo.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/lang/parse.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/lang/parse.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/lang/syscmd.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/lang/syscmd.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/mem/arena.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/mem/arena.h create 
mode 100644 crates/rayforce-sys/vendor/rayforce/src/mem/cow.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/mem/cow.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/mem/heap.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/mem/heap.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/mem/sys.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/mem/sys.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/agg.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/arith.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/builtins.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/cmp.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/collection.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/datalog.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/datalog.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/dump.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/embedding.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/exec.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/exec.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/expr.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/filter.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/fuse.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/fuse.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/fvec.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/fvec.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/glob.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/glob.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/graph.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/graph.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/group.c create mode 100644 
crates/rayforce-sys/vendor/rayforce/src/ops/hash.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/idxop.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/idxop.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/internal.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/join.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/journal.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/journal.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/lftj.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/lftj.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/linkop.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/linkop.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/ops.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/opt.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/opt.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/pipe.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/pipe.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/pivot.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/plan.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/plan.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/query.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/rerank.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/rowsel.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/rowsel.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/sort.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/string.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/strop.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/system.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/tblop.c create mode 100644 
crates/rayforce-sys/vendor/rayforce/src/ops/temporal.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/temporal.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/traverse.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/ops/window.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/store/col.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/store/col.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/store/csr.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/store/csr.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/store/fileio.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/store/fileio.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/store/hnsw.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/store/hnsw.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/store/journal.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/store/journal.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/store/meta.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/store/meta.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/store/part.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/store/part.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/store/serde.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/store/serde.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/store/splay.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/store/splay.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/table/dict.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/table/dict.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/table/sym.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/table/sym.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/table/table.c create mode 100644 
crates/rayforce-sys/vendor/rayforce/src/table/table.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/vec/atom.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/vec/atom.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/vec/embedding.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/vec/list.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/vec/list.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/vec/sel.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/vec/str.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/vec/str.h create mode 100644 crates/rayforce-sys/vendor/rayforce/src/vec/vec.c create mode 100644 crates/rayforce-sys/vendor/rayforce/src/vec/vec.h diff --git a/README.md b/README.md index c313d4c..4017b72 100644 --- a/README.md +++ b/README.md @@ -23,347 +23,97 @@ # Raysense -Raysense is local architectural telemetry for AI coding agents. +**Architectural X-ray for your codebase. Live, local, agent-ready.** -It scans a repository, extracts files/functions/imports, resolves local -dependency edges, classifies imports, computes graph health, and can materialize -the scan into Rayforce-backed memory tables. +Point Raysense at a repository and it tells you, in seconds, where the +load-bearing files are, which modules are tangled, where complexity is +hiding, and which parts of the codebase are bus-factor-of-one. It runs +locally, ships zero data anywhere, and exposes everything to AI coding +agents through MCP. -## Current Test Commands +## Why -```bash -cargo run -q -p raysense-cli -- health . -cargo run -q -p raysense-cli -- edges . -cargo run -q -p raysense-cli -- observe . --memory -``` +LLM coding agents read source one file at a time. They don't see the +*shape* of your project: the cycles, the god files, the dead code, the +files that change together every commit. 
Raysense computes that shape +once and serves it back as queryable structure — to your agents, to +your CI, and to a live dashboard you can keep open while you work. -Against Rayforce from this workspace layout: +## Install ```bash -cargo run -q -p raysense-cli -- health ../rayforce -cargo run -q -p raysense-cli -- edges ../rayforce | head -cargo run -q -p raysense-cli -- observe ../rayforce --memory -cargo run -q -p raysense-cli -- baseline save ../rayforce -cargo run -q -p raysense-cli -- baseline diff ../rayforce -``` - -Current Rayforce baseline: - -```text -score 77 -quality_signal 7708 -coverage_score 100 -structural_score 72 -facts files=190 functions=2662 calls=25704 call_edges=15492 imports=1039 -entry_points total=50 binaries=6 examples=4 tests=40 -imports local=657 external=0 system=382 unresolved=0 -graph resolved_edges=657 cycles=0 -coupling local_edges=657 cross_module_edges=240 cross_module_ratio=0.365 cross_unstable_edges=200 cross_unstable_ratio=0.304 entropy=0.824 entropy_bits=3.201 entropy_pairs=15 average_module_cohesion=0.667 cohesive_module_count=18 god_files=2 unstable_hotspots=4 -calls total=25704 resolved_edges=15492 resolution_ratio=0.603 max_function_fan_in=2537 max_function_fan_out=293 -size max_file_lines=6329 max_function_lines=2334 large_files=63 long_functions=209 -test_gap production_files=150 test_files=40 files_without_nearby_tests=150 -dsm modules=5 module_edges=240 -root_causes modularity=0.635 acyclicity=1.000 depth=1.000 equality=0.450 redundancy=0.952 -architecture depth=3 max_blast_radius=25 max_blast_radius_file=src/ops/query.c max_non_foundation_blast_radius=12 max_non_foundation_blast_radius_file=src/runtime/eval.c attack_surface_files=45 attack_surface_ratio=0.703 upward_violations=3 upward_violation_ratio=0.012 average_distance_from_main_sequence=0.214 -complexity max=131 avg=3.904 gini=0.550 dead_functions=50 duplicate_groups=20 redundancy_ratio=0.048 -evolution available=true commits_sampled=500 changed_files=190 
-rules warnings=7 info=31 +cargo install raysense ``` -## Commands +Or build from source — see [Building from source](#building-from-source) below. -Install from crates.io after building a local Rayforce library: +## Use -```sh -git clone git@github.com:RayforceDB/rayforce.git -make -C rayforce lib -RAYFORCE_DIR="$PWD/rayforce" cargo install raysense -``` +Four things, one binary. -For library use: +**Live dashboard.** Open it once, leave it open. Updates the moment your +code does, never on a fixed timer. -```sh -cargo add raysense -``` - -```text -raysense observe [--json] [--memory] [--config ] -raysense health [--json] [--config ] -raysense edges [--all] [--config ] -raysense memory [--config ] -raysense check [path] [--json] [--sarif ] [--config ] -raysense gate [path] [--save] [--baseline ] [--json] [--config ] -raysense watch [path] [--interval ] [--config ] -raysense visualize [path] [--watch] [--interval ] [--output ] [--config ] -raysense plugin list [path] [--config ] -raysense plugin add [--file-name ] [--path ] [--config ] -raysense plugin add-standard [--path ] [--config ] -raysense plugin remove [--path ] [--config ] -raysense plugin validate [--json] -raysense plugin scaffold [--path ] -raysense plugin init [--path ] [--config ] -raysense policy list -raysense policy init [path] [--config ] -raysense trend record [path] [--config ] -raysense trend show [path] [--json] [--config ] -raysense remediate [path] [--json] [--config ] -raysense what-if [path] [--ignore ] [--generated ] [--json] [--config ] -raysense baseline save [--output ] [--config ] -raysense baseline diff [--baseline ] [--config ] [--json] -raysense baseline tables [--baseline ] [--json] -raysense baseline table [--baseline ] [--columns ] [--filter ] [--filter-mode ] [--sort ] [--desc] [--offset ] [--limit ] [--json] -raysense mcp -raysense rayforce-version +```bash +raysense visualize . ``` -If `/.raysense.toml` exists, health-producing commands load it -automatically. `--config` overrides that path. 
-Project-local plugin manifests under `.raysense/plugins/*/plugin.toml` are also -loaded during scans, using the same fields as `[[scan.plugins]]`. -When `.raysense/plugins//queries/tags.scm` is present and the plugin -selects a compiled grammar with `grammar = "rust"`, `c`, `cpp`, `python`, or -`typescript`, or with `grammar_path` and optional `grammar_symbol`, Raysense -uses query captures for functions and imports before falling back to token -prefixes. - -`raysense mcp` runs a stdio MCP server for agents. It exposes tools to read and -write config, run health, inspect scan facts, list dependency edges, read -hotspots, read rule findings, read DSM module edges, inspect architecture, -coupling, cycles, hottest files/functions, blast radius, module levels, run -what-if config simulations, and materialize memory table summaries. It can also -write visualization dashboards, emit SARIF reports, apply policy presets, -save/diff baselines, and query saved baseline tables with projection, filters, -sorting, and pagination. Agent session tools can save an in-memory baseline, -rescan, end the session, check rules, inspect evolution, inspect DSM data, -inspect test gaps, list configured language plugins, and add generic or -standard plugin profiles, remove plugin profiles, or validate local plugin -directories. It can also scaffold project-local plugin templates. +**Health report.** A single number out of 100, plus A–F grades on six +dimensions, plus the rules currently failing. -`raysense visualize` writes a self-refreshing local HTML dashboard with file -size blocks, module graph edges, hotspots, rules, complexity, test gaps, and an -embedded telemetry JSON payload. Use `--watch` to keep regenerating the page -from fresh scans. - -Baselines are stored under `/.raysense/baseline` by default. The manifest -is JSON for fast agent diffs, and baseline tables are written under `tables/` -in Rayforce splayed-table format. 
- -Baseline table filters use `column:op:value`, where `op` is one of `eq`, `ne`, -`in`, `not_in`, `contains`, `starts_with`, `ends_with`, `regex`, `not_regex`, -`gt`, `gte`, `lt`, or `lte`. Filters default to AND semantics; use -`--filter-mode any` for OR. -Repeat `--sort` to apply ordered multi-column sorting. - -CLI examples: - -```sh -raysense baseline save . -raysense baseline tables --baseline .raysense/baseline -raysense baseline table files --baseline .raysense/baseline --columns path,language,lines --filter 'language:in:["c","rust"]' --sort language:asc --sort lines:desc --limit 10 -raysense baseline table files --baseline .raysense/baseline --columns path --filter 'path:regex:^src/ops/.*\.c$' --filter 'path:not_regex:query' --limit 10 +```bash +raysense health . ``` -MCP query example: +**CI gate.** Exit non-zero if any rule fails or scores drop against a +saved baseline. -```json -{ - "name": "raysense_baseline_table_read", - "arguments": { - "baseline_path": ".raysense/baseline", - "table": "files", - "columns": ["path", "language", "lines"], - "filters": [ - {"column": "language", "op": "in", "value": ["c", "rust"]}, - {"column": "path", "op": "regex", "value": "^src/.*\\.(c|rs)$"} - ], - "filter_mode": "all", - "sort": [ - {"column": "language", "direction": "asc"}, - {"column": "lines", "direction": "desc"} - ], - "limit": 10 - } -} +```bash +raysense check . ``` -Release checks: +**Agent connector.** Hook Raysense into Claude, Cursor, or any MCP-capable +client. 40+ tools — scan, edges, hotspots, what-if simulation, baseline +diff, evolution metrics — all queryable. -```sh -cargo package -p rayforce-sys -cargo package -p raysense-core -cargo package -p raysense-memory -cargo package -p raysense-cli -cargo package -p raysense +```bash +raysense mcp ``` -Run the `publish` workflow manually with `dry_run=true` before publishing a -release. 
The workflow publishes packages in dependency order, waits for each -new package to appear in the registry index, and then runs a post-release -install and library smoke check. - -Example config: +## What it measures -```toml -[scan] -ignored_paths = ["target", "fixtures/generated"] -generated_paths = ["**/generated/*"] -enabled_languages = [] -disabled_languages = [] -module_roots = ["crates", "src"] -test_roots = ["tests"] -public_api_paths = ["src/lib.rs"] +- **Coupling, cohesion, instability** — Robert Martin's stable-foundation + model, plus blast radius and main-sequence distance. +- **Complexity** — cyclomatic and cognitive, per function and aggregated. +- **Cycles and depth** — strongly-connected components, longest acyclic + path, upward-layer violations. +- **Evolution** — bus factor, change-coupling pairs, temporal hotspots + (churn × complexity), file age. +- **Types and inheritance** — type facts with base-class extraction + (Python and TypeScript via tree-sitter, others via line parsing). +- **Test gaps** — files without nearby tests, ranked by risk. +- **Six A–F dimensions** — modularity, acyclicity, depth, equality, + redundancy, structural uniformity. One 0–100 quality signal. -[[scan.plugins]] -name = "foo" -grammar = "rust" -grammar_path = "grammars/foo.so" -grammar_symbol = "tree_sitter_foo" -extensions = ["foo"] -file_names = ["Foofile"] -function_prefixes = ["function "] -import_prefixes = ["load "] -call_suffixes = ["("] -abstract_type_prefixes = ["interface "] -concrete_type_prefixes = ["class ", "type "] -tags_query = """ -(function_item - name: (identifier) @name) @definition.function -""" -package_index_files = ["index.foo"] -test_path_patterns = ["tests/*", "*_test.foo"] -source_roots = ["src"] -ignored_paths = ["build/*"] -local_import_prefixes = ["."] -max_function_complexity = 15 -max_cognitive_complexity = 20 -max_file_lines = 500 -max_function_lines = 80 -resolver_alias_files = ["foo.config.json"] -namespace_separator = "." 
-module_prefix_files = ["mod.foo"] -module_prefix_directives = ["package "] -entry_point_patterns = ["main"] -test_module_patterns = ["tests/*"] -test_attribute_patterns = ["@Test"] -parameter_node_kinds = ["parameter"] -complexity_node_kinds = ["if_statement", "while_statement"] -logical_operator_kinds = ["&&", "||"] -abstract_base_classes = ["Base"] +## Configuration -[rules] -min_quality_signal = 0 -min_modularity = 0.0 -min_acyclicity = 0.0 -min_depth = 0.0 -min_equality = 0.0 -min_redundancy = 0.0 -max_cycles = 0 -max_coupling_ratio = 1.0 -max_function_complexity = 15 -max_cognitive_complexity = 0 -max_file_lines = 0 -max_function_lines = 0 -no_god_files = true -high_file_fan_in = 50 -high_file_fan_out = 15 -large_file_lines = 500 -max_large_file_findings = 20 -low_call_resolution_min_calls = 100 -low_call_resolution_ratio = 0.5 -high_function_fan_in = 200 -high_function_fan_out = 100 -max_call_hotspot_findings = 5 -max_upward_layer_violations = 0 -no_tests_detected = true +Everything is overridable in `.raysense.toml` at the repo root: rule +thresholds, plugin language definitions, baseline scoring, what-if +ignored paths. Per-language rule overrides let one language demand +stricter caps than another. `raysense --help` lists every flag. -[[boundaries.forbidden_edges]] -from = "src" -to = "test" -reason = "runtime code must not depend on tests" +## Building from source -[[boundaries.layers]] -name = "core" -path = "src/core/*" -order = 0 +The C dependency is vendored. 
Clone and build — that's it: -[score] -modularity_weight = 1.0 -acyclicity_weight = 1.0 -depth_weight = 1.0 -equality_weight = 1.0 -redundancy_weight = 1.0 -structural_uniformity_weight = 0.0 +```bash +git clone https://github.com/RayforceDB/raysense.git +cd raysense +cargo build --release ``` -## Status - -The first testable version has grammar-backed support for Rust, C/C++, Python, -and TypeScript, plus a built-in generic catalog for common project languages -and formats: +No external setup, no submodules, no environment variables. -- Configurable scan filtering by ignored paths and enabled/disabled languages. -- Configurable module roots for DSM and architecture grouping. -- Generic configured language plugins by file extension with configurable - function, import, and call token extraction. -- Standard language plugin profiles can be listed through MCP or materialized - into project config with `raysense plugin add-standard`. -- Project-local plugin manifests can be loaded from - `.raysense/plugins/*/plugin.toml`. -- Built-in generic analyzers for Go, Java, Kotlin, Scala, C#, PHP, Ruby, Swift, - shell, SQL, Lua, Perl, Dart, Elixir, Haskell, OCaml, F#, Clojure, Solidity, - protobuf, GraphQL, build/config formats, and other common file types. -- Tree-sitter-backed Rust, C, C++, Python, and TypeScript function discovery - with lightweight fallback extraction. -- Tree-sitter-backed Rust `use`/`mod`, C/C++ include, Python import, and - TypeScript import extraction with lightweight fallback extraction. -- Tree-sitter-backed Rust, C, C++, Python, and TypeScript call facts with - enclosing function ids. -- Conservative call-edge resolution for unambiguous function names. -- Function-level call metrics: resolution ratio, fan-in/fan-out, and top - called/calling functions. -- Project profile inference for reusable include-root discovery. -- Entry point facts for binaries, examples, and tests. -- Local, external, system, and unresolved import classification. 
-- Graph metrics: resolved edges, cycles, fan-in, fan-out. -- Health summary with score, 0-10000 quality signal, root-cause scores, - import breakdown, hotspots, coupling, size, entry point, test-gap, DSM, - architecture, complexity, and evolution metrics. -- Source-aware complexity, duplicate-body grouping, and public API aware - dead-function filtering. -- Semantic-shape duplicate grouping for code that is structurally similar after - names and literals are normalized. -- Ecosystem-aware module grouping for common monorepo, Rust, Python, Java, and - Kotlin layouts. -- Test-gap candidates include expected test file paths for each unmatched - production file. -- Framework-aware test-gap naming for Rust, Python, TypeScript, Go, Java, and - .NET-style projects. -- Built-in policy presets for Rust crates, monorepos, backend services, and - libraries. -- Remediation suggestions are exposed through the CLI and MCP. -- Persisted trend samples can be recorded and read back for score/rule deltas. -- Score calibration weights can be configured for the root-cause dimensions. -- Built-in rules for high fan-in, production dependencies on test paths, - large-file/no-test findings, call-resolution/function-call hotspots, max - cycles, max coupling, max function complexity, god-file pressure, and ordered - layer constraints. -- Rule thresholds can be configured with TOML. -- Forbidden top-level module dependencies can be configured with TOML. -- Config read/write, health runs, scan facts, edges, hotspots, rule findings, - module edges, architecture, coupling, cycles, hottest files/functions, blast - radius, module levels, what-if simulations, session start/end, rescans, rule - checks, evolution, DSM, test gaps, plugin listing, remediation suggestions, - trend metrics, policy presets, memory summaries, and saved baseline table - queries are exposed through the MCP interface. 
-- Baseline save/diff is available through the CLI and MCP, with Rayforce - splayed-table storage for baseline tables. -- MCP session baselines are persisted by default and can be compared across - process restarts. -- CLI quality gate, watch loop, plugin management, and generated self-refreshing - local HTML architecture visualization are available. -- Rayforce table materialization for scan facts, call facts, call edges, - health summary, hotspots, rules, module edges, and changed-file evolution - metrics. +## License -CI runs on pushes and pull requests. Publish runs when a release is published -and can also be started manually. +MIT. See [LICENSE](LICENSE). diff --git a/crates/rayforce-sys/Cargo.toml b/crates/rayforce-sys/Cargo.toml index 0726ddd..2e82ffe 100644 --- a/crates/rayforce-sys/Cargo.toml +++ b/crates/rayforce-sys/Cargo.toml @@ -29,3 +29,4 @@ description = "Rust FFI bindings for Rayforce used by Raysense" links = "rayforce" [build-dependencies] +cc = "1" diff --git a/crates/rayforce-sys/build.rs b/crates/rayforce-sys/build.rs index a6ad1af..3ecc531 100644 --- a/crates/rayforce-sys/build.rs +++ b/crates/rayforce-sys/build.rs @@ -21,38 +21,23 @@ * SOFTWARE. */ +//! Compile the vendored C library directly via `cc`. No external checkout +//! required — `cargo build` works from a fresh clone with no extra steps. +//! Set `RAYFORCE_DIR` only if you want to link against an outside build for +//! development. 
+ use std::env; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; fn main() { let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); - let repo_root = manifest_dir.join("../.."); - let checkout_dir = repo_root.join("deps/rayforce"); - let sibling_dir = repo_root.join("../rayforce"); - let rayforce_dir = env::var_os("RAYFORCE_DIR").map(PathBuf::from).unwrap_or({ - if checkout_dir.exists() { - checkout_dir - } else { - sibling_dir - } - }); - let include_dir = rayforce_dir.join("include"); - let lib_dir = rayforce_dir.clone(); - let lib_path = lib_dir.join("librayforce.a"); - - if !lib_path.exists() { - panic!( - "missing {}; build Rayforce with `make -C {} lib` or set RAYFORCE_DIR", - lib_path.display(), - rayforce_dir.display() - ); + if let Some(external_dir) = env::var_os("RAYFORCE_DIR") { + link_external(PathBuf::from(external_dir)); + } else { + compile_vendored(&manifest_dir.join("vendor/rayforce")); } - println!("cargo:include={}", include_dir.display()); - println!("cargo:rustc-link-search=native={}", lib_dir.display()); - println!("cargo:rustc-link-lib=static=rayforce"); - if env::var("CARGO_CFG_TARGET_OS").as_deref() == Ok("linux") { println!("cargo:rustc-link-lib=m"); println!("cargo:rustc-link-lib=pthread"); @@ -61,9 +46,101 @@ fn main() { } println!("cargo:rerun-if-env-changed=RAYFORCE_DIR"); +} + +/// Default path: build the vendored sources with `cc::Build`. Excludes the +/// REPL binary entry (`src/app/main.c`) since we only need the library. 
+fn compile_vendored(vendor_dir: &Path) {
+    let include_dir = vendor_dir.join("include");
+    let src_dir = vendor_dir.join("src");
+    let mut build = cc::Build::new();
+    build
+        .std("c17")
+        .include(&include_dir)
+        .include(&src_dir)
+        .flag_if_supported("-fPIC")
+        .flag_if_supported("-Wno-unused-parameter")
+        .flag_if_supported("-Wno-unused-but-set-variable")
+        .flag_if_supported("-Wno-unused-variable")
+        .flag_if_supported("-Wno-unused-function");
+
+    if let Ok(profile) = env::var("PROFILE") {
+        if profile == "release" {
+            build
+                .opt_level(3)
+                .flag_if_supported("-funroll-loops")
+                .flag_if_supported("-fomit-frame-pointer")
+                .flag_if_supported("-fno-math-errno");
+        }
+    }
+
+    let mut count = 0usize;
+    for entry in walk_c_sources(&src_dir) {
+        if entry.ends_with(Path::new("app/main.c"))
+            || entry.ends_with(Path::new("app/repl.c"))
+            || entry.ends_with(Path::new("app/term.c"))
+        {
+            continue; // app front-end sources are left out of the library build
+        }
+        build.file(&entry);
+        count += 1;
+    }
+    if count == 0 {
+        panic!(
+            "no C sources found under {} — vendor/ is empty?",
+            src_dir.display()
+        );
+    }
+    println!("cargo:rerun-if-changed={}", src_dir.display()); // directory path: Cargo rescans the whole tree, so private headers and newly added sources retrigger the build
+    println!("cargo:rerun-if-changed={}", include_dir.display());
+    println!("cargo:include={}", include_dir.display());
+    build.compile("rayforce");
+}
+
+/// Optional: link against an externally-built `librayforce.a`. Used only for
+/// rayforce development; everyone else gets the vendored compile path above.
+fn link_external(rayforce_dir: PathBuf) {
+    let include_dir = rayforce_dir.join("include");
+    let lib_path = rayforce_dir.join("librayforce.a");
+    if !lib_path.exists() {
+        panic!(
+            "RAYFORCE_DIR={} but {} is missing — build with `make -C {} lib`",
+            rayforce_dir.display(),
+            lib_path.display(),
+            rayforce_dir.display(),
+        );
+    }
+    println!("cargo:include={}", include_dir.display());
+    println!(
+        "cargo:rustc-link-search=native={}",
+        rayforce_dir.display()
+    );
+    println!("cargo:rustc-link-lib=static=rayforce");
     println!("cargo:rerun-if-changed={}", lib_path.display());
     println!(
         "cargo:rerun-if-changed={}",
         include_dir.join("rayforce.h").display()
     );
 }
+
+/// Walk a directory tree collecting all `*.c` files. Pure-std (no walkdir
+/// dep) to keep build-deps minimal.
+fn walk_c_sources(root: &Path) -> Vec<PathBuf> {
+    let mut out = Vec::new();
+    let mut stack = vec![root.to_path_buf()];
+    while let Some(dir) = stack.pop() {
+        let Ok(entries) = std::fs::read_dir(&dir) else {
+            continue; // unreadable directory: skip it (best effort)
+        };
+        for entry in entries.flatten() {
+            let path = entry.path();
+            if path.is_dir() {
+                stack.push(path);
+            } else if path.extension().and_then(|s| s.to_str()) == Some("c") {
+                out.push(path);
+            }
+        }
+    }
+    out.sort(); // `read_dir` order is platform-dependent; sort for a stable file list
+    out
+}
diff --git a/crates/rayforce-sys/vendor/rayforce/LICENSE b/crates/rayforce-sys/vendor/rayforce/LICENSE
new file mode 100644
index 0000000..d52e496
--- /dev/null
+++ b/crates/rayforce-sys/vendor/rayforce/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Anton Kundenko
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission
notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/crates/rayforce-sys/vendor/rayforce/include/rayforce.h b/crates/rayforce-sys/vendor/rayforce/include/rayforce.h new file mode 100644 index 0000000..f5b83f9 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/include/rayforce.h @@ -0,0 +1,418 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAY_H +#define RAY_H + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* ===== Semantic Versioning ===== */ + +#define RAY_VERSION_MAJOR 2 +#define RAY_VERSION_MINOR 1 +#define RAY_VERSION_PATCH 0 + +/* Packed version number, decimal: major * 10000 + minor * 100 + patch (2.1.0 -> 20100) */ +#define RAY_VERSION_NUMBER \ + ((RAY_VERSION_MAJOR * 10000) + (RAY_VERSION_MINOR * 100) + RAY_VERSION_PATCH) + +/* Compile-time version check: true if lib version >= (major, minor, patch) */ +#define RAY_VERSION_AT_LEAST(major, minor, patch) \ + (RAY_VERSION_NUMBER >= ((major) * 10000 + (minor) * 100 + (patch))) + +/* Runtime version query */ +int ray_version_major(void); +int ray_version_minor(void); +int ray_version_patch(void); +const char* ray_version_string(void); + +/* ===== Type Constants ===== */ + +#define RAY_LIST 0 +#define RAY_BOOL 1 +#define RAY_U8 2 +#define RAY_I16 3 +#define RAY_I32 4 +#define RAY_I64 5 +#define RAY_F32 6 +#define RAY_F64 7 +#define RAY_DATE 8 +#define RAY_TIME 9 +#define RAY_TIMESTAMP 10 +#define RAY_GUID 11 +/* Unified dictionary-encoded string column (adaptive width) */ +#define RAY_SYM 12 +/* Variable-length string column (inline + pool) */ +#define RAY_STR 13 + +/* Compound types */ +#define RAY_INDEX 97 /* Accelerator index attached to a vector (see ops/idxop.h) */ +#define RAY_TABLE 98 +#define RAY_DICT 99 + +/* Function types (Rayforce-compatible) */ +#define RAY_LAMBDA 100 /* User-defined function (compiled body + env) */ +#define RAY_UNARY 101 /* Unary builtin: ray_t* (*)(ray_t*) */ +#define RAY_BINARY 102 /* Binary builtin: ray_t* (*)(ray_t*, ray_t*) */ +#define RAY_VARY 103 /* Variadic builtin: ray_t* (*)(ray_t**, int64_t) */ +#define RAY_ERROR 127 /* Error object: 8-byte packed ASCII code in sdata */ +#define RAY_NULL 126 /* Null / void — singleton static object */ + +/* ===== Error Handling ===== */ + +typedef enum { + RAY_OK = 0, + RAY_ERR_OOM, + RAY_ERR_TYPE, + RAY_ERR_RANGE, + RAY_ERR_LENGTH, +
RAY_ERR_RANK, + RAY_ERR_DOMAIN, + RAY_ERR_NYI, + RAY_ERR_IO, + RAY_ERR_SCHEMA, + RAY_ERR_CORRUPT, + RAY_ERR_CANCEL, + RAY_ERR_PARSE, + RAY_ERR_NAME, + RAY_ERR_LIMIT, + RAY_ERR_RESERVED +} ray_err_t; + +#define RAY_IS_ERR(p) ((p) != NULL && (uintptr_t)(p) > 31 && ((ray_t*)(p))->type == RAY_ERROR) + +/* ===== Core Type: ray_t (32-byte block/object header) ===== */ + +typedef union ray_t { + /* Allocated: object header */ + struct { + /* Bytes 0-15: nullable bitmask / slice / ext nullmap / index */ + union { + uint8_t nullmap[16]; + struct { union ray_t* slice_parent; int64_t slice_offset; }; + struct { union ray_t* ext_nullmap; union ray_t* sym_dict; }; + struct { union ray_t* str_ext_null; union ray_t* str_pool; }; + /* RAY_ATTR_HAS_INDEX (vectors): ray_t* of type RAY_INDEX + * carrying both the accelerator payload and the saved nullmap + * bytes. _idx_pad is reserved (must be NULL). See ops/idxop.h. */ + struct { union ray_t* index; union ray_t* _idx_pad; }; + /* RAY_ATTR_HAS_LINK (vectors, RAY_I32/RAY_I64 only): bytes 8-15 + * hold an int64 sym ID naming the target table. link_lo[8] + * aliases bytes 0-7 (inline nullmap bits OR ext_nullmap pointer + * OR HAS_INDEX index pointer, depending on the other arm in use). + * See ops/linkop.h. 
*/ + struct { uint8_t link_lo[8]; int64_t link_target; }; + }; + /* Bytes 16-31: metadata + value */ + uint8_t mmod; /* 0=heap, 1=file-mmap */ + uint8_t order; /* block order (block size = 2^order) */ + int8_t type; /* negative=atom, positive=vector, 0=LIST */ + uint8_t attrs; /* attribute flags */ + uint32_t rc; /* reference count (0=free) */ + union { + uint8_t b8; /* BOOL atom */ + uint8_t u8; /* U8 atom */ + int16_t i16; /* I16 atom */ + int32_t i32; /* I32 atom */ + uint32_t u32; + int64_t i64; /* I64/SYMBOL/DATE/TIME/TIMESTAMP atom */ + double f64; /* F64 atom */ + union ray_t* obj; /* pointer to child (long strings, GUID) */ + struct { uint8_t slen; char sdata[7]; }; /* SSO string (<=7 bytes) */ + int64_t len; /* vector element count */ + }; + uint8_t data[]; /* element data (flexible array member) */ + }; + /* Free: buddy allocator block (fl_prev/fl_next overlay bytes 0-15) */ + struct { + union ray_t* fl_prev; + union ray_t* fl_next; + }; +} ray_t; + +/* Global null singleton — always valid, retain/release are no-ops (ARENA flag) */ +extern ray_t __ray_null; +#define RAY_NULL_OBJ (&__ray_null) +#define RAY_IS_NULL(p) ((p) == RAY_NULL_OBJ) + +/* Global last-resort OOM error sentinel — returned by ray_error when its + * own ray_alloc fails (deep OOM, e.g. heap can't even satisfy the 32-byte + * error header). ARENA-flagged like RAY_NULL_OBJ so retain/release are + * no-ops; slen=3 / sdata="oom" so RAY_IS_ERR() and ray_err_code() both + * work without touching the heap. Carries no per-VM message — we have + * no heap to format one into. Without this fallback, hard OOM would + * silently bypass every `if (RAY_IS_ERR(x)) return x;` guard upstream. 
*/ +extern ray_t __ray_oom; +#define RAY_OOM_OBJ (&__ray_oom) + +/* Error object creation (defined in core/runtime.c) */ +ray_t* ray_error(const char* code, const char* fmt, ...); +const char* ray_err_code_str(ray_err_t e); +ray_err_t ray_err_from_obj(ray_t* err); +const char* ray_err_code(ray_t* err); +/* Free a RAY_ERROR object. ray_release() is a deliberate no-op for + * error ray_t* (see src/mem/cow.c), so callers that hold the sole + * reference and want the block reclaimed must use this helper instead — + * otherwise the error leaks until heap teardown. */ +void ray_error_free(ray_t* err); + +/* ===== Accessor Macros ===== */ + +#define RAY_ATTR_SLICE 0x10 + +#define ray_type(v) ((v)->type) +#define ray_is_atom(v) ((v)->type < 0 || (v)->type >= RAY_LAMBDA) +#define ray_is_vec(v) ((v)->type >= RAY_BOOL && (v)->type <= RAY_STR) +#define ray_len(v) ((v)->len) + +/* Element type sizes indexed by type tag — covers all uint8_t values. + * Only types 1-14 (vectors) have non-zero entries. */ +extern const uint8_t ray_type_sizes[256]; + +static inline void* ray_data_fn(ray_t* v) { + if (__builtin_expect(!!(v->attrs & RAY_ATTR_SLICE), 0)) + return (char*)v->slice_parent->data + + v->slice_offset * ray_type_sizes[(uint8_t)v->type]; + return (void*)v->data; +} +#define ray_slice_data(v) ray_data_fn(v) /* alias — ray_data is always slice-safe */ +#define ray_data(v) ray_data_fn(v) + +/* ===== Memory Allocator API ===== */ + +ray_t* ray_alloc(size_t data_size); +/* NOTE: ray_free supports cross-thread free via foreign_blocks list. + * Blocks freed from a non-owning thread are deferred and coalesced + * when the owning heap flushes foreign blocks. 
*/ +void ray_free(ray_t* v); + +/* ===== Memory Budget API ===== */ + +int64_t ray_mem_budget(void); /* returns memory budget in bytes */ +bool ray_mem_pressure(void); /* true if calling thread's usage exceeds budget */ + +/* ===== Interrupt API ===== + * Long-running queries poll ray_interrupted() at morsel granularity + * and bail out with a "cancel" error. The REPL's SIGINT handler wires + * Ctrl-C to ray_request_interrupt(); embedders can call it from their + * own signal handlers or cancellation threads. */ + +void ray_request_interrupt(void); +void ray_clear_interrupt(void); +bool ray_interrupted(void); + +/* ===== Progress API ===== + * Pull-based, main-thread only. Worker threads never touch progress + * state. The executor calls ray_progress_update() at natural sync + * points (between ops, after pool dispatches, at pivot phase + * boundaries); the update only fires the user callback once the + * query has been running for at least min_ms and at most once per + * tick_interval_ms. Embedders register a callback to visualize + * long-running queries; leaving it unset has zero runtime cost. */ + +typedef struct { + const char* op_name; /* coarse: scan, group, pivot, join, ... */ + const char* phase; /* optional finer label, e.g. "pivot: dedupe" */ + uint64_t rows_done; + uint64_t rows_total; /* 0 = indeterminate */ + double elapsed_sec; + int64_t mem_used; /* bytes: buddy + direct mmap */ + int64_t mem_budget; /* bytes: auto-detected memory budget */ + bool final; /* true on the last tick of a query — renderers + use this to clear the line */ +} ray_progress_t; + +typedef void (*ray_progress_cb)(const ray_progress_t* snapshot, void* user); + +/* Register a progress callback. Set cb=NULL to disable. min_ms is the + * show-after threshold: queries finishing under it fire + * zero callbacks. tick_interval_ms throttles updates once active. 
*/ +void ray_progress_set_callback(ray_progress_cb cb, void* user, + uint64_t min_ms, uint64_t tick_interval_ms); + +/* Update progress state. Safe to call from the main thread only. + * phase/op_name may be NULL to keep the previous value. Counters + * always overwrite — 0 is a valid "starting fresh" value. Fires the + * registered callback if the show-after and tick-interval gates pass. */ +void ray_progress_update(const char* op_name, const char* phase, + uint64_t rows_done, uint64_t rows_total); + +/* Relabel without touching the counters — for wrappers like exec_node + * that only know which operator is about to run but not its rows. A + * subsequent ray_progress_update from inside the op will advance the + * counters; until then the renderer shows an indeterminate bar. */ +void ray_progress_label(const char* op_name, const char* phase); + +/* Mark the end of the current query. Clears state and fires a final + * "100%" tick if the query ran long enough to have shown the bar. */ +void ray_progress_end(void); + +/* ===== COW / Ref Counting API ===== */ + +void ray_retain(ray_t* v); +void ray_release(ray_t* v); + +/* ===== Atom Constructors ===== */ + +ray_t* ray_bool(bool val); +ray_t* ray_u8(uint8_t val); +ray_t* ray_i16(int16_t val); +ray_t* ray_i32(int32_t val); +ray_t* ray_i64(int64_t val); +ray_t* ray_f32(float val); +ray_t* ray_f64(double val); +ray_t* ray_str(const char* s, size_t len); +ray_t* ray_sym(int64_t id); +ray_t* ray_date(int64_t val); +ray_t* ray_time(int64_t val); +ray_t* ray_timestamp(int64_t val); +ray_t* ray_guid(const uint8_t* bytes); +ray_t* ray_typed_null(int8_t type); + +/* Null bitmap check for atoms — bit 0 of nullmap[0] marks typed nulls. + * Also matches RAY_NULL_OBJ (the untyped null singleton). 
*/ +#define RAY_ATOM_IS_NULL(x) (RAY_IS_NULL(x) || ((x)->type < 0 && ((x)->nullmap[0] & 1))) + +/* ===== Vector API ===== */ + +ray_t* ray_vec_new(int8_t type, int64_t capacity); + +/* RAY_SYM index width — encoded in the lower 2 bits of the vector's + * `attrs` byte. Pick the smallest width that fits the destination + * symbol-table size; W64 is the safe default when growing globally. */ +#define RAY_SYM_W8 0 /* uint8_t indices, ≤255 entries */ +#define RAY_SYM_W16 1 /* uint16_t indices, ≤65,535 */ +#define RAY_SYM_W32 2 /* uint32_t indices, ≤4,294,967,295 */ +#define RAY_SYM_W64 3 /* int64_t indices, unbounded */ + +ray_t* ray_sym_vec_new(uint8_t sym_width, int64_t capacity); /* RAY_SYM with adaptive width */ +ray_t* ray_vec_append(ray_t* vec, const void* elem); +ray_t* ray_vec_set(ray_t* vec, int64_t idx, const void* elem); +void* ray_vec_get(ray_t* vec, int64_t idx); +ray_t* ray_vec_slice(ray_t* vec, int64_t offset, int64_t len); +ray_t* ray_vec_concat(ray_t* a, ray_t* b); +ray_t* ray_vec_from_raw(int8_t type, const void* data, int64_t count); +ray_t* ray_vec_insert_at(ray_t* vec, int64_t idx, const void* elem); +ray_t* ray_vec_insert_vec_at(ray_t* vec, int64_t idx, ray_t* src); +ray_t* ray_vec_insert_many(ray_t* vec, ray_t* idxs, ray_t* vals); + +/* Null bitmap ops */ +void ray_vec_set_null(ray_t* vec, int64_t idx, bool is_null); +ray_err_t ray_vec_set_null_checked(ray_t* vec, int64_t idx, bool is_null); +bool ray_vec_is_null(ray_t* vec, int64_t idx); + +/* ===== String Vector API ===== */ + +ray_t* ray_str_vec_append(ray_t* vec, const char* s, size_t len); +const char* ray_str_vec_get(ray_t* vec, int64_t idx, size_t* out_len); +ray_t* ray_str_vec_set(ray_t* vec, int64_t idx, const char* s, size_t len); +ray_t* ray_str_vec_insert_at(ray_t* vec, int64_t idx, const char* s, size_t len); +ray_t* ray_str_vec_compact(ray_t* vec); + +/* ===== String API ===== */ + +const char* ray_str_ptr(ray_t* s); +size_t ray_str_len(ray_t* s); +int ray_str_cmp(ray_t* a, ray_t* 
b); + +/* ===== List API ===== */ + +ray_t* ray_list_new(int64_t capacity); +ray_t* ray_list_append(ray_t* list, ray_t* item); +ray_t* ray_list_get(ray_t* list, int64_t idx); +ray_t* ray_list_set(ray_t* list, int64_t idx, ray_t* item); +ray_t* ray_list_insert_at(ray_t* list, int64_t idx, ray_t* item); +ray_t* ray_list_insert_many(ray_t* list, ray_t* idxs, ray_t* vals); + +/* ===== Symbol Intern Table API ===== */ + +ray_err_t ray_sym_init(void); +void ray_sym_destroy(void); +int64_t ray_sym_intern(const char* str, size_t len); +int64_t ray_sym_find(const char* str, size_t len); +ray_t* ray_sym_str(int64_t id); +uint32_t ray_sym_count(void); +bool ray_sym_ensure_cap(uint32_t needed); +ray_err_t ray_sym_save(const char* path); +ray_err_t ray_sym_load(const char* path); + +/* ===== Environment API ===== + * + * Thread-safety: the environment is shared global state. Concurrent calls + * to ray_env_get() and ray_env_set() require external synchronization by + * the caller. */ + +ray_t* ray_env_get(int64_t sym_id); +ray_err_t ray_env_set(int64_t sym_id, ray_t* val); + +/* ===== Table API ===== */ + +ray_t* ray_table_new(int64_t ncols); +ray_t* ray_table_add_col(ray_t* tbl, int64_t name_id, ray_t* col_vec); +ray_t* ray_table_get_col(ray_t* tbl, int64_t name_id); +ray_t* ray_table_get_col_idx(ray_t* tbl, int64_t idx); +int64_t ray_table_col_name(ray_t* tbl, int64_t idx); +void ray_table_set_col_name(ray_t* tbl, int64_t idx, int64_t name_id); +int64_t ray_table_ncols(ray_t* tbl); +int64_t ray_table_nrows(ray_t* tbl); +ray_t* ray_table_schema(ray_t* tbl); + +/* ===== Dict API ===== + * + * A dict is a 2-pointer block (type=RAY_DICT, len=2) holding [keys, vals]. + * Pair count is keys->len. + * + * keys: Either a typed vector (RAY_SYM / RAY_I64 / RAY_F64 / RAY_STR / + * RAY_GUID / RAY_DATE / RAY_TIME / RAY_TIMESTAMP / RAY_I32 / + * RAY_I16 / RAY_BOOL / RAY_U8 / RAY_F32) when every key shares + * one atom type, or a RAY_LIST of boxed atoms when keys are + * heterogeneous. 
Typed-vec lookup honors the keys' null bitmap + * so a null key never collides with a legitimate zero/sentinel. + * vals: Either a typed vector when every value shares one atom type, + * or a RAY_LIST otherwise (the form parsed from {…} literals, + * which keep value expressions unevaluated until probed). + * + * Layout matches RAY_TABLE; only the type tag and the contract on + * `vals` (a RAY_LIST of column vectors for tables) differ. + */ + +ray_t* ray_dict_new(ray_t* keys, ray_t* vals); /* consumes both */ +ray_t* ray_dict_keys(ray_t* d); /* borrowed */ +ray_t* ray_dict_vals(ray_t* d); /* borrowed */ +int64_t ray_dict_len(ray_t* d); /* keys->len */ +ray_t* ray_dict_get(ray_t* d, ray_t* key_atom); /* owned, NULL if missing */ +ray_t* ray_dict_upsert(ray_t* d, ray_t* key_atom, ray_t* val); /* COW; consumes d */ +ray_t* ray_dict_remove(ray_t* d, ray_t* key_atom); /* COW; consumes d */ + +#ifdef __cplusplus +} +#endif + +#endif /* RAY_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/block.c b/crates/rayforce-sys/vendor/rayforce/src/core/block.c new file mode 100644 index 0000000..1401925 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/block.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "block.h" +#include "core/platform.h" +#include "../mem/heap.h" +#include "../ops/ops.h" +#include "../table/sym.h" + +/* Weak stub for ray_alloc — replaced by buddy allocator at link time. + * Uses ray_vm_alloc (mmap) — page-aligned and zero-filled. */ +__attribute__((weak)) +ray_t* ray_alloc(size_t size) { + if (size < 32) size = 32; + size = (size + 4095) & ~(size_t)4095; + void* p = ray_vm_alloc(size); + if (!p) return ray_error("oom", NULL); + return (ray_t*)p; +} + +size_t ray_block_size(ray_t* v) { + if (ray_is_atom(v)) return 32; + /* LIST (type=0) stores child pointers */ + if (v->type == RAY_LIST) return 32 + (size_t)ray_len(v) * sizeof(ray_t*); + /* TABLE / DICT: 2-pointer block [keys, vals] */ + if (v->type == RAY_TABLE || v->type == RAY_DICT) return 32 + 2 * sizeof(ray_t*); + /* RAY_SEL: variable layout — meta + seg_flags + seg_popcnt + bits */ + if (v->type == RAY_SEL) { + int64_t nrows = ray_len(v); + if (nrows < 0) return 32; + uint32_t n_segs = (uint32_t)((nrows + RAY_MORSEL_ELEMS - 1) / RAY_MORSEL_ELEMS); + uint32_t n_words = (uint32_t)((nrows + 63) / 64); + size_t dsz = sizeof(ray_sel_meta_t); + dsz += (n_segs + 7u) & ~(size_t)7; /* seg_flags, 8-aligned */ + dsz += ((size_t)n_segs * 2 + 7u) & ~(size_t)7; /* seg_popcnt, 8-aligned */ + dsz += (size_t)n_words * 8; /* bits */ + return 32 + dsz; + } + /* Vectors: header (32 bytes) + len * elem_size. + * Use ray_sym_elem_size for SYM columns to respect narrow widths. 
*/ + int8_t t = ray_type(v); + if (t <= 0 || t >= RAY_TYPE_COUNT) return 32; + return 32 + (size_t)ray_len(v) * ray_sym_elem_size(t, v->attrs); +} + +ray_t* ray_block_copy(ray_t* src) { + size_t sz = ray_block_size(src); + ray_t* dst = ray_alloc(sz); + if (!dst) return ray_error("oom", NULL); + /* Save allocator metadata before memcpy overwrites the header */ + uint8_t new_mmod = dst->mmod; + uint8_t new_order = dst->order; + memcpy(dst, src, sz); + dst->mmod = new_mmod; + dst->order = new_order; + ray_atomic_store(&dst->rc, 1); + if (!ray_retain_owned_refs(dst)) { + ray_free(dst); + return ray_error("oom", NULL); + } + return dst; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/block.h b/crates/rayforce-sys/vendor/rayforce/src/core/block.h new file mode 100644 index 0000000..1c50969 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/block.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_BLOCK_H +#define RAY_BLOCK_H + +/* + * block.h — Internal block header utilities. + * + * Provides ray_block_size() and ray_block_copy(). The core ray_t struct and + * accessor macros (ray_type, ray_is_atom, ray_is_vec, ray_len, ray_data, + * ray_elem_size) are defined in <rayforce.h>. + */ +#include +#include + +/* Compute total block size in bytes (header + data) */ +size_t ray_block_size(ray_t* v); + +/* Allocate a new block and shallow-copy header + data from src. + * Retains child refs (STR/LIST/TABLE pointers) via ray_retain_owned_refs. + * Requires ray_alloc (declared in rayforce.h, provided by the buddy allocator). */ +ray_t* ray_block_copy(ray_t* src); + +#endif /* RAY_BLOCK_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/epoll.c b/crates/rayforce-sys/vendor/rayforce/src/core/epoll.c new file mode 100644 index 0000000..3452be8 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/epoll.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if defined(__linux__) + +#include "core/poll.h" +#include "mem/sys.h" +#include <errno.h> +#include <string.h> +#include <sys/epoll.h> +#include <unistd.h> + +#define RAY_POLL_MAX_EVENTS 64 +#define RAY_POLL_INITIAL_CAP 16 + +ray_poll_t* ray_poll_create(void) +{ + int fd = epoll_create1(0); + if (fd < 0) return NULL; + + ray_poll_t* poll = (ray_poll_t*)ray_sys_alloc(sizeof(ray_poll_t)); + if (!poll) { close(fd); return NULL; } + + memset(poll, 0, sizeof(*poll)); + poll->fd = fd; + poll->code = -1; /* ray_poll_run loops while code < 0 */ + poll->sel_cap = RAY_POLL_INITIAL_CAP; + poll->sels = (ray_selector_t**)ray_sys_alloc( + poll->sel_cap * sizeof(ray_selector_t*)); + if (!poll->sels) { + close(fd); + ray_sys_free(poll); + return NULL; + } + memset(poll->sels, 0, poll->sel_cap * sizeof(ray_selector_t*)); + return poll; +} + +void ray_poll_destroy(ray_poll_t* poll) +{ + if (!poll) return; + + /* Deregister all selectors: remove each fd from epoll BEFORE close_fn runs (close_fn may close the fd), same order as ray_poll_deregister */ + for (uint32_t i = 0; i < poll->n_sels; i++) { + ray_selector_t* sel = poll->sels[i]; + if (!sel) continue; + epoll_ctl((int)poll->fd, EPOLL_CTL_DEL, (int)sel->fd, NULL); + if (sel->close_fn) sel->close_fn(poll, sel); + if (sel->rx.buf) ray_poll_buf_free(sel->rx.buf); + ray_poll_buf_free(sel->tx.buf); + ray_sys_free(sel); + poll->sels[i] = NULL; + } + + if (poll->sels) ray_sys_free(poll->sels); + close((int)poll->fd); + ray_sys_free(poll); +} + +int64_t ray_poll_register(ray_poll_t* poll, ray_poll_reg_t* reg) +{ + if (!poll || !reg) return -1; + + /* Find free slot or grow */ + int64_t id = -1; + for (uint32_t i = 0; i < poll->n_sels; i++)
{ + if (!poll->sels[i]) { id = (int64_t)i; break; } + } + if (id < 0) { + if (poll->n_sels >= poll->sel_cap) { + uint32_t new_cap = poll->sel_cap * 2; + ray_selector_t** ns = (ray_selector_t**)ray_sys_alloc( + new_cap * sizeof(ray_selector_t*)); + if (!ns) return -1; + memcpy(ns, poll->sels, poll->n_sels * sizeof(ray_selector_t*)); + memset(ns + poll->n_sels, 0, + (new_cap - poll->n_sels) * sizeof(ray_selector_t*)); + ray_sys_free(poll->sels); + poll->sels = ns; + poll->sel_cap = new_cap; + } + id = (int64_t)poll->n_sels; + poll->n_sels++; + } + + ray_selector_t* sel = (ray_selector_t*)ray_sys_alloc(sizeof(ray_selector_t)); + if (!sel) return -1; + memset(sel, 0, sizeof(*sel)); + + sel->fd = reg->fd; + sel->id = id; + sel->type = reg->type; + sel->data = reg->data; + sel->open_fn = reg->open_fn; + sel->close_fn = reg->close_fn; + sel->error_fn = reg->error_fn; + sel->data_fn = reg->data_fn; + sel->rx.recv_fn = reg->recv_fn; + sel->rx.read_fn = reg->read_fn; + sel->tx.send_fn = reg->send_fn; + + poll->sels[id] = sel; + + /* Register with epoll */ + struct epoll_event ev; + ev.events = EPOLLIN; + ev.data.u64 = (uint64_t)id; + + if (epoll_ctl((int)poll->fd, EPOLL_CTL_ADD, (int)reg->fd, &ev) < 0) { + poll->sels[id] = NULL; + ray_sys_free(sel); + return -1; + } + + if (sel->open_fn) sel->open_fn(poll, sel); + return id; +} + +void ray_poll_deregister(ray_poll_t* poll, int64_t id) +{ + if (!poll || id < 0 || (uint32_t)id >= poll->n_sels) return; + ray_selector_t* sel = poll->sels[id]; + if (!sel) return; + + epoll_ctl((int)poll->fd, EPOLL_CTL_DEL, (int)sel->fd, NULL); + if (sel->close_fn) sel->close_fn(poll, sel); + if (sel->rx.buf) ray_poll_buf_free(sel->rx.buf); + ray_poll_buf_free(sel->tx.buf); + ray_sys_free(sel); + poll->sels[id] = NULL; +} + +int64_t ray_poll_run(ray_poll_t* poll) +{ + if (!poll) return -1; + + struct epoll_event events[RAY_POLL_MAX_EVENTS]; + + while (poll->code < 0) { + int n = epoll_wait((int)poll->fd, events, RAY_POLL_MAX_EVENTS, -1); + if (n < 
0) { + if (errno == EINTR) continue; + return -1; + } + + for (int i = 0; i < n; i++) { + uint64_t eid = events[i].data.u64; + ray_selector_t* sel = NULL; + + if (eid < poll->n_sels) + sel = poll->sels[eid]; + if (!sel) continue; + + /* Process readable data first — even if hangup is also set. + * A client may send a message and close; epoll reports both + * EPOLLIN and EPOLLHUP in the same event. */ + if (events[i].events & EPOLLIN) { + /* Loop: read data → call read_fn → if state advanced, + * read more. Handles multi-phase protocols (handshake → + * header → payload) arriving in a single epoll event. */ + for (;;) { + /* Fill rx buffer */ + if (sel->rx.recv_fn && sel->rx.buf) { + while (sel->rx.buf->offset < sel->rx.buf->size) { + int64_t nr = sel->rx.recv_fn( + sel->fd, + sel->rx.buf->data + sel->rx.buf->offset, + sel->rx.buf->size - sel->rx.buf->offset); + if (nr <= 0) { + if (nr < 0 && errno == EINTR) continue; + if (nr < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) + break; + /* Error or peer closed mid-read */ + if (sel->error_fn) + sel->error_fn(poll, sel); + else + ray_poll_deregister(poll, sel->id); + goto next_event; + } + sel->rx.buf->offset += nr; + } + } + + /* Not enough data for current phase */ + if (sel->rx.buf && sel->rx.buf->offset < sel->rx.buf->size) + break; + + /* Call read_fn — may advance state and request new buffer */ + if (!sel->rx.read_fn) break; + ray_t* obj = sel->rx.read_fn(poll, sel); + + /* Re-validate: read_fn may have deregistered this selector */ + if (eid >= poll->n_sels || !poll->sels[eid]) goto next_event; + sel = poll->sels[eid]; + + if (obj && sel->data_fn) + sel->data_fn(poll, sel, obj); + + /* If data_fn deregistered the selector, stop */ + if (eid >= poll->n_sels || !poll->sels[eid]) goto next_event; + sel = poll->sels[eid]; + + /* If no rx buffer (state machine done or not set), stop */ + if (!sel->rx.buf) break; + /* If buffer already has enough data for next phase, loop */ + if (sel->rx.buf->offset >= 
sel->rx.buf->size) continue; + /* Otherwise try reading more (may EAGAIN → break) */ + } + } + + /* Error / hangup — after data is drained */ + if (events[i].events & (EPOLLERR | EPOLLHUP | EPOLLRDHUP)) { + /* Re-check: selector may have been freed by data_fn */ + if (eid < poll->n_sels && poll->sels[eid]) { + sel = poll->sels[eid]; + if (sel->error_fn) + sel->error_fn(poll, sel); + else + ray_poll_deregister(poll, sel->id); + } + } + + next_event:; + } + } + + return poll->code; +} + +#endif /* __linux__ */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/iocp.c b/crates/rayforce-sys/vendor/rayforce/src/core/iocp.c new file mode 100644 index 0000000..8bbfac8 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/iocp.c @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#if defined(RAY_OS_WINDOWS) + +#include "core/poll.h" +#include + +/* Windows IOCP implementation — stub for now. + * Full IOCP support is deferred to a future release. */ + +ray_poll_t* ray_poll_create(void) +{ + fprintf(stderr, "ray_poll_create: IOCP not yet implemented\n"); + return NULL; +} + +void ray_poll_destroy(ray_poll_t* poll) +{ + (void)poll; +} + +int64_t ray_poll_register(ray_poll_t* poll, ray_poll_reg_t* reg) +{ + (void)poll; (void)reg; + return -1; +} + +void ray_poll_deregister(ray_poll_t* poll, int64_t id) +{ + (void)poll; (void)id; +} + +int64_t ray_poll_run(ray_poll_t* poll) +{ + (void)poll; + return -1; +} + +#endif /* _WIN32 */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/ipc.c b/crates/rayforce-sys/vendor/rayforce/src/core/ipc.c new file mode 100644 index 0000000..4fa7419 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/ipc.c @@ -0,0 +1,1117 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_OS_WINDOWS + #define _GNU_SOURCE +#endif + +#include "core/ipc.h" +#include "mem/sys.h" +#include "store/journal.h" +#include +#include +#include + +#ifdef RAY_OS_WINDOWS + #define WIN32_LEAN_AND_MEAN + #include + #include +#else + #include + #include +#endif + +#if defined(__linux__) + #include + #define RAY_IPC_MAX_EVENTS 64 +#elif defined(__APPLE__) + #include + #define RAY_IPC_MAX_EVENTS 64 +#endif + +#include "lang/eval.h" + +/* ===== Compression (delta + RLE) ===== */ + +size_t ray_ipc_compress(const uint8_t* src, size_t len, + uint8_t* dst, size_t dst_cap) +{ + if (len <= RAY_IPC_COMPRESS_THRESHOLD) return 0; + + /* Step 1: delta-encode into temporary buffer */ + uint8_t* delta = (uint8_t*)ray_sys_alloc(len); + if (!delta) return 0; + + delta[0] = src[0]; + for (size_t i = 1; i < len; i++) + delta[i] = (uint8_t)(src[i] - src[i - 1]); + + /* Step 2: RLE-compress the delta stream */ + size_t di = 0; + size_t si = 0; + + while (si < len) { + if (si + 1 < len && delta[si] == delta[si + 1]) { + uint8_t val = delta[si]; + size_t run = 1; + while (si + run < len && delta[si + run] == val && run < 127) + run++; + if (di + 2 > dst_cap) { ray_sys_free(delta); return 0; } + dst[di++] = (uint8_t)run; + dst[di++] = val; + si += run; + } else { + size_t start = si; + size_t llen = 0; + while (si < len && llen < 128) { + if (si + 1 < len && delta[si] == delta[si + 1]) + break; + si++; + llen++; + } + if (di + 1 + llen > dst_cap) { ray_sys_free(delta); return 0; } + dst[di++] = (uint8_t)(-(int8_t)llen); + memcpy(dst + di, delta + start, llen); + di += llen; + } + } + + ray_sys_free(delta); + if (di >= len) return 0; + return di; +} + +size_t ray_ipc_decompress(const uint8_t* src, 
size_t clen, + uint8_t* dst, size_t dst_len) +{ + uint8_t* decoded = (uint8_t*)ray_sys_alloc(dst_len); + if (!decoded) return 0; + + size_t si = 0; + size_t di = 0; + + while (si < clen && di < dst_len) { + int8_t count = (int8_t)src[si++]; + if (count > 0) { + if (si >= clen) { ray_sys_free(decoded); return 0; } + uint8_t val = src[si++]; + size_t n = (size_t)count; + if (di + n > dst_len) { ray_sys_free(decoded); return 0; } + memset(decoded + di, val, n); + di += n; + } else { + size_t n = (size_t)(-(int)count); + if (si + n > clen || di + n > dst_len) { + ray_sys_free(decoded); + return 0; + } + memcpy(decoded + di, src + si, n); + si += n; + di += n; + } + } + + /* Un-delta */ + if (di == 0) { ray_sys_free(decoded); return 0; } + dst[0] = decoded[0]; + for (size_t i = 1; i < di; i++) + dst[i] = (uint8_t)(decoded[i] + dst[i - 1]); + + ray_sys_free(decoded); + return di; +} + +/* ===== Shared protocol helpers ===== */ + +#define RAY_IPC_PHASE_HANDSHAKE 0 +#define RAY_IPC_PHASE_HEADER 1 +#define RAY_IPC_PHASE_PAYLOAD 2 +#define RAY_IPC_PHASE_CREDS 3 + +/* Constant-time comparison — prevents timing side-channel on password. */ +static bool ct_eq(const void* a, const void* b, size_t len) { + const volatile uint8_t* x = a; + const volatile uint8_t* y = b; + volatile uint8_t diff = 0; + for (size_t i = 0; i < len; i++) + diff |= x[i] ^ y[i]; + return diff == 0; +} + +/* Validate credential buffer against secret. Returns true if password matches. + * creds is "user:password\0" with length cred_len. + * secret MUST point to a char[256] buffer (zero-padded beyond the password). + * Compares pw against the full 256-byte secret buffer in constant time. + * No strlen, no secret-length-dependent copies. */ +static bool validate_creds(const uint8_t* buf, uint8_t cred_len, + const char* secret) { + if (cred_len == 0) return false; + const char* creds = (const char*)buf; + const char* colon = memchr(creds, ':', cred_len); + const char* pw = colon ? 
colon + 1 : creds; + size_t pw_len = colon ? (size_t)(cred_len - (pw - creds)) : cred_len; + if (pw_len > 0 && pw[pw_len - 1] == '\0') pw_len--; + if (pw_len > 255) pw_len = 255; + + /* Zero-pad pw into a 256-byte buffer, then compare all 256 bytes + * against the secret buffer (also 256 bytes, zero-padded at init). + * Matching passwords produce identical 256-byte buffers. */ + uint8_t pw_buf[256] = {0}; + memcpy(pw_buf, pw, pw_len); + return ct_eq(pw_buf, secret, 256); +} + +static void send_response(ray_sock_t fd, ray_t* result) +{ + int64_t ser_size = ray_serde_size(result); + if (ser_size <= 0) return; + + uint8_t* payload = (uint8_t*)ray_sys_alloc((size_t)ser_size); + if (!payload) return; + ray_ser_raw(payload, result); + + uint8_t* send_buf = NULL; + size_t send_len = 0; + uint8_t flags = 0; + + if ((size_t)ser_size > RAY_IPC_COMPRESS_THRESHOLD) { + uint8_t* comp = (uint8_t*)ray_sys_alloc((size_t)ser_size); + if (comp) { + size_t clen = ray_ipc_compress(payload, (size_t)ser_size, + comp, (size_t)ser_size); + if (clen > 0 && clen + 4 < (size_t)ser_size) { + send_len = clen + 4; + send_buf = (uint8_t*)ray_sys_alloc(send_len); + if (send_buf) { + uint32_t uncomp = (uint32_t)ser_size; + memcpy(send_buf, &uncomp, 4); + memcpy(send_buf + 4, comp, clen); + flags = RAY_IPC_FLAG_COMPRESSED; + } + } + ray_sys_free(comp); + } + } + + if (!send_buf) { + send_buf = payload; + send_len = (size_t)ser_size; + payload = NULL; + } + + ray_ipc_header_t hdr = { + .prefix = RAY_SERDE_PREFIX, + .version = RAY_SERDE_WIRE_VERSION, + .flags = flags, + .endian = 0, + .msgtype = RAY_IPC_MSG_RESP, + .size = (int64_t)send_len, + }; + ray_sock_send(fd, &hdr, sizeof(hdr)); + ray_sock_send(fd, send_buf, send_len); + + ray_sys_free(send_buf); + if (payload) ray_sys_free(payload); +} + +static ray_t* eval_payload(uint8_t* payload, size_t payload_len, + ray_ipc_header_t* hdr) +{ + /* Journal hook: log every inbound SYNC message (state-mutation + * channel in q's model) before evaluation, so 
a crash mid-handler + * still leaves the message on disk for replay. We write the raw + * inbound bytes — header + payload — verbatim, no decompression + * round-trip. Async messages and responses are not logged, so + * background pings and result frames don't pollute the log. + * No-op when no journal is open or during in-progress replay. + * + * RAY_IPC_FLAG_RESTRICTED is captured into a LOCAL header copy: + * we mark the persisted frame with the connection's restricted + * state at write time so replay can re-impose it. Without this + * a `-U` client's writes silently elevate to full privilege on + * crash-recovery, since replay runs on the main thread with no + * IPC connection context. The bit is meaningless on the live + * IPC wire and doesn't affect this handler's eval — that uses + * the connection's own flag set by the caller above us. + * + * If the journal write fails (disk full, EIO), we ABORT the + * eval and return an error to the client. q's documented + * behaviour: "the message has not been logged so we cannot + * accept it". Silently evaluating un-logged mutations defeats + * the entire durability premise of `-l`/`-L`. 
*/ + if (ray_journal_is_open() && hdr->msgtype == RAY_IPC_MSG_SYNC) { + ray_ipc_header_t log_hdr = *hdr; + if (ray_eval_get_restricted()) + log_hdr.flags |= RAY_IPC_FLAG_RESTRICTED; + ray_err_t je = ray_journal_write_bytes(&log_hdr, payload, (int64_t)payload_len); + if (je != RAY_OK) { + fprintf(stderr, "log: ERROR journal write failed (rc=%d) — refusing to evaluate\n", (int)je); + return ray_error("io", "journal write failed; mutation refused"); + } + } + + uint8_t* decompressed = NULL; + if (hdr->flags & RAY_IPC_FLAG_COMPRESSED) { + if (payload_len < 4) return NULL; + uint32_t uncomp_size; + memcpy(&uncomp_size, payload, 4); + if (uncomp_size == 0 || uncomp_size > 256u * 1024u * 1024u) return NULL; + decompressed = (uint8_t*)ray_sys_alloc(uncomp_size); + if (!decompressed) return NULL; + size_t dlen = ray_ipc_decompress(payload + 4, payload_len - 4, + decompressed, uncomp_size); + if (dlen != uncomp_size) { + ray_sys_free(decompressed); + return NULL; + } + payload = decompressed; + payload_len = uncomp_size; + } + + int64_t de_len = (int64_t)payload_len; + ray_t* msg = ray_de_raw(payload, &de_len); + if (decompressed) ray_sys_free(decompressed); + + ray_t* result = NULL; + if (msg && !RAY_IS_ERR(msg)) { + if (msg->type == -RAY_STR) { + const char* str = ray_str_ptr(msg); + size_t slen = ray_str_len(msg); + if (str && slen > 0) { + char* tmp = (char*)ray_sys_alloc(slen + 1); + if (tmp) { + memcpy(tmp, str, slen); + tmp[slen] = '\0'; + result = ray_eval_str(tmp); + ray_sys_free(tmp); + } + } + ray_release(msg); + } else { + result = ray_eval(msg); + ray_release(msg); + } + } + return result ? 
result : RAY_NULL_OBJ; +} + +/* ====================================================================== + * Poll-based IPC (new API) + * ====================================================================== */ + +/* Per-connection state stored in selector->data */ +typedef struct { + ray_ipc_header_t hdr; + uint8_t phase; + int64_t listener_id; /* id of the listener selector */ + bool auth_required; /* server has -u/-U */ + bool restricted; /* server has -U */ +} ray_ipc_conn_data_t; + +static ray_t* ipc_read_handshake(ray_poll_t* poll, ray_selector_t* sel); +static ray_t* ipc_read_creds(ray_poll_t* poll, ray_selector_t* sel); +static ray_t* ipc_read_header(ray_poll_t* poll, ray_selector_t* sel); +static ray_t* ipc_read_payload(ray_poll_t* poll, ray_selector_t* sel); +static ray_t* ipc_on_data(ray_poll_t* poll, ray_selector_t* sel, void* data); +static void ipc_on_close(ray_poll_t* poll, ray_selector_t* sel); + +/* Wrappers matching ray_io_fn signature for socket recv/send */ +static int64_t ipc_recv_fn(int64_t fd, uint8_t* buf, int64_t len) { + return ray_sock_recv((ray_sock_t)fd, buf, (size_t)len); +} +static int64_t ipc_send_fn(int64_t fd, uint8_t* buf, int64_t len) { + return ray_sock_send((ray_sock_t)fd, buf, (size_t)len); +} + +/* Accept callback — called when listener fd is readable */ +static ray_t* ipc_accept(ray_poll_t* poll, ray_selector_t* sel) +{ + ray_sock_t new_fd = ray_sock_accept((ray_sock_t)sel->fd); + if (new_fd == RAY_INVALID_SOCK) return NULL; + ray_sock_set_nonblocking(new_fd); + + ray_ipc_conn_data_t* cd = (ray_ipc_conn_data_t*)ray_sys_alloc( + sizeof(ray_ipc_conn_data_t)); + if (!cd) { ray_sock_close(new_fd); return NULL; } + memset(cd, 0, sizeof(*cd)); + cd->phase = RAY_IPC_PHASE_HANDSHAKE; + cd->listener_id = sel->id; + cd->auth_required = (poll->auth_secret[0] != '\0'); + cd->restricted = poll->restricted; + + ray_poll_reg_t reg = {0}; + reg.fd = (int64_t)new_fd; + reg.type = RAY_SEL_SOCKET; + reg.recv_fn = ipc_recv_fn; + reg.send_fn = 
ipc_send_fn; + reg.read_fn = ipc_read_handshake; + reg.data_fn = ipc_on_data; + reg.close_fn = ipc_on_close; + reg.data = cd; + + int64_t id = ray_poll_register(poll, &reg); + if (id < 0) { + ray_sock_close(new_fd); + ray_sys_free(cd); + return NULL; + } + + /* Request 2 bytes for handshake */ + ray_selector_t* ns = ray_poll_get(poll, id); + if (ns) ray_poll_rx_request(poll, ns, 2); + + return NULL; +} + +static ray_t* ipc_read_handshake(ray_poll_t* poll, ray_selector_t* sel) +{ + if (!sel->rx.buf || sel->rx.buf->offset < 2) return NULL; + ray_ipc_conn_data_t* cd = (ray_ipc_conn_data_t*)sel->data; + + /* Refuse peers speaking a different wire version BEFORE we commit to + * exchanging any serialized payloads. Without this check a new + * server would happily send v3-layout values to a v2 client, which + * would misparse every atom after the version-bump byte. */ + if (sel->rx.buf->data[0] != RAY_SERDE_WIRE_VERSION) { + ray_poll_deregister(poll, sel->id); + return NULL; + } + + /* Send handshake response: version + auth_required flag */ + uint8_t resp[2] = { RAY_SERDE_WIRE_VERSION, cd->auth_required ? 0x01 : 0x00 }; + ray_sock_send((ray_sock_t)sel->fd, resp, 2); + + if (cd->auth_required) { + cd->phase = RAY_IPC_PHASE_CREDS; /* now waiting for credentials, same state conn_on_handshake records */ + sel->rx.read_fn = ipc_read_creds; + ray_poll_rx_request(poll, sel, 1); /* length byte first */ + return NULL; + } + + cd->phase = RAY_IPC_PHASE_HEADER; + sel->rx.read_fn = ipc_read_header; + ray_poll_rx_request(poll, sel, sizeof(ray_ipc_header_t)); + return NULL; +} + +static ray_t* ipc_read_creds(ray_poll_t* poll, ray_selector_t* sel) +{ + if (!sel->rx.buf || sel->rx.buf->offset < 1) return NULL; + uint8_t cred_len = sel->rx.buf->data[0]; + + if (sel->rx.buf->offset < 1 + cred_len) { + ray_poll_rx_request(poll, sel, 1 + cred_len); + return NULL; + } + + ray_ipc_conn_data_t* cd = (ray_ipc_conn_data_t*)sel->data; + + bool ok = validate_creds(sel->rx.buf->data + 1, cred_len, + poll->auth_secret); + uint8_t result = ok ?
0x00 : 0x01; + ray_sock_send((ray_sock_t)sel->fd, &result, 1); + + if (!ok) { + ray_poll_deregister(poll, sel->id); + return NULL; + } + + cd->phase = RAY_IPC_PHASE_HEADER; + sel->rx.read_fn = ipc_read_header; + ray_poll_rx_request(poll, sel, sizeof(ray_ipc_header_t)); + return NULL; +} + +static ray_t* ipc_read_header(ray_poll_t* poll, ray_selector_t* sel) +{ + if (!sel->rx.buf || + sel->rx.buf->offset < (int64_t)sizeof(ray_ipc_header_t)) + return NULL; + + ray_ipc_conn_data_t* cd = (ray_ipc_conn_data_t*)sel->data; + memcpy(&cd->hdr, sel->rx.buf->data, sizeof(ray_ipc_header_t)); + + if (cd->hdr.prefix != RAY_SERDE_PREFIX || + cd->hdr.version != RAY_SERDE_WIRE_VERSION || + cd->hdr.size <= 0 || + cd->hdr.size > 256 * 1024 * 1024) { + ray_poll_deregister(poll, sel->id); + return NULL; + } + + cd->phase = RAY_IPC_PHASE_PAYLOAD; + sel->rx.read_fn = ipc_read_payload; + ray_poll_rx_request(poll, sel, cd->hdr.size); + + return NULL; +} + +static ray_t* ipc_read_payload(ray_poll_t* poll, ray_selector_t* sel) +{ + ray_ipc_conn_data_t* cd = (ray_ipc_conn_data_t*)sel->data; + + if (!sel->rx.buf || sel->rx.buf->offset < cd->hdr.size) + return NULL; + + bool prev_restricted = ray_eval_get_restricted(); + ray_eval_set_restricted(cd->restricted); + + /* Eval and produce result */ + ray_t* result = eval_payload(sel->rx.buf->data, + (size_t)sel->rx.buf->offset, &cd->hdr); + + ray_eval_set_restricted(prev_restricted); + + /* Send response for sync messages */ + if (cd->hdr.msgtype == RAY_IPC_MSG_SYNC) + send_response((ray_sock_t)sel->fd, result); + if (result != RAY_NULL_OBJ) ray_release(result); + + /* Reset for next message */ + cd->phase = RAY_IPC_PHASE_HEADER; + sel->rx.read_fn = ipc_read_header; + ray_poll_rx_request(poll, sel, sizeof(ray_ipc_header_t)); + + return NULL; +} + +static ray_t* ipc_on_data(ray_poll_t* poll, ray_selector_t* sel, void* data) +{ + (void)poll; (void)sel; (void)data; + return NULL; +} + +static void ipc_on_close(ray_poll_t* poll, ray_selector_t* sel) +{ 
+ (void)poll; + if (sel->data) { + ray_sys_free(sel->data); + sel->data = NULL; + } + ray_sock_close((ray_sock_t)sel->fd); +} + +int64_t ray_ipc_listen(ray_poll_t* poll, uint16_t port) +{ + if (!poll) return -1; + + ray_sock_t fd = ray_sock_listen(port); + if (fd == RAY_INVALID_SOCK) return -1; + ray_sock_set_nonblocking(fd); + + ray_poll_reg_t reg = {0}; + reg.fd = (int64_t)fd; + reg.type = RAY_SEL_SOCKET; + reg.read_fn = ipc_accept; /* listener has no recv_fn: readiness means a pending accept */ + reg.close_fn = ipc_on_close; + + int64_t id = ray_poll_register(poll, &reg); + if (id < 0) { + ray_sock_close(fd); + return -1; + } + return id; +} + +/* ====================================================================== + * Server API + * ====================================================================== */ + +static void conn_close(ray_ipc_server_t* srv, ray_ipc_conn_t* c) +{ +#if defined(__linux__) + epoll_ctl(srv->poll_fd, EPOLL_CTL_DEL, c->fd, NULL); +#elif defined(__APPLE__) + struct kevent kev; + EV_SET(&kev, c->fd, EVFILT_READ, EV_DELETE, 0, 0, NULL); + kevent(srv->poll_fd, &kev, 1, NULL, 0, NULL); +#else + (void)srv; +#endif + + ray_sock_close(c->fd); + if (c->rx_buf) ray_sys_free(c->rx_buf); + c->fd = RAY_INVALID_SOCK; + c->rx_buf = NULL; + c->rx_len = 0; + c->rx_need = 0; + + uint32_t idx = (uint32_t)(c - srv->conns); + if (idx + 1 < srv->n_conns) + srv->conns[idx] = srv->conns[srv->n_conns - 1]; /* keep the array dense: last entry fills the freed slot */ + if (srv->n_conns > 0) srv->n_conns--; +} + +static void conn_on_handshake(ray_ipc_server_t* srv, ray_ipc_conn_t* c) +{ + /* Refuse peers speaking a different wire version up front — see the + * matching check in ipc_read_handshake. */ + if (!c->rx_buf || c->rx_buf[0] != RAY_SERDE_WIRE_VERSION) { + conn_close(srv, c); + return; + } + + bool auth_req = (srv->auth_secret[0] != '\0'); + uint8_t resp[2] = { RAY_SERDE_WIRE_VERSION, auth_req ?
0x01 : 0x00 }; + ray_sock_send(c->fd, resp, 2); + + ray_sys_free(c->rx_buf); + c->rx_buf = NULL; + c->rx_len = 0; + + if (auth_req) { + c->rx_need = 1; /* length byte */ + c->phase = RAY_IPC_PHASE_CREDS; + return; + } + + c->rx_need = sizeof(ray_ipc_header_t); + c->phase = RAY_IPC_PHASE_HEADER; +} + +static void conn_on_header(ray_ipc_server_t* srv, ray_ipc_conn_t* c) +{ + memcpy(&c->hdr, c->rx_buf, sizeof(ray_ipc_header_t)); + + if (c->hdr.prefix != RAY_SERDE_PREFIX) { conn_close(srv, c); return; } + if (c->hdr.version != RAY_SERDE_WIRE_VERSION) { conn_close(srv, c); return; } + if (c->hdr.size <= 0) { conn_close(srv, c); return; } + if (c->hdr.size > 256 * 1024 * 1024) { conn_close(srv, c); return; } + + ray_sys_free(c->rx_buf); + c->rx_buf = (uint8_t*)ray_sys_alloc((size_t)c->hdr.size); + if (!c->rx_buf) { conn_close(srv, c); return; } + c->rx_len = 0; + c->rx_need = (size_t)c->hdr.size; + c->phase = RAY_IPC_PHASE_PAYLOAD; +} + +static void conn_on_payload(ray_ipc_server_t* srv, ray_ipc_conn_t* c) +{ + bool prev = ray_eval_get_restricted(); + ray_eval_set_restricted(srv->restricted); + + ray_t* result = eval_payload(c->rx_buf, c->rx_len, &c->hdr); + + ray_eval_set_restricted(prev); + + if (c->hdr.msgtype == RAY_IPC_MSG_SYNC) + send_response(c->fd, result); + if (result != RAY_NULL_OBJ) ray_release(result); + + ray_sys_free(c->rx_buf); + c->rx_buf = NULL; + c->rx_len = 0; + c->rx_need = sizeof(ray_ipc_header_t); + c->phase = RAY_IPC_PHASE_HEADER; +} + +static void conn_on_creds(ray_ipc_server_t* srv, ray_ipc_conn_t* c) +{ + if (c->rx_len == 1) { + /* Got length byte — reallocate buffer for full credential */ + uint8_t cred_len = c->rx_buf[0]; + size_t need = 1 + (size_t)cred_len; + uint8_t* newbuf = (uint8_t*)ray_sys_alloc(need); + if (!newbuf) { conn_close(srv, c); return; } + newbuf[0] = cred_len; + ray_sys_free(c->rx_buf); + c->rx_buf = newbuf; + c->rx_need = need; + return; + } + + uint8_t cred_len = c->rx_buf[0]; + bool ok = validate_creds(c->rx_buf + 1, 
cred_len, srv->auth_secret); + + uint8_t result = ok ? 0x00 : 0x01; + ray_sock_send(c->fd, &result, 1); + + if (!ok) { + conn_close(srv, c); + return; + } + + ray_sys_free(c->rx_buf); + c->rx_buf = NULL; + c->rx_len = 0; + c->rx_need = sizeof(ray_ipc_header_t); + c->phase = RAY_IPC_PHASE_HEADER; +} + +static void conn_on_readable(ray_ipc_server_t* srv, ray_ipc_conn_t* c) +{ + if (!c->rx_buf) { + c->rx_buf = (uint8_t*)ray_sys_alloc(c->rx_need); + if (!c->rx_buf) { conn_close(srv, c); return; } + } + + int64_t n = ray_sock_recv(c->fd, c->rx_buf + c->rx_len, + c->rx_need - c->rx_len); + if (n <= 0) { conn_close(srv, c); return; } + c->rx_len += (size_t)n; + + if (c->rx_len < c->rx_need) return; + + switch (c->phase) { + case RAY_IPC_PHASE_HANDSHAKE: conn_on_handshake(srv, c); break; + case RAY_IPC_PHASE_CREDS: conn_on_creds(srv, c); break; + case RAY_IPC_PHASE_HEADER: conn_on_header(srv, c); break; + case RAY_IPC_PHASE_PAYLOAD: conn_on_payload(srv, c); break; + } +} + +ray_err_t ray_ipc_server_init(ray_ipc_server_t* srv, uint16_t port) +{ + memset(srv, 0, sizeof(*srv)); + srv->listen_fd = ray_sock_listen(port); + if (srv->listen_fd == RAY_INVALID_SOCK) return RAY_ERR_IO; + ray_sock_set_nonblocking(srv->listen_fd); + +#if defined(__linux__) + srv->poll_fd = epoll_create1(0); + if (srv->poll_fd < 0) { + ray_sock_close(srv->listen_fd); + return RAY_ERR_IO; + } + struct epoll_event ev = { .events = EPOLLIN, .data.fd = srv->listen_fd }; + epoll_ctl(srv->poll_fd, EPOLL_CTL_ADD, srv->listen_fd, &ev); +#elif defined(__APPLE__) + srv->poll_fd = kqueue(); + if (srv->poll_fd < 0) { + ray_sock_close(srv->listen_fd); + return RAY_ERR_IO; + } + struct kevent kev; + EV_SET(&kev, srv->listen_fd, EVFILT_READ, EV_ADD, 0, 0, NULL); + kevent(srv->poll_fd, &kev, 1, NULL, 0, NULL); +#else + srv->poll_fd = -1; +#endif + + srv->running = true; + return RAY_OK; +} + +void ray_ipc_server_destroy(ray_ipc_server_t* srv) +{ + for (uint32_t i = 0; i < srv->n_conns; i++) { + ray_ipc_conn_t* c = 
&srv->conns[i]; + if (c->fd != RAY_INVALID_SOCK) { + if (c->rx_buf) ray_sys_free(c->rx_buf); + ray_sock_close(c->fd); + } + } + srv->n_conns = 0; + + ray_sock_close(srv->listen_fd); + srv->listen_fd = RAY_INVALID_SOCK; + + if (srv->poll_fd >= 0) { +#ifndef RAY_OS_WINDOWS + close(srv->poll_fd); +#endif + } + srv->poll_fd = -1; + srv->running = false; +} + +int ray_ipc_poll(ray_ipc_server_t* srv, int timeout_ms) +{ + int ready = 0; + +#if defined(__linux__) + struct epoll_event events[RAY_IPC_MAX_EVENTS]; + int nfds = epoll_wait(srv->poll_fd, events, RAY_IPC_MAX_EVENTS, timeout_ms); + if (nfds < 0) return (errno == EINTR) ? 0 : -1; + + for (int i = 0; i < nfds; i++) { + int fd = events[i].data.fd; + + if (fd == srv->listen_fd) { + ray_sock_t new_fd = ray_sock_accept(srv->listen_fd); + if (new_fd == RAY_INVALID_SOCK) continue; + ray_sock_set_nonblocking(new_fd); + if (srv->n_conns >= RAY_IPC_MAX_CONNS) { + ray_sock_close(new_fd); + continue; + } + ray_ipc_conn_t* c = &srv->conns[srv->n_conns++]; + c->fd = new_fd; + c->rx_buf = NULL; + c->rx_len = 0; + c->rx_need = 2; + c->phase = RAY_IPC_PHASE_HANDSHAKE; + struct epoll_event cev = { .events = EPOLLIN, .data.fd = new_fd }; + epoll_ctl(srv->poll_fd, EPOLL_CTL_ADD, new_fd, &cev); + } else { + bool found = false; + for (uint32_t j = 0; j < srv->n_conns; j++) { + if (srv->conns[j].fd == fd) { + conn_on_readable(srv, &srv->conns[j]); + found = true; + break; + } + } + if (!found) ready++; + } + } + +#elif defined(__APPLE__) + struct kevent events[RAY_IPC_MAX_EVENTS]; + struct timespec ts; + struct timespec* tsp = NULL; + if (timeout_ms >= 0) { + ts.tv_sec = timeout_ms / 1000; + ts.tv_nsec = (timeout_ms % 1000) * 1000000L; + tsp = &ts; + } + int nfds = kevent(srv->poll_fd, NULL, 0, events, RAY_IPC_MAX_EVENTS, tsp); + if (nfds < 0) return (errno == EINTR) ? 
0 : -1; + + for (int i = 0; i < nfds; i++) { + int fd = (int)events[i].ident; + + if (fd == srv->listen_fd) { + ray_sock_t new_fd = ray_sock_accept(srv->listen_fd); + if (new_fd == RAY_INVALID_SOCK) continue; + ray_sock_set_nonblocking(new_fd); + if (srv->n_conns >= RAY_IPC_MAX_CONNS) { + ray_sock_close(new_fd); + continue; + } + ray_ipc_conn_t* c = &srv->conns[srv->n_conns++]; + c->fd = new_fd; + c->rx_buf = NULL; + c->rx_len = 0; + c->rx_need = 2; + c->phase = RAY_IPC_PHASE_HANDSHAKE; + struct kevent kev; + EV_SET(&kev, new_fd, EVFILT_READ, EV_ADD, 0, 0, NULL); + kevent(srv->poll_fd, &kev, 1, NULL, 0, NULL); + } else { + bool found = false; + for (uint32_t j = 0; j < srv->n_conns; j++) { + if (srv->conns[j].fd == fd) { + conn_on_readable(srv, &srv->conns[j]); + found = true; + break; + } + } + if (!found) ready++; + } + } + +#else /* Windows: select-based fallback */ + fd_set rfds; + FD_ZERO(&rfds); + FD_SET(srv->listen_fd, &rfds); + ray_sock_t maxfd = srv->listen_fd; + for (uint32_t i = 0; i < srv->n_conns; i++) { + FD_SET(srv->conns[i].fd, &rfds); + if (srv->conns[i].fd > maxfd) maxfd = srv->conns[i].fd; + } + + struct timeval tv; + struct timeval* tvp = NULL; + if (timeout_ms >= 0) { + tv.tv_sec = timeout_ms / 1000; + tv.tv_usec = (timeout_ms % 1000) * 1000; + tvp = &tv; + } + + int nfds = select((int)(maxfd + 1), &rfds, NULL, NULL, tvp); + if (nfds < 0) return (errno == EINTR) ? 
0 : -1; + + if (FD_ISSET(srv->listen_fd, &rfds)) { + ray_sock_t new_fd = ray_sock_accept(srv->listen_fd); + if (new_fd != RAY_INVALID_SOCK) { + ray_sock_set_nonblocking(new_fd); + if (srv->n_conns >= RAY_IPC_MAX_CONNS) { + ray_sock_close(new_fd); + } else { + ray_ipc_conn_t* c = &srv->conns[srv->n_conns++]; + c->fd = new_fd; + c->rx_buf = NULL; + c->rx_len = 0; + c->rx_need = 2; + c->phase = RAY_IPC_PHASE_HANDSHAKE; + } + } + } + + for (uint32_t i = srv->n_conns; i > 0; ) { + --i; + if (srv->conns[i].fd != RAY_INVALID_SOCK && FD_ISSET(srv->conns[i].fd, &rfds)) + conn_on_readable(srv, &srv->conns[i]); + } +#endif + + return ready; +} + +/* ===== Client API ===== */ + +static ray_sock_t g_client_fds[RAY_IPC_MAX_CONNS]; +static int g_client_count = 0; +static bool g_client_init = false; + +static void client_init(void) { + if (g_client_init) return; + for (int i = 0; i < RAY_IPC_MAX_CONNS; i++) + g_client_fds[i] = RAY_INVALID_SOCK; + g_client_init = true; +} + +static int64_t recv_full(ray_sock_t fd, void* buf, size_t len) { + size_t total = 0; + while (total < len) { + int64_t n = ray_sock_recv(fd, (uint8_t*)buf + total, len - total); + if (n <= 0) return -1; + total += (size_t)n; + } + return (int64_t)total; +} + +static int64_t client_send_msg(int64_t handle, ray_t* msg, uint8_t msgtype) +{ + if (handle < 0 || handle >= RAY_IPC_MAX_CONNS) return -2; + ray_sock_t fd = g_client_fds[handle]; + if (fd == RAY_INVALID_SOCK) return -2; + + int64_t ser_size = ray_serde_size(msg); + if (ser_size <= 0) return -1; + + uint8_t* payload = (uint8_t*)ray_sys_alloc((size_t)ser_size); + if (!payload) return -1; + ray_ser_raw(payload, msg); + + uint8_t* send_buf = NULL; + size_t send_len = 0; + uint8_t flags = 0; + + if ((size_t)ser_size > RAY_IPC_COMPRESS_THRESHOLD) { + uint8_t* comp = (uint8_t*)ray_sys_alloc((size_t)ser_size); + if (comp) { + size_t clen = ray_ipc_compress(payload, (size_t)ser_size, + comp, (size_t)ser_size); + if (clen > 0 && clen + 4 < (size_t)ser_size) { + 
send_len = clen + 4; + send_buf = (uint8_t*)ray_sys_alloc(send_len); + if (send_buf) { + uint32_t uncomp = (uint32_t)ser_size; + memcpy(send_buf, &uncomp, 4); + memcpy(send_buf + 4, comp, clen); + flags = RAY_IPC_FLAG_COMPRESSED; + } + } + ray_sys_free(comp); + } + } + + if (!send_buf) { + send_buf = payload; + send_len = (size_t)ser_size; + payload = NULL; + } + + ray_ipc_header_t hdr = { + .prefix = RAY_SERDE_PREFIX, + .version = RAY_SERDE_WIRE_VERSION, + .flags = flags, + .endian = 0, + .msgtype = msgtype, + .size = (int64_t)send_len, + }; + + int64_t rc = ray_sock_send(fd, &hdr, sizeof(hdr)); + if (rc < 0) { ray_sys_free(send_buf); if (payload) ray_sys_free(payload); return -1; } + rc = ray_sock_send(fd, send_buf, send_len); + + ray_sys_free(send_buf); + if (payload) ray_sys_free(payload); + return rc < 0 ? -1 : 0; +} + +int64_t ray_ipc_connect(const char* host, uint16_t port, + const char* user, const char* password) +{ + client_init(); + + ray_sock_t fd = ray_sock_connect(host, port, 5000); + if (fd == RAY_INVALID_SOCK) return -1; + + uint8_t hs[2] = { RAY_SERDE_WIRE_VERSION, 0x00 }; + if (ray_sock_send(fd, hs, 2) < 0) { + ray_sock_close(fd); + return -1; + } + + uint8_t resp[2]; + if (recv_full(fd, resp, 2) < 0) { + ray_sock_close(fd); + return -1; + } + + /* Refuse a peer that speaks a different wire version. This gives + * the new client an explicit error at connect time rather than + * silently sending a v3 payload to a server that would misparse + * every atom. */ + if (resp[0] != RAY_SERDE_WIRE_VERSION) { + ray_sock_close(fd); + return -4; /* wire version mismatch */ + } + + /* Auth required? 
*/ + if (resp[1] == 0x01) { + if (!password) { + ray_sock_close(fd); + return -2; /* auth required but no creds */ + } + char cred[256]; + int cred_len; + if (user && user[0]) + cred_len = snprintf(cred, sizeof(cred), "%s:%s", user, password); + else + cred_len = snprintf(cred, sizeof(cred), ":%s", password); + if (cred_len < 0 || cred_len >= (int)sizeof(cred)) { + ray_sock_close(fd); + return -1; + } + cred_len++; /* include null terminator */ + uint8_t len_byte = (uint8_t)cred_len; + if (ray_sock_send(fd, &len_byte, 1) < 0 || + ray_sock_send(fd, cred, cred_len) < 0) { + ray_sock_close(fd); + return -1; + } + uint8_t auth_result; + if (recv_full(fd, &auth_result, 1) < 0 || auth_result != 0x00) { + ray_sock_close(fd); + return -3; /* auth rejected */ + } + } else if (resp[1] != 0x00) { + ray_sock_close(fd); + return -1; + } + +#ifdef RAY_OS_WINDOWS + { DWORD z = 0; + setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&z, sizeof(z)); + setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, (const char*)&z, sizeof(z)); } +#else + { struct timeval z = {0, 0}; + setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &z, sizeof(z)); + setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &z, sizeof(z)); } +#endif + + for (int i = 0; i < RAY_IPC_MAX_CONNS; i++) { + if (g_client_fds[i] == RAY_INVALID_SOCK) { + g_client_fds[i] = fd; + if (i >= g_client_count) g_client_count = i + 1; + return (int64_t)i; + } + } + + ray_sock_close(fd); + return -1; +} + +void ray_ipc_close(int64_t handle) +{ + if (handle < 0 || handle >= RAY_IPC_MAX_CONNS) return; + if (g_client_fds[handle] == RAY_INVALID_SOCK) return; + ray_sock_close(g_client_fds[handle]); + g_client_fds[handle] = RAY_INVALID_SOCK; +} + +ray_t* ray_ipc_send(int64_t handle, ray_t* msg) +{ + { int64_t sr = client_send_msg(handle, msg, RAY_IPC_MSG_SYNC); + if (sr == -2) return ray_error("io", "connection closed"); + if (sr < 0) return ray_error("io", "ipc send failed"); } + + ray_sock_t fd = g_client_fds[handle]; + + ray_ipc_header_t hdr; + if (recv_full(fd, &hdr, 
sizeof(hdr)) < 0) { + ray_ipc_close(handle); + return ray_error("io", "ipc recv header failed"); + } + if (hdr.prefix != RAY_SERDE_PREFIX || hdr.size <= 0) { + ray_ipc_close(handle); + return ray_error("io", "ipc bad response header"); + } + if (hdr.version != RAY_SERDE_WIRE_VERSION) { + ray_ipc_close(handle); + return ray_error("version", "ipc peer wire version mismatch"); + } + if (hdr.size > 256 * 1024 * 1024) { + ray_ipc_close(handle); + return ray_error("io", "ipc response too large"); + } + + uint8_t* payload = (uint8_t*)ray_sys_alloc((size_t)hdr.size); + if (!payload) return ray_error("oom", NULL); + if (recv_full(fd, payload, (size_t)hdr.size) < 0) { + ray_sys_free(payload); + ray_ipc_close(handle); + return ray_error("io", "ipc recv payload failed"); + } + + uint8_t* deser_buf = payload; + size_t deser_len = (size_t)hdr.size; + uint8_t* decompressed = NULL; + + if (hdr.flags & RAY_IPC_FLAG_COMPRESSED) { + if (deser_len < 4) { ray_sys_free(payload); return ray_error("io", "ipc compressed payload too short"); } + uint32_t uncomp_size; + memcpy(&uncomp_size, payload, 4); + decompressed = (uint8_t*)ray_sys_alloc(uncomp_size); + if (!decompressed) { ray_sys_free(payload); return ray_error("oom", NULL); } + size_t dlen = ray_ipc_decompress(payload + 4, deser_len - 4, + decompressed, uncomp_size); + if (dlen != uncomp_size) { + ray_sys_free(decompressed); + ray_sys_free(payload); + return ray_error("io", "ipc decompress failed"); + } + deser_buf = decompressed; + deser_len = uncomp_size; + } + + int64_t de_len = (int64_t)deser_len; + ray_t* result = ray_de_raw(deser_buf, &de_len); + + if (decompressed) ray_sys_free(decompressed); + ray_sys_free(payload); + + return result ? 
result : RAY_NULL_OBJ; +} + +ray_err_t ray_ipc_send_async(int64_t handle, ray_t* msg) +{ + if (client_send_msg(handle, msg, RAY_IPC_MSG_ASYNC) < 0) + return RAY_ERR_IO; + return RAY_OK; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/ipc.h b/crates/rayforce-sys/vendor/rayforce/src/core/ipc.h new file mode 100644 index 0000000..ec10ddf --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/ipc.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAY_IPC_H +#define RAY_IPC_H + +#include +#include "core/poll.h" +#include "core/sock.h" +#include "store/serde.h" + +/* ===== Compression ===== */ + +#define RAY_IPC_COMPRESS_THRESHOLD 2000 + +size_t ray_ipc_compress(const uint8_t* src, size_t len, + uint8_t* dst, size_t dst_cap); +size_t ray_ipc_decompress(const uint8_t* src, size_t clen, + uint8_t* dst, size_t dst_len); + +/* ===== Message types ===== */ + +#define RAY_IPC_MSG_ASYNC 0 +#define RAY_IPC_MSG_SYNC 1 +#define RAY_IPC_MSG_RESP 2 + +#define RAY_IPC_FLAG_COMPRESSED 0x01 +/* Set by the journal hook in core/ipc.c eval_payload when the inbound + * IPC message arrived on a `-U` restricted connection. Used ONLY for + * persisted log frames; the live IPC path ignores it (the connection's + * restricted state is the source of truth there). Replay reads the + * bit to re-impose the original sender's restrictions, otherwise a + * crash + restart silently elevates restricted commands to full + * privilege. */ +#define RAY_IPC_FLAG_RESTRICTED 0x02 +#define RAY_IPC_MAX_CONNS 256 + +/* ===== Poll-based IPC (new API) ===== */ + +/* Register IPC listener on poll. Returns selector id or -1. 
*/ +int64_t ray_ipc_listen(ray_poll_t* poll, uint16_t port); + +/* ===== Legacy server API (wraps poll internally for tests) ===== */ + +typedef struct ray_ipc_conn { + ray_sock_t fd; + uint8_t* rx_buf; + size_t rx_len; + size_t rx_need; + uint8_t phase; + ray_ipc_header_t hdr; +} ray_ipc_conn_t; + +typedef struct ray_ipc_server { + ray_sock_t listen_fd; + int poll_fd; + ray_ipc_conn_t conns[RAY_IPC_MAX_CONNS]; + uint32_t n_conns; + bool running; + char auth_secret[256]; /* password from -u/-U */ + bool restricted; /* -U mode */ +} ray_ipc_server_t; + +ray_err_t ray_ipc_server_init(ray_ipc_server_t* srv, uint16_t port); +void ray_ipc_server_destroy(ray_ipc_server_t* srv); +int ray_ipc_poll(ray_ipc_server_t* srv, int timeout_ms); + +/* ===== Client API (blocking, no poll needed) ===== */ + +int64_t ray_ipc_connect(const char* host, uint16_t port, + const char* user, const char* password); +void ray_ipc_close(int64_t handle); +ray_t* ray_ipc_send(int64_t handle, ray_t* msg); +ray_err_t ray_ipc_send_async(int64_t handle, ray_t* msg); + +#endif /* RAY_IPC_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/kqueue.c b/crates/rayforce-sys/vendor/rayforce/src/core/kqueue.c new file mode 100644 index 0000000..4c76021 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/kqueue.c @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if defined(__APPLE__) + +#include "core/poll.h" +#include "mem/sys.h" +#include +#include +#include +#include

#define RAY_POLL_MAX_EVENTS  64   /* events fetched per kevent() call */
#define RAY_POLL_INITIAL_CAP 16   /* initial size of the selector table */

/* Create a kqueue-backed poll object with an empty selector table.
 * poll->code starts at -1, which is what keeps ray_poll_run looping.
 * Returns NULL when the kqueue or either allocation cannot be obtained. */
ray_poll_t* ray_poll_create(void)
{
    int fd = kqueue();
    if (fd < 0) return NULL;

    ray_poll_t* poll = (ray_poll_t*)ray_sys_alloc(sizeof(ray_poll_t));
    if (!poll) { close(fd); return NULL; }

    memset(poll, 0, sizeof(*poll));
    poll->fd = fd;
    poll->code = -1;
    poll->sel_cap = RAY_POLL_INITIAL_CAP;
    poll->sels = (ray_selector_t**)ray_sys_alloc(
        poll->sel_cap * sizeof(ray_selector_t*));
    if (!poll->sels) {
        close(fd);
        ray_sys_free(poll);
        return NULL;
    }
    memset(poll->sels, 0, poll->sel_cap * sizeof(ray_selector_t*));
    return poll;
}

/* Release one selector: remove its read filter from the kqueue, run its
 * close callback, then free its buffers and the selector itself.
 *
 * The filter is removed BEFORE close_fn runs so the EV_DELETE is issued
 * while the descriptor is certainly still the selector's own.  The caller
 * clears the table slot afterwards. */
static void kq_selector_release(ray_poll_t* poll, ray_selector_t* sel)
{
    struct kevent kev;
    EV_SET(&kev, (uintptr_t)sel->fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
    kevent((int)poll->fd, &kev, 1, NULL, 0, NULL);

    if (sel->close_fn) sel->close_fn(poll, sel);
    if (sel->rx.buf) ray_poll_buf_free(sel->rx.buf);
    if (sel->tx.buf) ray_poll_buf_free(sel->tx.buf);
    ray_sys_free(sel);
}

/* Destroy a poll object: release every live selector (same sequence as
 * ray_poll_deregister), then free the table, the kqueue descriptor and the
 * poll object.  NULL is accepted and ignored. */
void ray_poll_destroy(ray_poll_t* poll)
{
    if (!poll) return;

    for (uint32_t i = 0; i < poll->n_sels; i++) {
        ray_selector_t* sel = poll->sels[i];
        if (!sel) continue;
        kq_selector_release(poll, sel);
        poll->sels[i] = NULL;
    }

    if (poll->sels) ray_sys_free(poll->sels);
    close((int)poll->fd);
    ray_sys_free(poll);
}

/* Register a descriptor described by `reg` and return its selector id.
 *
 * The id is the selector's index in poll->sels: the first empty slot is
 * reused, otherwise the table is extended (doubling its capacity when
 * full).  The id is also stored as the kevent udata so ray_poll_run can map
 * events back to selectors.  open_fn, when set, runs after the descriptor
 * has been added to the kqueue.
 *
 * Returns -1 on NULL arguments, allocation failure or kevent failure. */
int64_t ray_poll_register(ray_poll_t* poll, ray_poll_reg_t* reg)
{
    if (!poll || !reg) return -1;

    /* Find free slot or grow */
    int64_t id = -1;
    for (uint32_t i = 0; i < poll->n_sels; i++) {
        if (!poll->sels[i]) { id = (int64_t)i; break; }
    }
    if (id < 0) {
        if (poll->n_sels >= poll->sel_cap) {
            uint32_t new_cap = poll->sel_cap * 2;
            ray_selector_t** ns = (ray_selector_t**)ray_sys_alloc(
                new_cap * sizeof(ray_selector_t*));
            if (!ns) return -1;
            memcpy(ns, poll->sels, poll->n_sels * sizeof(ray_selector_t*));
            memset(ns + poll->n_sels, 0,
                   (new_cap - poll->n_sels) * sizeof(ray_selector_t*));
            ray_sys_free(poll->sels);
            poll->sels = ns;
            poll->sel_cap = new_cap;
        }
        id = (int64_t)poll->n_sels;
        poll->n_sels++;
    }

    ray_selector_t* sel = (ray_selector_t*)ray_sys_alloc(sizeof(ray_selector_t));
    if (!sel) return -1;
    memset(sel, 0, sizeof(*sel));

    sel->fd         = reg->fd;
    sel->id         = id;
    sel->type       = reg->type;
    sel->data       = reg->data;
    sel->open_fn    = reg->open_fn;
    sel->close_fn   = reg->close_fn;
    sel->error_fn   = reg->error_fn;
    sel->data_fn    = reg->data_fn;
    sel->rx.recv_fn = reg->recv_fn;
    sel->rx.read_fn = reg->read_fn;
    sel->tx.send_fn = reg->send_fn;

    poll->sels[id] = sel;

    /* Register with kqueue */
    struct kevent kev;
    EV_SET(&kev, (uintptr_t)reg->fd, EVFILT_READ, EV_ADD, 0, 0,
           (void*)(uintptr_t)id);

    if (kevent((int)poll->fd, &kev, 1, NULL, 0, NULL) < 0) {
        poll->sels[id] = NULL;
        ray_sys_free(sel);
        return -1;
    }

    if (sel->open_fn) sel->open_fn(poll, sel);
    return id;
}

/* Remove selector `id`: delete its kqueue filter, run close_fn, free it and
 * empty its slot.  Unknown or already-empty ids are ignored. */
void ray_poll_deregister(ray_poll_t* poll, int64_t id)
{
    if (!poll || id < 0 || (uint32_t)id >= poll->n_sels) return;
    ray_selector_t* sel = poll->sels[id];
    if (!sel) return;

    kq_selector_release(poll, sel);
    poll->sels[id] = NULL;
}

/* Event loop: block in kevent() and dispatch until poll->code becomes
 * non-negative, then return that code.  Returns -1 for a NULL poll or when
 * kevent fails with anything other than EINTR.
 *
 * Per event: a selector is looked up by the id stored in udata; readable
 * data is drained through recv_fn/read_fn/data_fn first, and only then are
 * EOF/error flags acted on, because a peer may send a message and close in
 * the same instant.  Callbacks may deregister the selector, so the slot is
 * re-read after each one. */
int64_t ray_poll_run(ray_poll_t* poll)
{
    if (!poll) return -1;

    struct kevent events[RAY_POLL_MAX_EVENTS];

    while (poll->code < 0) {
        int n = kevent((int)poll->fd, NULL, 0, events,
                       RAY_POLL_MAX_EVENTS, NULL);
        if (n < 0) {
            if (errno == EINTR) continue;
            return -1;
        }

        for (int i = 0; i < n; i++) {
            uint64_t eid = (uint64_t)(uintptr_t)events[i].udata;
            ray_selector_t* sel = NULL;

            if (eid < poll->n_sels)
                sel = poll->sels[eid];
            if (!sel) continue;

            /* EV_ERROR without data — fatal, skip directly */
            if ((events[i].flags & EV_ERROR) && events[i].filter != EVFILT_READ) {
                if (sel->error_fn)
                    sel->error_fn(poll, sel);
                else
                    ray_poll_deregister(poll, sel->id);
                continue;
            }

            /* Process readable data first — even if EOF is also set.
             * A client may send a message and close simultaneously. */
            if (events[i].filter == EVFILT_READ) {
                for (;;) {
                    if (sel->rx.recv_fn && sel->rx.buf) {
                        /* Fill the current receive buffer as far as the
                         * socket allows. */
                        while (sel->rx.buf->offset < sel->rx.buf->size) {
                            int64_t nr = sel->rx.recv_fn(
                                sel->fd,
                                sel->rx.buf->data + sel->rx.buf->offset,
                                sel->rx.buf->size - sel->rx.buf->offset);
                            if (nr <= 0) {
                                if (nr < 0 && errno == EINTR) continue;
                                if (nr < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
                                    break;       /* drained for now */
                                if (sel->error_fn)
                                    sel->error_fn(poll, sel);
                                else
                                    ray_poll_deregister(poll, sel->id);
                                goto next_event;
                            }
                            sel->rx.buf->offset += nr;
                        }
                    }
                    /* Buffer still short: wait for the next event. */
                    if (sel->rx.buf && sel->rx.buf->offset < sel->rx.buf->size)
                        break;
                    if (!sel->rx.read_fn) break;
                    ray_t* obj = sel->rx.read_fn(poll, sel);

                    /* Re-validate: read_fn may have deregistered this selector */
                    if (eid >= poll->n_sels || !poll->sels[eid]) goto next_event;
                    sel = poll->sels[eid];

                    if (obj && sel->data_fn)
                        sel->data_fn(poll, sel, obj);
                    /* ...and so may data_fn. */
                    if (eid >= poll->n_sels || !poll->sels[eid]) goto next_event;
                    sel = poll->sels[eid];
                    if (!sel->rx.buf) break;
                    if (sel->rx.buf->offset >= sel->rx.buf->size) continue;
                }
            }

            /* EOF / error — after data is drained */
            if (events[i].flags & (EV_EOF | EV_ERROR)) {
                if (eid < poll->n_sels && poll->sels[eid]) {
                    sel = poll->sels[eid];
                    if (sel->type == RAY_SEL_STDIN) goto next_event; /* Ctrl-D handled by read_fn */
                    if (sel->error_fn)
                        sel->error_fn(poll, sel);
                    else
                        ray_poll_deregister(poll, sel->id);
                }
            }

        next_event:;
        }
    }

    return poll->code;
}

#endif /* __APPLE__ */
diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/morsel.c b/crates/rayforce-sys/vendor/rayforce/src/core/morsel.c new file mode 100644 index 0000000..3184cc3 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/morsel.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "core/morsel.h" +#include "core/platform.h" +#include "mem/heap.h" +#include "table/sym.h" +#include "ops/idxop.h" +#include + +/* -------------------------------------------------------------------------- + * ray_morsel_init + * + * Initialize a morsel iterator over the given vector. Sets up offset, + * length, and element size. Issues a sequential madvise hint for mmap'd + * vectors to optimize readahead. + * -------------------------------------------------------------------------- */ + +void ray_morsel_init(ray_morsel_t* m, ray_t* vec) { + m->vec = vec; + m->offset = 0; + m->len = ray_len(vec); + m->elem_size = ray_sym_elem_size(vec->type, vec->attrs); + m->morsel_len = 0; + m->morsel_ptr = NULL; + m->null_bits = NULL; + + /* One-time hint for mmap'd vectors */ + if (vec->mmod == 1) { + ray_vm_advise_seq(ray_data(vec), (size_t)m->len * m->elem_size); + } +} + +/* -------------------------------------------------------------------------- + * ray_morsel_next + * + * Advance to the next morsel. Returns true if a morsel is available, false + * when the vector is exhausted. Sets morsel_ptr to the data for the current + * chunk, morsel_len to the number of elements, and null_bits to the null + * bitmap (or NULL if no nulls). + * -------------------------------------------------------------------------- */ + +bool ray_morsel_next(ray_morsel_t* m) { + m->offset += m->morsel_len; + if (m->offset >= m->len) return false; + + int64_t remaining = m->len - m->offset; + m->morsel_len = remaining < RAY_MORSEL_ELEMS ? remaining : RAY_MORSEL_ELEMS; + m->morsel_ptr = (uint8_t*)ray_data(m->vec) + (size_t)m->offset * m->elem_size; + + /* Null bitmap: only if HAS_NULLS. + * M5: null_bits points to the byte containing bit (m->offset). + * Callers must account for (m->offset % 8) bit offset within the + * first byte of null_bits when testing individual null bits. 
+ * + * HAS_INDEX path: when an accelerator index is attached, the parent's + * 16-byte nullmap union holds the index pointer instead of bitmap data + * (or ext_nullmap pointer). The original bytes are preserved inside + * ix->saved_nullmap. Route through that snapshot here so null-aware + * loops still see the correct bits. */ + m->null_bits = NULL; + if (m->vec->attrs & RAY_ATTR_HAS_NULLS) { + if (m->vec->attrs & RAY_ATTR_HAS_INDEX) { + ray_index_t* ix = ray_index_payload(m->vec->index); + if (ix->saved_attrs & RAY_ATTR_NULLMAP_EXT) { + ray_t* ext; + memcpy(&ext, &ix->saved_nullmap[0], sizeof(ext)); + m->null_bits = (uint8_t*)ray_data(ext) + (m->offset / 8); + } else if (m->offset < 128) { + m->null_bits = ix->saved_nullmap + (m->offset / 8); + } + } else if (m->vec->attrs & RAY_ATTR_NULLMAP_EXT) { + /* External bitmap: point to correct byte offset */ + ray_t* ext = m->vec->ext_nullmap; + m->null_bits = (uint8_t*)ray_data(ext) + (m->offset / 8); + } else if (m->offset < 128) { + /* Inline bitmap is 16 bytes = 128 bits; vectors with HAS_NULLS + * and >128 elements must use external nullmap (RAY_ATTR_NULLMAP_EXT). + * Returns null_bits=NULL for offset>=128 when using inline bitmap. */ + m->null_bits = m->vec->nullmap + (m->offset / 8); + } + } + + return true; +} + +/* -------------------------------------------------------------------------- + * ray_morsel_init_range + * + * Initialize a morsel iterator over a sub-range [start, end) of the vector. + * Used by parallel dispatch so each worker iterates a disjoint portion. 
+ * -------------------------------------------------------------------------- */ + +void ray_morsel_init_range(ray_morsel_t* m, ray_t* vec, int64_t start, int64_t end) { + m->vec = vec; + m->offset = start; + m->len = end; + m->elem_size = ray_sym_elem_size(vec->type, vec->attrs); + m->morsel_len = 0; + m->morsel_ptr = NULL; + m->null_bits = NULL; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/morsel.h b/crates/rayforce-sys/vendor/rayforce/src/core/morsel.h new file mode 100644 index 0000000..db7c80c --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/morsel.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_MORSEL_H +#define RAY_MORSEL_H + +/* + * morsel.h -- Morsel iterator infrastructure. + * + * A morsel is a chunk of up to RAY_MORSEL_ELEMS (1024) elements from a vector. 
+ * The iterator advances through the vector one morsel at a time, providing + * direct data pointers and null bitmap pointers for each chunk. + */ + +#include "ops/ops.h" + +/* Initialize a morsel iterator over a sub-range [start, end) of vec. + * Used by parallel dispatch to partition work across workers. */ +void ray_morsel_init_range(ray_morsel_t* m, ray_t* vec, int64_t start, int64_t end); + +#endif /* RAY_MORSEL_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/numparse.c b/crates/rayforce-sys/vendor/rayforce/src/core/numparse.c new file mode 100644 index 0000000..408443b --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/numparse.c @@ -0,0 +1,452 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "core/numparse.h" + +#include +#include +#include +#include + +/* ---------------------------------------------------------------------------- + * SWAR digit detection + * + * Load 8 bytes as a little-endian u64 and use the standard Lemire trick: + * - subtract 0x30 from each byte → if any byte was < '0' the result + * underflows and the high bit of that lane is set + * - add 0x46 (= 0x7F - 0x39) to each byte → if any byte was > '9' + * the result exceeds 0x7F and the high bit of that lane is set + * - OR the two and mask with 0x80...80; zero ⇔ all bytes in '0'..'9' + * ---------------------------------------------------------------------------- */ + +#define LANE8_BIT 0x8080808080808080ULL +#define LANE4_BIT 0x80808080U + +bool ray_is_8_digits(const void *p) { + uint64_t chunk; + memcpy(&chunk, p, 8); + uint64_t under = chunk - 0x3030303030303030ULL; /* < '0' → MSB set */ + uint64_t over = chunk + 0x4646464646464646ULL; /* > '9' → MSB set */ + return ((under | over) & LANE8_BIT) == 0; +} + +bool ray_is_4_digits(const void *p) { + uint32_t chunk; + memcpy(&chunk, p, 4); + uint32_t under = chunk - 0x30303030U; + uint32_t over = chunk + 0x46464646U; + return ((under | over) & LANE4_BIT) == 0; +} + +/* ---------------------------------------------------------------------------- + * SWAR digit accumulation + * + * The classic three-stage byte-pair-quad fold from the + * "fast atoi" literature. Compiler folds away well at -O2/-O3, but + * the explicit form keeps it tight at -O0 too (sanitizer build). + * ---------------------------------------------------------------------------- */ + +uint64_t ray_parse_8_digits(const void *p) { + uint64_t chunk; + memcpy(&chunk, p, 8); + chunk -= 0x3030303030303030ULL; /* now each byte ∈ 0..9 */ + + /* Fold pairs of digits into 16-bit words: tens*10 + ones. 
The + * memory-low byte of each pair holds the tens digit (it printed + * first), so on a little-endian load the tens are at chunk's even + * bytes and the ones are at the odd bytes. */ + uint64_t tens = chunk & 0x000F000F000F000FULL; + uint64_t ones = (chunk >> 8) & 0x000F000F000F000FULL; + uint64_t pairs = tens * 10 + ones; /* 4 × 16-bit values 0..99 */ + + /* Fold pairs-of-pairs into 32-bit words: pair_lo*100 + pair_hi, + * where pair_lo holds the more-significant pair (printed first). */ + uint64_t p_hi = pairs & 0x000000FF000000FFULL; + uint64_t p_lo = (pairs >> 16) & 0x000000FF000000FFULL; + uint64_t quads = p_hi * 100 + p_lo; /* 2 × 32-bit values 0..9999 */ + + /* Final fold: low 32 bits hold the more-significant quad. */ + return (quads & 0xFFFFFFFFULL) * 10000 + (quads >> 32); +} + +uint32_t ray_parse_4_digits(const void *p) { + uint32_t chunk; + memcpy(&chunk, p, 4); + chunk -= 0x30303030U; + uint32_t tens = chunk & 0x000F000FU; + uint32_t ones = (chunk >> 8) & 0x000F000FU; + uint32_t pairs = tens * 10 + ones; /* low 16 = pair1, high 16 = pair2 */ + return (pairs & 0xFFFFU) * 100 + (pairs >> 16); +} + +/* ---------------------------------------------------------------------------- + * Integer parsers + * ---------------------------------------------------------------------------- */ + +#define IS_DIGIT(c) ((unsigned)((unsigned char)(c) - '0') < 10u) + +size_t ray_parse_i64(const char *src, size_t len, int64_t *dst) { + if (len == 0) return 0; + + size_t i = 0; + int neg = 0; + if (src[0] == '-') { neg = 1; i = 1; } + else if (src[0] == '+') { i = 1; } + if (i == len) return 0; + + size_t digit_start = i; + + /* Strip leading zeros — they don't contribute to the significant + * digit count and would otherwise force an overly strict cap below + * (e.g. "00000000000000000001" is just 1, not a 20-digit value). 
*/ + while (i < len && src[i] == '0') i++; + size_t sig_start = i; + + uint64_t result = 0; + + /* SWAR: first 8 digits */ + if (i + 8 <= len && ray_is_8_digits(src + i)) { + result = ray_parse_8_digits(src + i); + i += 8; + /* Second 8-digit chunk: result is in [0, 1e8), well below the + * 922337203 bound (= u64 max ÷ 2e10) that keeps result*1e8 + + * 1e8-1 from wrapping u64. */ + if (i + 8 <= len && result <= 922337203ULL && ray_is_8_digits(src + i)) { + result = result * 100000000ULL + ray_parse_8_digits(src + i); + i += 8; + } + } + + /* Scalar tail with strict 19-digit cap. INT64_MAX (and |INT64_MIN|) + * have 19 decimal digits; anything past that always overflows i64 + * and may also overflow u64 in a way where the wrapped value lands + * back inside [0, INT64_MAX], silently misparsing oversized inputs + * as small in-range values. Cut off before that can happen. */ + while (i < len && IS_DIGIT(src[i])) { + if ((size_t)(i - sig_start) >= 19) return 0; /* too many sig digits */ + uint64_t prev = result; + result = result * 10 + (uint64_t)(src[i] - '0'); + if (result < prev) return 0; /* u64 wrap (defensive) */ + i++; + } + + if (i == digit_start) return 0; /* no digits at all */ + + /* Fit into int64 with proper handling of INT64_MIN. 
*/ + if (neg) { + if (result > (uint64_t)INT64_MAX + 1ULL) return 0; + *dst = (int64_t)(0u - result); /* avoids signed UB */ + } else { + if (result > (uint64_t)INT64_MAX) return 0; + *dst = (int64_t)result; + } + return i; +} + +size_t ray_parse_i32(const char *src, size_t len, int32_t *dst) { + int64_t v; + size_t n = ray_parse_i64(src, len, &v); + if (n == 0) return 0; + if (v < INT32_MIN || v > INT32_MAX) return 0; + *dst = (int32_t)v; + return n; +} + +/* ---------------------------------------------------------------------------- + * Float parser + * + * Layout: [+-]digits[.digits][eE[+-]digits] + * Also accepts NaN, Inf, +Inf, -Inf (case-insensitive prefix; we match + * the same forms the language printer emits and that .csv.write produces). + * ---------------------------------------------------------------------------- */ + +static const double g_pow10[] = { + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, + 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, + 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22 +}; + +static inline int icmp3(const char *p, char a, char b, char c) { + unsigned char x = (unsigned char)p[0], y = (unsigned char)p[1], z = (unsigned char)p[2]; + return (x == (unsigned char)a || x == (unsigned char)(a ^ 0x20)) && + (y == (unsigned char)b || y == (unsigned char)(b ^ 0x20)) && + (z == (unsigned char)c || z == (unsigned char)(c ^ 0x20)); +} + +/* Apply 10^e to val. + * + * For |e| ≤ 22 the pow10 table entries are exact f64 (10^k for k ≤ 22 is + * representable), so a single multiply / divide is correctly rounded. + * + * For positive e > 22 we use libm `pow(10, e)` and a *single* multiply. + * Chaining `val *= 1e22` instead would accumulate ~½ ulp per step and + * thirteen steps is enough to push values right at the DBL_MAX boundary + * (e.g. 1.7976931348623158e308) over the rounding threshold into +inf, + * even though the correctly-rounded f64 is still finite. 
+ * + * For negative e, single multiply via `pow(10, -324)` would underflow to + * zero before the multiply could lift the result back into the denormal + * range — so 2.2250738585072014e-308 becomes 0. We chain by 1e22 in + * that direction; chained division stays well-conditioned all the way + * down to the smallest denormal. */ +static inline double scale_pow10(double val, int e) { + if (e == 0) return val; + if (e > 0) { + if (e <= 22) return val * g_pow10[e]; + return val * pow(10.0, (double)e); + } else { + int ne = -e; + if (ne <= 22) return val / g_pow10[ne]; + while (ne > 22) { + val /= 1e22; + if (val == 0.0) return val; + ne -= 22; + } + return val / g_pow10[ne]; + } +} + +size_t ray_parse_f64(const char *src, size_t len, double *dst) { + if (len == 0) return 0; + + size_t i = 0; + int neg = 0; + if (src[0] == '-') { neg = 1; i = 1; } + else if (src[0] == '+') { i = 1; } + + /* NaN / Inf */ + if (i + 3 <= len && icmp3(src + i, 'n', 'a', 'n')) { + *dst = __builtin_nan(""); + return i + 3; + } + if (i + 3 <= len && icmp3(src + i, 'i', 'n', 'f')) { + *dst = neg ? -__builtin_inf() : __builtin_inf(); + return i + 3; + } + + if (i == len) return 0; + + /* Build a single decimal mantissa in u64 plus a signed power-of-ten + * offset, then finalize with one multiply. This avoids two pitfalls + * the earlier hand-rolled accumulator had: + * + * 1. A purely positional fractional cap dropped meaningful trailing + * digits when leading zeros took up the budget — so 1e-19 written + * as "0.0000000000000000001" came back as 0. + * 2. Chained `val *= 1e22` for large exponents accumulated rounding + * error past DBL_MAX, turning DBL_MAX itself into inf. + */ + uint64_t mantissa = 0; + int mant_digits = 0; /* significant digits captured */ + int dec_offset = 0; /* power of 10 to apply at the end */ + bool have_digit = false; + + /* ---- integer part ----------------------------------------------- */ + + /* Skip leading zeros (don't count as significant). 
*/ + while (i < len && src[i] == '0') { i++; have_digit = true; } + + /* SWAR fast path for the first 8 / 16 sig digits. */ + if (i + 8 <= len && ray_is_8_digits(src + i)) { + mantissa = ray_parse_8_digits(src + i); + mant_digits = 8; + i += 8; + have_digit = true; + if (i + 8 <= len && ray_is_8_digits(src + i)) { + mantissa = mantissa * 100000000ULL + ray_parse_8_digits(src + i); + mant_digits = 16; + i += 8; + } + } + + /* Scalar tail of the integer part. Past 18 sig digits we drop + * further integer digits but keep their magnitude via dec_offset. */ + while (i < len && IS_DIGIT(src[i])) { + if (mant_digits < 18) { + mantissa = mantissa * 10 + (uint64_t)(src[i] - '0'); + mant_digits++; + } else { + dec_offset++; + } + i++; + have_digit = true; + } + + /* ---- fractional part -------------------------------------------- */ + + if (i < len && src[i] == '.') { + i++; + /* Leading zeros in the fractional part (when the mantissa is + * still 0) shift the decimal point but contribute no significant + * digit. */ + if (mantissa == 0) { + while (i < len && src[i] == '0') { + dec_offset--; + i++; + have_digit = true; + } + } + + /* SWAR fast path for the first 8 sig fractional digits. */ + if (i + 8 <= len && mant_digits + 8 <= 18 && ray_is_8_digits(src + i)) { + mantissa = mantissa * 100000000ULL + ray_parse_8_digits(src + i); + mant_digits += 8; + dec_offset -= 8; + i += 8; + have_digit = true; + } + + /* Scalar tail of the fractional part. Past 18 sig digits we + * skip further fractional digits — they are below f64 precision + * and they don't shift the magnitude (no dec_offset change). 
*/ + while (i < len && IS_DIGIT(src[i])) { + if (mant_digits < 18) { + mantissa = mantissa * 10 + (uint64_t)(src[i] - '0'); + mant_digits++; + dec_offset--; + } + i++; + have_digit = true; + } + } + + if (!have_digit) return 0; + + /* ---- explicit exponent ------------------------------------------ */ + + if (i < len && (src[i] == 'e' || src[i] == 'E')) { + size_t e_at = i; + i++; + int e_neg = 0; + if (i < len) { + if (src[i] == '-') { e_neg = 1; i++; } + else if (src[i] == '+') { i++; } + } + size_t e_start = i; + int exp_v = 0; + bool exp_capped = false; + while (i < len && IS_DIGIT(src[i])) { + if (exp_v <= 999) exp_v = exp_v * 10 + (src[i] - '0'); + else exp_capped = true; + i++; + } + if (i == e_start) { + /* "1e" with no digits — rewind; the 'e' is not part of the number. */ + i = e_at; + } else { + int e = exp_capped ? 10000 : exp_v; + dec_offset += e_neg ? -e : e; + } + } + + /* ---- finalize: val = mantissa * 10^dec_offset ------------------- */ + + /* Fast path applies only when the conversion is provably correctly + * rounded — i.e. both factors of the final multiply are exact f64s: + * + * - (double)mantissa is exact for mantissa ≤ 2^53. Significant + * digits ≤ 15 keeps mantissa ≤ 10^15 - 1 < 2^53. + * - g_pow10[|k|] is exact for |k| ≤ 22 (10^22 fits in 76 bits but + * IEEE 754 happens to round 10^k for k ≤ 22 to a value that + * matches the table entries we hand-wrote). + * + * Outside that window — high-precision mantissas, large exponents, + * or boundary-near values — defer to libc strtod on the original + * substring. glibc strtod is correctly rounded, so this fixes: + * • DBL_MAX-edge overshoot (1.7976931348623158e308 → +inf in the + * fast path; strtod rounds to DBL_MAX); + * • DBL_MAX_PREV mismatch (1.7976931348623155e308 — fast path + * gives DBL_MAX, strtod correctly gives DBL_MAX_PREV); + * • Denormal underflow (mantissa·pow(10,-324) zeroes out before + * scale_pow10's chained division could keep the result alive). 
+ * + * Most CSV / lang values land in the fast path: they have ≤ 15 + * significant digits and modest exponents. The slow lane is + * reserved for inputs where the trade-off is correctness over + * speed. */ + double val = 0.0; + bool need_strtod = false; + + if (mantissa == 0) { + val = 0.0; + } else if (dec_offset > 308) { + val = __builtin_inf(); + } else if (dec_offset < -342) { /* below denormal range */ + val = 0.0; + } else if (mant_digits <= 15 && dec_offset >= -22 && dec_offset <= 22) { + val = (double)mantissa; + if (dec_offset > 0) val *= g_pow10[dec_offset]; + else if (dec_offset < 0) val /= g_pow10[-dec_offset]; + } else { + need_strtod = true; + } + + if (need_strtod) { + char stackbuf[128]; + char *buf = (i + 1 <= sizeof(stackbuf)) ? stackbuf : malloc(i + 1); + if (buf) { + memcpy(buf, src, i); + buf[i] = '\0'; + char *endp = NULL; + double v = strtod(buf, &endp); + bool ok = (endp == buf + i); + if (buf != stackbuf) free(buf); + if (ok) { + /* strtod already applied the leading sign in buf, so + * don't apply `neg` again. */ + *dst = v; + return i; + } + } + /* Strtod unusable (OOM on a giant literal, or unexpected parse + * disagreement). Fall through with the approximate result + * from the chained-multiply slow path so we still return a + * sensible value rather than 0. */ + val = scale_pow10((double)mantissa, dec_offset); + } + + *dst = neg ? 
-val : val; + return i; +} + +/* ---------------------------------------------------------------------------- + * Hexadecimal (no 0x prefix, lowercase or uppercase) + * ---------------------------------------------------------------------------- */ + +size_t ray_parse_u64_hex(const char *src, size_t len, uint64_t *dst) { + uint64_t v = 0; + size_t i = 0; + while (i < len && i < 16) { + unsigned char c = (unsigned char)src[i]; + unsigned d; + if (c >= '0' && c <= '9') d = (unsigned)(c - '0'); + else if (c >= 'a' && c <= 'f') d = (unsigned)(c - 'a' + 10); + else if (c >= 'A' && c <= 'F') d = (unsigned)(c - 'A' + 10); + else break; + v = (v << 4) | d; + i++; + } + if (i == 0) return 0; + *dst = v; + return i; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/numparse.h b/crates/rayforce-sys/vendor/rayforce/src/core/numparse.h new file mode 100644 index 0000000..fca548c --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/numparse.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_CORE_NUMPARSE_H +#define RAY_CORE_NUMPARSE_H + +/* ============================================================================ + * numparse — unified (ptr, len) → value parsers + * + * Used by both the language tokenizer (src/lang/parse.c) and the CSV + * reader (src/io/csv.c). All parsers share the same shape: + * + * size_t consumed = ray_parse_X(src, len, &out); + * + * - returns the number of bytes consumed from `src` + * - 0 means "no progress" — parse failed at byte 0, *out unchanged + * - the language tokenizer advances its cursor by `consumed` + * - the CSV reader treats `consumed != len` as a null/invalid field + * + * No leading whitespace is stripped; callers strip first if they need to. + * Optional sign characters (`+` / `-`) ARE consumed. + * + * SWAR primitives are also exported (used by fast date / time parsers + * that consume fixed-width digit groups). + * ============================================================================ */ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +size_t ray_parse_i64(const char *src, size_t len, int64_t *dst); +size_t ray_parse_i32(const char *src, size_t len, int32_t *dst); +size_t ray_parse_f64(const char *src, size_t len, double *dst); +size_t ray_parse_u64_hex(const char *src, size_t len, uint64_t *dst); + +/* ---------------------------------------------------------------------------- + * SWAR (SIMD Within A Register) digit primitives. + * + * Caller must guarantee 8 readable bytes at `p` for the 8-digit forms, + * 4 for the 4-digit forms. All loads are unaligned via memcpy. + * Little-endian assumed (x86_64 / aarch64 in normal mode). 
+ * ---------------------------------------------------------------------------- */ + +bool ray_is_8_digits (const void *p); +bool ray_is_4_digits (const void *p); +uint64_t ray_parse_8_digits(const void *p); +uint32_t ray_parse_4_digits(const void *p); + +#ifdef __cplusplus +} +#endif + +#endif /* RAY_CORE_NUMPARSE_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/platform.c b/crates/rayforce-sys/vendor/rayforce/src/core/platform.c new file mode 100644 index 0000000..a386b32 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/platform.c @@ -0,0 +1,464 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* Feature test macros must come before any includes */ +#if defined(__linux__) + #define _GNU_SOURCE +#endif + +#include "platform.h" + +/* ========================================================================== + * Linux / macOS (POSIX) + * ========================================================================== */ +#if defined(RAY_OS_LINUX) || defined(RAY_OS_MACOS) + +#include +#include +#include +#include +#include +#include "mem/sys.h" + +/* -------------------------------------------------------------------------- + * Virtual memory + * -------------------------------------------------------------------------- */ +void* ray_vm_alloc(size_t size) { + void* p = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + return (p == MAP_FAILED) ? NULL : p; +} + +void ray_vm_free(void* ptr, size_t size) { + if (ptr) munmap(ptr, size); +} + +void* ray_vm_map_file(const char* path, size_t* out_size) { + int fd = open(path, O_RDONLY); + if (fd < 0) return NULL; + + struct stat st; + if (fstat(fd, &st) != 0) { + close(fd); + return NULL; + } + + if (st.st_size <= 0) { + close(fd); + if (out_size) *out_size = 0; + return NULL; + } + + size_t len = (size_t)st.st_size; + void* p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + close(fd); + + if (p == MAP_FAILED) return NULL; + + if (out_size) *out_size = len; + return p; +} + +void ray_vm_unmap_file(void* ptr, size_t size) { + if (ptr) munmap(ptr, size); +} + +void ray_vm_advise_seq(void* ptr, size_t size) { + if (ptr) madvise(ptr, size, MADV_SEQUENTIAL); +} + +void ray_vm_advise_willneed(void* ptr, size_t size) { + if (ptr) madvise(ptr, size, MADV_WILLNEED); +} + +void ray_vm_release(void* ptr, size_t size) { + if (!ptr) return; +#if defined(RAY_OS_MACOS) + madvise(ptr, size, MADV_FREE); +#else + madvise(ptr, size, MADV_DONTNEED); +#endif +} + +void* ray_vm_alloc_aligned(size_t size, size_t alignment) { + size_t total = size + alignment; + void* mem = mmap(NULL, 
total, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED) return NULL; + + uintptr_t addr = (uintptr_t)mem; + uintptr_t aligned = (addr + alignment - 1) & ~(alignment - 1); + + /* Trim leading excess */ + if (aligned > addr) + munmap(mem, aligned - addr); + + /* Trim trailing excess */ + uintptr_t end = addr + total; + uintptr_t aligned_end = aligned + size; + if (end > aligned_end) + munmap((void*)aligned_end, end - aligned_end); + + return (void*)aligned; +} + +/* -------------------------------------------------------------------------- + * Threading + * -------------------------------------------------------------------------- */ + +/* pthread entry expects void*(*)(void*), but ray_thread_fn is void(*)(void*). + * Use a small trampoline to bridge the signatures. */ +typedef struct { + ray_thread_fn fn; + void* arg; +} ray_thread_trampoline_t; + +static void* thread_trampoline(void* raw) { + ray_thread_trampoline_t ctx = *(ray_thread_trampoline_t*)raw; + /* Free the trampoline struct allocated on the heap. We copied it first + * so the creating thread can proceed freely. */ + ray_sys_free(raw); + ctx.fn(ctx.arg); + return NULL; +} + +ray_err_t ray_thread_create(ray_thread_t* t, ray_thread_fn fn, void* arg) { + ray_thread_trampoline_t* ctx = (ray_thread_trampoline_t*)ray_sys_alloc(sizeof(*ctx)); + if (!ctx) return RAY_ERR_OOM; + ctx->fn = fn; + ctx->arg = arg; + + pthread_t pt; + int rc = pthread_create(&pt, NULL, thread_trampoline, ctx); + if (rc != 0) { + ray_sys_free(ctx); + return RAY_ERR_OOM; + } + *t = (ray_thread_t)pt; + return RAY_OK; +} + +ray_err_t ray_thread_join(ray_thread_t t) { + int rc = pthread_join((pthread_t)t, NULL); + return (rc == 0) ? RAY_OK : RAY_ERR_IO; +} + +uint32_t ray_thread_count(void) { + long n = sysconf(_SC_NPROCESSORS_ONLN); + return (n > 0) ? 
(uint32_t)n : 1; +} + +/* -------------------------------------------------------------------------- + * Semaphore + * -------------------------------------------------------------------------- */ +#if defined(RAY_OS_MACOS) + +ray_err_t ray_sem_init(ray_sem_t* s, uint32_t initial_value) { + *s = dispatch_semaphore_create((long)initial_value); + return (*s) ? RAY_OK : RAY_ERR_OOM; +} + +void ray_sem_destroy(ray_sem_t* s) { + /* dispatch_semaphore is ARC-managed on modern macOS; explicit release for + * non-ARC builds (our C code). */ + if (*s) dispatch_release(*s); + *s = NULL; +} + +void ray_sem_wait(ray_sem_t* s) { + dispatch_semaphore_wait(*s, DISPATCH_TIME_FOREVER); +} + +void ray_sem_signal(ray_sem_t* s) { + dispatch_semaphore_signal(*s); +} + +#else /* Linux */ + +ray_err_t ray_sem_init(ray_sem_t* s, uint32_t initial_value) { + return (sem_init(s, 0, initial_value) == 0) ? RAY_OK : RAY_ERR_OOM; +} + +void ray_sem_destroy(ray_sem_t* s) { + sem_destroy(s); +} + +void ray_sem_wait(ray_sem_t* s) { + while (sem_wait(s) != 0) { /* retry on EINTR */ } +} + +void ray_sem_signal(ray_sem_t* s) { + sem_post(s); +} + +#endif /* macOS vs Linux semaphore */ + +/* ========================================================================== + * Windows + * ========================================================================== */ +#elif defined(RAY_OS_WINDOWS) + +#ifndef WIN32_LEAN_AND_MEAN + #define WIN32_LEAN_AND_MEAN +#endif +#include + +/* -------------------------------------------------------------------------- + * Virtual memory + * -------------------------------------------------------------------------- */ +void* ray_vm_alloc(size_t size) { + return VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); +} + +void ray_vm_free(void* ptr, size_t size) { + (void)size; + if (ptr) VirtualFree(ptr, 0, MEM_RELEASE); +} + +void* ray_vm_map_file(const char* path, size_t* out_size) { + HANDLE hFile = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL, + 
OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (hFile == INVALID_HANDLE_VALUE) return NULL; + + LARGE_INTEGER file_size; + if (!GetFileSizeEx(hFile, &file_size)) { + CloseHandle(hFile); + return NULL; + } + + HANDLE hMap = CreateFileMappingA(hFile, NULL, PAGE_WRITECOPY, 0, 0, NULL); + if (!hMap) { + CloseHandle(hFile); + return NULL; + } + + void* p = MapViewOfFile(hMap, FILE_MAP_COPY, 0, 0, 0); + + /* We can close both handles; the mapping keeps the file open internally. */ + CloseHandle(hMap); + CloseHandle(hFile); + + if (!p) return NULL; + + if (out_size) *out_size = (size_t)file_size.QuadPart; + return p; +} + +void ray_vm_unmap_file(void* ptr, size_t size) { + (void)size; + if (ptr) UnmapViewOfFile(ptr); +} + +void ray_vm_advise_seq(void* ptr, size_t size) { + /* PrefetchVirtualMemory is Win8.1+. Best-effort; ignore failure. */ + WIN32_MEMORY_RANGE_ENTRY entry; + entry.VirtualAddress = ptr; + entry.NumberOfBytes = size; + PrefetchVirtualMemory(GetCurrentProcess(), 1, &entry, 0); +} + +void ray_vm_release(void* ptr, size_t size) { + if (!ptr) return; + /* DiscardVirtualMemory (Win8.1+) or fallback to decommit+recommit */ + DiscardVirtualMemory(ptr, size); +} + +void* ray_vm_alloc_aligned(size_t size, size_t alignment) { + /* Over-allocate, find aligned offset. Can't trim on Windows, so the + * pool header's vm_base field stores the original base for VirtualFree. 
*/ + void* mem = VirtualAlloc(NULL, size + alignment, + MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + if (!mem) return NULL; + uintptr_t aligned = ((uintptr_t)mem + alignment - 1) & ~(alignment - 1); + return (void*)aligned; +} + +/* -------------------------------------------------------------------------- + * Threading + * -------------------------------------------------------------------------- */ +typedef struct { + ray_thread_fn fn; + void* arg; +} ray_thread_trampoline_t; + +static DWORD WINAPI thread_trampoline(LPVOID raw) { + ray_thread_trampoline_t ctx = *(ray_thread_trampoline_t*)raw; + HeapFree(GetProcessHeap(), 0, raw); + ctx.fn(ctx.arg); + return 0; +} + +ray_err_t ray_thread_create(ray_thread_t* t, ray_thread_fn fn, void* arg) { + ray_thread_trampoline_t* ctx = HeapAlloc(GetProcessHeap(), 0, sizeof(*ctx)); + if (!ctx) return RAY_ERR_OOM; + ctx->fn = fn; + ctx->arg = arg; + + HANDLE h = CreateThread(NULL, 0, thread_trampoline, ctx, 0, NULL); + if (!h) { + HeapFree(GetProcessHeap(), 0, ctx); + return RAY_ERR_OOM; + } + *t = (ray_thread_t)h; + return RAY_OK; +} + +ray_err_t ray_thread_join(ray_thread_t t) { + DWORD rc = WaitForSingleObject((HANDLE)t, INFINITE); + CloseHandle((HANDLE)t); + return (rc == WAIT_OBJECT_0) ? RAY_OK : RAY_ERR_IO; +} + +uint32_t ray_thread_count(void) { + SYSTEM_INFO si; + GetSystemInfo(&si); + return (uint32_t)si.dwNumberOfProcessors; +} + +/* -------------------------------------------------------------------------- + * Semaphore + * -------------------------------------------------------------------------- */ +ray_err_t ray_sem_init(ray_sem_t* s, uint32_t initial_value) { + *s = CreateSemaphoreA(NULL, (LONG)initial_value, LONG_MAX, NULL); + return (*s) ? 
RAY_OK : RAY_ERR_OOM; +} + +void ray_sem_destroy(ray_sem_t* s) { + if (*s) CloseHandle(*s); + *s = NULL; +} + +void ray_sem_wait(ray_sem_t* s) { + WaitForSingleObject(*s, INFINITE); +} + +void ray_sem_signal(ray_sem_t* s) { + ReleaseSemaphore(*s, 1, NULL); +} + +#endif /* RAY_OS_WINDOWS */ + +/* ========================================================================== + * WASM (Emscripten) + * + * Single-threaded by construction. VM allocs are plain malloc; mmap of + * files goes through MEMFS via mmap()/munmap() (still works in emscripten + * for files written into the in-memory FS). Thread/semaphore ops are + * stubs — pool.c will see thread_count() == 1 and skip worker creation. + * ========================================================================== */ +#if defined(RAY_OS_WASM) + +#include +#include +#include +#include +#include +#include +#include "mem/sys.h" + +void* ray_vm_alloc(size_t size) { + /* Emscripten provides MAP_ANONYMOUS; this is the cleanest way to get a + * page-aligned region the heap can hand out. Falls back to aligned + * malloc if mmap is somehow refused (shouldn't happen on MEMFS). */ + void* p = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (p == MAP_FAILED) { + /* aligned_alloc requires size to be a multiple of alignment. + * Round up to a 64KB WASM page. 
*/ + size_t aligned = (size + 65535u) & ~(size_t)65535u; + p = aligned_alloc(65536, aligned); + return p; + } + return p; +} + +void ray_vm_free(void* ptr, size_t size) { + if (!ptr) return; + if (munmap(ptr, size) != 0) free(ptr); +} + +void* ray_vm_map_file(const char* path, size_t* out_size) { + int fd = open(path, O_RDONLY); + if (fd < 0) return NULL; + + struct stat st; + if (fstat(fd, &st) != 0 || st.st_size <= 0) { + close(fd); + if (out_size) *out_size = 0; + return NULL; + } + + size_t len = (size_t)st.st_size; + void* p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + close(fd); + + if (p == MAP_FAILED) return NULL; + if (out_size) *out_size = len; + return p; +} + +void ray_vm_unmap_file(void* ptr, size_t size) { + if (ptr) munmap(ptr, size); +} + +/* madvise hints are advisory and have no analog on WASM — no-ops. */ +void ray_vm_advise_seq(void* ptr, size_t size) { (void)ptr; (void)size; } +void ray_vm_advise_willneed(void* ptr, size_t size) { (void)ptr; (void)size; } +void ray_vm_release(void* ptr, size_t size) { (void)ptr; (void)size; } + +void* ray_vm_alloc_aligned(size_t size, size_t alignment) { + /* aligned_alloc requires size to be a multiple of alignment per C17. */ + size_t aligned_size = (size + alignment - 1) & ~(alignment - 1); + return aligned_alloc(alignment, aligned_size); +} + +/* Threading — return errors / 1. pool.c with n_workers==0 (the result of + * thread_count==1 ⇒ ncpu-1 == 0) never invokes thread_create. */ +ray_err_t ray_thread_create(ray_thread_t* t, ray_thread_fn fn, void* arg) { + (void)t; (void)fn; (void)arg; + return RAY_ERR_NYI; +} + +ray_err_t ray_thread_join(ray_thread_t t) { + (void)t; + return RAY_OK; +} + +uint32_t ray_thread_count(void) { return 1; } + +/* Semaphore — counter-only. Single-threaded so wait never blocks (the + * counter must already be positive when wait fires). 
*/ +ray_err_t ray_sem_init(ray_sem_t* s, uint32_t initial_value) { + *s = (int32_t)initial_value; + return RAY_OK; +} + +void ray_sem_destroy(ray_sem_t* s) { (void)s; } + +void ray_sem_wait(ray_sem_t* s) { + if (*s > 0) (*s)--; +} + +void ray_sem_signal(ray_sem_t* s) { (*s)++; } + +#endif /* RAY_OS_WASM */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/platform.h b/crates/rayforce-sys/vendor/rayforce/src/core/platform.h new file mode 100644 index 0000000..cad406a --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/platform.h @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAY_PLATFORM_H +#define RAY_PLATFORM_H + +#include +#include + +/* -------------------------------------------------------------------------- + * OS detection + * -------------------------------------------------------------------------- */ +/* Detect WASM/Emscripten *before* Linux so we don't pull in Linux-only + * headers (madvise, sem_*, pthread) that emscripten's sysroot stubs out. + * platform.c provides a dedicated WASM arm with malloc/MEMFS shims. */ +#if defined(__EMSCRIPTEN__) + #define RAY_OS_WASM 1 +#elif defined(__linux__) + #define RAY_OS_LINUX 1 +#elif defined(__APPLE__) && defined(__MACH__) + #define RAY_OS_MACOS 1 +#elif defined(_WIN32) + #define RAY_OS_WINDOWS 1 +#else + #error "Unsupported platform" +#endif + +/* -------------------------------------------------------------------------- + * Compiler hints + * -------------------------------------------------------------------------- */ +#if !defined(RAY_LIKELY) +#if defined(__GNUC__) || defined(__clang__) + #define RAY_LIKELY(x) __builtin_expect(!!(x), 1) + #define RAY_UNLIKELY(x) __builtin_expect(!!(x), 0) + #define RAY_ALIGN(n) __attribute__((aligned(n))) + #define RAY_INLINE static inline __attribute__((always_inline)) +#elif defined(_MSC_VER) + #define RAY_LIKELY(x) (x) + #define RAY_UNLIKELY(x) (x) + #define RAY_ALIGN(n) __declspec(align(n)) + #define RAY_INLINE static __forceinline +#else + #define RAY_LIKELY(x) (x) + #define RAY_UNLIKELY(x) (x) + #define RAY_ALIGN(n) + #define RAY_INLINE static inline +#endif +#endif /* !RAY_LIKELY */ + +/* -------------------------------------------------------------------------- + * Thread-local storage + * -------------------------------------------------------------------------- */ +#if !defined(RAY_TLS) +#if defined(_MSC_VER) + #define RAY_TLS __declspec(thread) +#else + #define RAY_TLS _Thread_local +#endif +#endif /* !RAY_TLS */ + +/* -------------------------------------------------------------------------- + * Atomics + * 
-------------------------------------------------------------------------- */ +#if !defined(ray_atomic_inc) +#if defined(_MSC_VER) + #include + /* MSVC Interlocked* return the NEW value; adjust to match fetch_add/ + * fetch_sub semantics (return OLD value). + * _InterlockedIncrement returns new, subtract 1 to get pre-increment. + * _InterlockedDecrement returns new, add 1 to get pre-decrement. + * On ARM use _nf (no fence) / _rel variants for relaxed/release semantics. */ + #if defined(_M_ARM) || defined(_M_ARM64) + #define ray_atomic_inc(p) (_InterlockedIncrement_nf((volatile long*)(p)) - 1) + #define ray_atomic_dec(p) (_InterlockedDecrement_rel((volatile long*)(p)) + 1) + #define ray_atomic_fence_acquire() __dmb(_ARM_BARRIER_ISH) + #else + #define ray_atomic_inc(p) (_InterlockedIncrement((volatile long*)(p)) - 1) + #define ray_atomic_dec(p) (_InterlockedDecrement((volatile long*)(p)) + 1) + #define ray_atomic_fence_acquire() _ReadWriteBarrier() + #endif + #define ray_atomic_load(p) _InterlockedOr((volatile long*)(p), 0) + #define ray_atomic_store(p, v) _InterlockedExchange((volatile long*)(p), (long)(v)) + #define ray_atomic_cas(p, expected, desired) \ + (_InterlockedCompareExchange((volatile long*)(p), (long)(desired), (long)(*(expected))) == (long)(*(expected))) +#else + #include + #define ray_atomic_inc(p) __atomic_fetch_add(p, 1, __ATOMIC_RELAXED) + #define ray_atomic_dec(p) __atomic_fetch_sub(p, 1, __ATOMIC_RELEASE) + #define ray_atomic_load(p) __atomic_load_n(p, __ATOMIC_ACQUIRE) + #define ray_atomic_store(p, v) __atomic_store_n(p, v, __ATOMIC_RELEASE) + #define ray_atomic_cas(p, expected, desired) \ + __atomic_compare_exchange_n(p, expected, desired, 0, \ + __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + #define ray_atomic_fence_acquire() __atomic_thread_fence(__ATOMIC_ACQUIRE) +#endif +#endif /* !ray_atomic_inc */ + +/* -------------------------------------------------------------------------- + * Pull in the public header for ray_err_t, ray_t, etc. 
+ * -------------------------------------------------------------------------- */ +#include + +/* -------------------------------------------------------------------------- + * Thread types + * -------------------------------------------------------------------------- */ +#if defined(_WIN32) + typedef void* ray_thread_t; +#else + typedef unsigned long ray_thread_t; +#endif + +typedef void (*ray_thread_fn)(void* arg); + +/* -------------------------------------------------------------------------- + * Platform VM API + * -------------------------------------------------------------------------- */ +void* ray_vm_alloc(size_t size); +void ray_vm_free(void* ptr, size_t size); +void* ray_vm_map_file(const char* path, size_t* out_size); +void ray_vm_unmap_file(void* ptr, size_t size); +void ray_vm_advise_seq(void* ptr, size_t size); +void ray_vm_advise_willneed(void* ptr, size_t size); +void ray_vm_release(void* ptr, size_t size); +void* ray_vm_alloc_aligned(size_t size, size_t alignment); + +/* -------------------------------------------------------------------------- + * Threading API + * -------------------------------------------------------------------------- */ +ray_err_t ray_thread_create(ray_thread_t* t, ray_thread_fn fn, void* arg); +ray_err_t ray_thread_join(ray_thread_t t); +uint32_t ray_thread_count(void); + +void ray_parallel_begin(void); +void ray_parallel_end(void); +extern _Atomic(uint32_t) ray_parallel_flag; + +/* -------------------------------------------------------------------------- + * Semaphore (platform-specific, not in the public header) + * -------------------------------------------------------------------------- */ +#if defined(RAY_OS_WINDOWS) + typedef void* ray_sem_t; /* HANDLE */ +#elif defined(RAY_OS_MACOS) + #include + typedef dispatch_semaphore_t ray_sem_t; +#elif defined(RAY_OS_WASM) + /* WASM is single-threaded by construction; semaphores are no-op stubs. 
*/ + typedef int32_t ray_sem_t; +#else + #include + typedef sem_t ray_sem_t; +#endif + +ray_err_t ray_sem_init(ray_sem_t* s, uint32_t initial_value); +void ray_sem_destroy(ray_sem_t* s); +void ray_sem_wait(ray_sem_t* s); +void ray_sem_signal(ray_sem_t* s); + +#endif /* RAY_PLATFORM_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/poll.c b/crates/rayforce-sys/vendor/rayforce/src/core/poll.c new file mode 100644 index 0000000..5e29140 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/poll.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "core/poll.h" +#include "mem/sys.h" +#include + +#ifndef RAY_OS_WINDOWS +#include +#endif + +/* ===== Shared (platform-independent) poll helpers ===== */ + +void ray_poll_exit(ray_poll_t* poll, int64_t code) +{ + if (poll) poll->code = code; +} + +ray_selector_t* ray_poll_get(ray_poll_t* poll, int64_t id) +{ + if (!poll || id < 0 || (uint32_t)id >= poll->n_sels) + return NULL; + return poll->sels[id]; +} + +ray_poll_buf_t* ray_poll_buf_new(int64_t size) +{ + ray_poll_buf_t* buf = (ray_poll_buf_t*)ray_sys_alloc( + sizeof(ray_poll_buf_t) + (size_t)size); + if (!buf) return NULL; + buf->next = NULL; + buf->size = size; + buf->offset = 0; + return buf; +} + +void ray_poll_buf_free(ray_poll_buf_t* buf) +{ + while (buf) { + ray_poll_buf_t* next = buf->next; + ray_sys_free(buf); + buf = next; + } +} + +void ray_poll_rx_request(ray_poll_t* poll, ray_selector_t* sel, int64_t size) +{ + (void)poll; + if (sel->rx.buf) { + /* Reuse if large enough, otherwise reallocate */ + if (sel->rx.buf->size >= size) { + sel->rx.buf->offset = 0; + sel->rx.buf->size = size; + return; + } + ray_poll_buf_free(sel->rx.buf); + } + sel->rx.buf = ray_poll_buf_new(size); +} + +void ray_poll_rx_extend(ray_poll_t* poll, ray_selector_t* sel, int64_t extra) +{ + (void)poll; + if (!sel->rx.buf) { + sel->rx.buf = ray_poll_buf_new(extra); + return; + } + int64_t new_size = sel->rx.buf->size + extra; + ray_poll_buf_t* nb = ray_poll_buf_new(new_size); + if (!nb) return; + if (sel->rx.buf->offset > 0) + memcpy(nb->data, sel->rx.buf->data, (size_t)sel->rx.buf->offset); + nb->offset = sel->rx.buf->offset; + ray_poll_buf_free(sel->rx.buf); + sel->rx.buf = nb; +} + +void ray_poll_send(ray_poll_t* poll, ray_selector_t* sel, + ray_poll_buf_t* buf) +{ + (void)poll; + if (!sel || !buf) return; + + /* Use platform send_fn if available, otherwise write() */ + int64_t sent = 0; + while (buf->offset < buf->size) { + if (sel->tx.send_fn) { + sent = sel->tx.send_fn(sel->fd, buf->data + buf->offset, + 
buf->size - buf->offset); + } else { +#ifdef RAY_OS_WINDOWS + sent = -1; /* must have send_fn on Windows */ +#else + sent = (int64_t)write((int)sel->fd, buf->data + buf->offset, + (size_t)(buf->size - buf->offset)); +#endif + } + if (sent <= 0) break; + buf->offset += sent; + } + ray_poll_buf_free(buf); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/poll.h b/crates/rayforce-sys/vendor/rayforce/src/core/poll.h new file mode 100644 index 0000000..1424629 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/poll.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAY_POLL_H +#define RAY_POLL_H + +#include + +/* Forward declarations */ +typedef struct ray_poll ray_poll_t; +typedef struct ray_selector ray_selector_t; + +/* ===== Selector types ===== */ + +#define RAY_SEL_STDIN 0 +#define RAY_SEL_SOCKET 3 + +/* ===== Callbacks ===== */ + +typedef int64_t (*ray_io_fn)(int64_t fd, uint8_t* buf, int64_t len); +typedef ray_t* (*ray_read_fn)(ray_poll_t* poll, ray_selector_t* sel); +typedef void (*ray_event_fn)(ray_poll_t* poll, ray_selector_t* sel); +typedef ray_t* (*ray_poll_data_fn)(ray_poll_t* poll, ray_selector_t* sel, void* data); + +/* ===== Buffer ===== */ + +typedef struct ray_poll_buf { + struct ray_poll_buf* next; + int64_t size; + int64_t offset; + uint8_t data[]; +} ray_poll_buf_t; + +/* ===== Selector — one per registered fd ===== */ + +struct ray_selector { + int64_t fd; + int64_t id; + uint8_t type; + void* data; + ray_event_fn open_fn; + ray_event_fn close_fn; + ray_event_fn error_fn; + ray_poll_data_fn data_fn; + struct { ray_poll_buf_t* buf; ray_io_fn recv_fn; ray_read_fn read_fn; } rx; + struct { ray_poll_buf_t* buf; ray_io_fn send_fn; } tx; +}; + +/* ===== Registration ===== */ + +typedef struct ray_poll_reg { + int64_t fd; + uint8_t type; + ray_event_fn open_fn; + ray_event_fn close_fn; + ray_event_fn error_fn; + ray_poll_data_fn data_fn; + ray_io_fn recv_fn; + ray_io_fn send_fn; + ray_read_fn read_fn; + void* data; +} ray_poll_reg_t; + +/* ===== Poll ===== */ + +struct ray_poll { + int64_t fd; /* epoll/kqueue/iocp handle */ + int64_t code; /* exit code (-1 = running) */ + ray_selector_t** sels; /* selector array */ + uint32_t n_sels; + uint32_t sel_cap; + char auth_secret[256]; /* password from -u/-U, empty = no auth */ + bool restricted; /* true if -U (read-only IPC mode) */ +}; + +/* ===== API ===== */ + +ray_poll_t* ray_poll_create(void); +void ray_poll_destroy(ray_poll_t* poll); +int64_t ray_poll_register(ray_poll_t* poll, ray_poll_reg_t* reg); +void ray_poll_deregister(ray_poll_t* poll, 
int64_t id); +int64_t ray_poll_run(ray_poll_t* poll); +void ray_poll_exit(ray_poll_t* poll, int64_t code); +ray_selector_t* ray_poll_get(ray_poll_t* poll, int64_t id); + +ray_poll_buf_t* ray_poll_buf_new(int64_t size); +void ray_poll_buf_free(ray_poll_buf_t* buf); +void ray_poll_rx_request(ray_poll_t* poll, ray_selector_t* sel, + int64_t size); +void ray_poll_rx_extend(ray_poll_t* poll, ray_selector_t* sel, + int64_t extra); +void ray_poll_send(ray_poll_t* poll, ray_selector_t* sel, + ray_poll_buf_t* buf); + +#endif /* RAY_POLL_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/pool.c b/crates/rayforce-sys/vendor/rayforce/src/core/pool.c new file mode 100644 index 0000000..cb62277 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/pool.c @@ -0,0 +1,504 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "core/pool.h" +#include "mem/cow.h" +#include "mem/heap.h" +#include "mem/sys.h" +#include +#include + +/* Task granularity: RAY_DISPATCH_MORSELS * RAY_MORSEL_ELEMS elements per task */ +#define TASK_GRAIN ((int64_t)RAY_DISPATCH_MORSELS * RAY_MORSEL_ELEMS) + +/* Maximum ring capacity (power of 2) */ +#define MAX_RING_CAP (1u << 16) + +/* -------------------------------------------------------------------------- + * Worker thread entry + * -------------------------------------------------------------------------- */ + +typedef struct { + ray_pool_t* pool; + uint32_t worker_id; /* 1-based (0 = main thread) */ +} worker_ctx_t; + +static void worker_loop(void* arg) { + worker_ctx_t wctx = *(worker_ctx_t*)arg; + ray_sys_free(arg); + + ray_pool_t* pool = wctx.pool; + + /* Each worker thread gets its own heap */ + ray_heap_init(); + ray_rc_sync = true; /* workers always use atomic refcounting */ + + for (;;) { + ray_sem_wait(&pool->work_ready); + + if (atomic_load_explicit(&pool->shutdown, memory_order_acquire)) + break; + + /* Claim and execute tasks until ring is drained */ + for (;;) { + uint32_t idx = atomic_fetch_add_explicit(&pool->task_tail, 1, + memory_order_acq_rel); + if (idx >= atomic_load_explicit(&pool->task_count, + memory_order_acquire)) + break; + + /* Skip execution if query was cancelled */ + if (RAY_UNLIKELY(atomic_load_explicit(&pool->cancelled, + memory_order_relaxed))) { + atomic_fetch_sub_explicit(&pool->pending, 1, + memory_order_acq_rel); + continue; + } + + ray_pool_task_t* t = &pool->tasks[idx & (pool->task_cap - 1)]; + t->fn(t->ctx, wctx.worker_id, t->start, t->end); + + atomic_fetch_sub_explicit(&pool->pending, 1, + memory_order_acq_rel); + } + + /* No ray_heap_gc() here — removing worker GC between dispatch rounds + * ensures main can safely modify worker heaps in ray_parallel_end(). + * Eager madvise in heap_coalesce already releases pages on free. 
*/ + } + + ray_heap_destroy(); +} + +/* -------------------------------------------------------------------------- + * ray_pool_create + * -------------------------------------------------------------------------- */ + +ray_err_t ray_pool_create(ray_pool_t* pool, uint32_t n_workers) { + /* conc-L7: memset zeroes all fields including the `cancelled` atomic, + * which resets any cancellation state from a prior pool instance. */ + memset(pool, 0, sizeof(*pool)); + /* H3: Re-initialize atomic fields after memset — memset produces a + * valid zero bit pattern on all supported platforms, but C11 requires + * atomic_init for well-defined atomic semantics. */ + atomic_init(&pool->shutdown, 0); + atomic_init(&pool->task_tail, 0); + atomic_init(&pool->task_count, 0); + atomic_init(&pool->pending, 0); + atomic_init(&pool->cancelled, 0); + + if (n_workers == 0) { + uint32_t ncpu = ray_thread_count(); + n_workers = (ncpu > 1) ? ncpu - 1 : 0; + } + + pool->n_workers = n_workers; + atomic_store_explicit(&pool->shutdown, 0, memory_order_relaxed); + + /* Allocate task ring */ + pool->task_cap = 1024; + if (pool->task_cap < MAX_RING_CAP) { + /* Will grow if needed in dispatch */ + } + pool->tasks = (ray_pool_task_t*)ray_sys_alloc(pool->task_cap * sizeof(ray_pool_task_t)); + if (!pool->tasks) return RAY_ERR_OOM; + + pool->task_head = 0; + atomic_store_explicit(&pool->task_tail, 0, memory_order_relaxed); + atomic_store_explicit(&pool->task_count, 0, memory_order_relaxed); + atomic_store_explicit(&pool->pending, 0, memory_order_relaxed); + + ray_err_t err = ray_sem_init(&pool->work_ready, 0); + if (err != RAY_OK) { + ray_sys_free(pool->tasks); + return err; + } + + /* Spawn worker threads */ + if (n_workers > 0) { + pool->threads = (ray_thread_t*)ray_sys_alloc(n_workers * sizeof(ray_thread_t)); + if (!pool->threads) { + ray_sem_destroy(&pool->work_ready); + ray_sys_free(pool->tasks); + return RAY_ERR_OOM; + } + + for (uint32_t i = 0; i < n_workers; i++) { + worker_ctx_t* wctx = 
(worker_ctx_t*)ray_sys_alloc(sizeof(worker_ctx_t)); + if (!wctx) { + /* Partial cleanup: shut down already-started threads */ + atomic_store_explicit(&pool->shutdown, 1, memory_order_release); + for (uint32_t j = 0; j < i; j++) { + ray_sem_signal(&pool->work_ready); + } + for (uint32_t j = 0; j < i; j++) { + ray_thread_join(pool->threads[j]); + } + ray_sys_free(pool->threads); + ray_sem_destroy(&pool->work_ready); + ray_sys_free(pool->tasks); + return RAY_ERR_OOM; + } + wctx->pool = pool; + wctx->worker_id = i + 1; /* 0 = main thread */ + + err = ray_thread_create(&pool->threads[i], worker_loop, wctx); + if (err != RAY_OK) { + ray_sys_free(wctx); + atomic_store_explicit(&pool->shutdown, 1, memory_order_release); + for (uint32_t j = 0; j < i; j++) { + ray_sem_signal(&pool->work_ready); + } + for (uint32_t j = 0; j < i; j++) { + ray_thread_join(pool->threads[j]); + } + ray_sys_free(pool->threads); + ray_sem_destroy(&pool->work_ready); + ray_sys_free(pool->tasks); + return err; + } + } + } + + return RAY_OK; +} + +/* -------------------------------------------------------------------------- + * ray_pool_free + * -------------------------------------------------------------------------- */ + +void ray_pool_free(ray_pool_t* pool) { + if (!pool) return; + + /* Signal shutdown and wake all workers */ + atomic_store_explicit(&pool->shutdown, 1, memory_order_release); + for (uint32_t i = 0; i < pool->n_workers; i++) { + ray_sem_signal(&pool->work_ready); + } + + /* Join all worker threads */ + for (uint32_t i = 0; i < pool->n_workers; i++) { + ray_thread_join(pool->threads[i]); + } + + ray_sys_free(pool->threads); + ray_sem_destroy(&pool->work_ready); + ray_sys_free(pool->tasks); + memset(pool, 0, sizeof(*pool)); +} + +/* -------------------------------------------------------------------------- + * ray_pool_dispatch + * -------------------------------------------------------------------------- */ + +/* M2: Caller (ray_execute) must reset pool->cancelled before dispatching. 
+ * The cancelled flag is per-query state; failing to clear it causes all + * subsequent dispatches to skip task execution. */ +void ray_pool_dispatch(ray_pool_t* pool, ray_pool_fn fn, void* ctx, + int64_t total_elems) { + if (total_elems <= 0) return; + + /* Calculate number of tasks. + * Overflow guard: total_elems + grain - 1 could wrap for extreme values. */ + int64_t grain = TASK_GRAIN; + if (RAY_UNLIKELY(total_elems > INT64_MAX - grain + 1)) + total_elems = INT64_MAX - grain + 1; + uint32_t n_tasks = (uint32_t)((total_elems + grain - 1) / grain); + + /* conc-L6: Ring growth is safe without synchronization because dispatch is + * single-producer: only the main thread (the dispatch caller) writes to + * task_head, tasks[], and task_cap. Workers only read via task_tail after + * the publish fence (task_count store-release). */ + if (n_tasks > pool->task_cap) { + uint32_t new_cap = pool->task_cap; + while (new_cap < n_tasks && new_cap < MAX_RING_CAP) new_cap *= 2; + if (new_cap > pool->task_cap) { + ray_pool_task_t* new_tasks = (ray_pool_task_t*)ray_sys_realloc( + pool->tasks, new_cap * sizeof(ray_pool_task_t)); + if (new_tasks) { + pool->tasks = new_tasks; + pool->task_cap = new_cap; + } + } + } + + /* Clamp n_tasks to task_cap to prevent ring overflow */ + if (n_tasks > pool->task_cap) { + n_tasks = pool->task_cap; + grain = (total_elems + n_tasks - 1) / n_tasks; + } + + /* Fill task ring */ + for (uint32_t i = 0; i < n_tasks; i++) { + int64_t start = (int64_t)i * grain; + int64_t end = start + grain; + if (end > total_elems) end = total_elems; + + uint32_t slot = i & (pool->task_cap - 1); + pool->tasks[slot].fn = fn; + pool->tasks[slot].ctx = ctx; + pool->tasks[slot].start = start; + pool->tasks[slot].end = end; + } + + pool->task_head = n_tasks; + atomic_store_explicit(&pool->task_count, n_tasks, memory_order_release); + atomic_store_explicit(&pool->task_tail, 0, memory_order_release); + atomic_store_explicit(&pool->pending, n_tasks, memory_order_release); + 
+ /* Mark parallel region: workers are about to run, cross-heap + * freelist modification is unsafe until spin-wait completes. */ + atomic_store_explicit(&ray_parallel_flag, 1, memory_order_release); + + /* Main thread enters atomic refcount mode during parallel dispatch */ + ray_rc_sync = true; + + /* Wake worker threads */ + for (uint32_t i = 0; i < pool->n_workers; i++) { + ray_sem_signal(&pool->work_ready); + } + + /* Main thread participates as worker 0 */ + for (;;) { + uint32_t idx = atomic_fetch_add_explicit(&pool->task_tail, 1, + memory_order_acq_rel); + if (idx >= n_tasks) break; + + if (RAY_UNLIKELY(atomic_load_explicit(&pool->cancelled, + memory_order_relaxed))) { + atomic_fetch_sub_explicit(&pool->pending, 1, memory_order_acq_rel); + continue; + } + + ray_pool_task_t* t = &pool->tasks[idx & (pool->task_cap - 1)]; + t->fn(t->ctx, 0, t->start, t->end); + + atomic_fetch_sub_explicit(&pool->pending, 1, memory_order_acq_rel); + } + + /* Spin-wait for workers to finish remaining tasks. + * No semaphore — avoids surplus-signal bug between consecutive dispatches. */ + { + unsigned spin_count = 0; + while (atomic_load_explicit(&pool->pending, memory_order_acquire) > 0) { +#if defined(__x86_64__) || defined(__i386__) + __builtin_ia32_pause(); +#elif defined(__aarch64__) + __asm__ volatile("yield" ::: "memory"); +#endif + if (++spin_count % 1024 == 0) sched_yield(); + } + } + + /* All tasks done, workers heading to sem_wait (no GC in loop). + * Safe for main to modify worker heaps between dispatches. */ + atomic_store_explicit(&ray_parallel_flag, 0, memory_order_release); + + /* Memory fence ensures all worker RC operations are visible before + * main thread switches to non-atomic refcounting. Workers may still + * be between pending-- and sem_wait. 
*/ + atomic_thread_fence(memory_order_seq_cst); + ray_rc_sync = false; +} + +/* -------------------------------------------------------------------------- + * ray_pool_dispatch_n — dispatch exactly n_tasks tasks, each [i, i+1) + * -------------------------------------------------------------------------- */ + +void ray_pool_dispatch_n(ray_pool_t* pool, ray_pool_fn fn, void* ctx, + uint32_t n_tasks) { + if (n_tasks == 0) return; + + /* Grow ring if needed */ + if (n_tasks > pool->task_cap) { + uint32_t new_cap = pool->task_cap; + while (new_cap < n_tasks && new_cap < MAX_RING_CAP) new_cap *= 2; + if (new_cap > pool->task_cap) { + ray_pool_task_t* new_tasks = (ray_pool_task_t*)ray_sys_realloc( + pool->tasks, new_cap * sizeof(ray_pool_task_t)); + if (new_tasks) { + pool->tasks = new_tasks; + pool->task_cap = new_cap; + } + } + } + + /* Clamp n_tasks to task_cap to prevent ring overflow */ + if (n_tasks > pool->task_cap) n_tasks = pool->task_cap; + + /* Fill task ring: one task per partition */ + for (uint32_t i = 0; i < n_tasks; i++) { + uint32_t slot = i & (pool->task_cap - 1); + pool->tasks[slot].fn = fn; + pool->tasks[slot].ctx = ctx; + pool->tasks[slot].start = (int64_t)i; + pool->tasks[slot].end = (int64_t)i + 1; + } + + pool->task_head = n_tasks; + atomic_store_explicit(&pool->task_count, n_tasks, memory_order_release); + atomic_store_explicit(&pool->task_tail, 0, memory_order_release); + atomic_store_explicit(&pool->pending, n_tasks, memory_order_release); + + atomic_store_explicit(&ray_parallel_flag, 1, memory_order_release); + ray_rc_sync = true; + + /* Wake worker threads */ + for (uint32_t i = 0; i < pool->n_workers; i++) { + ray_sem_signal(&pool->work_ready); + } + + /* Main thread participates as worker 0 */ + for (;;) { + uint32_t idx = atomic_fetch_add_explicit(&pool->task_tail, 1, + memory_order_acq_rel); + if (idx >= n_tasks) break; + + if (RAY_UNLIKELY(atomic_load_explicit(&pool->cancelled, + memory_order_relaxed))) { + 
atomic_fetch_sub_explicit(&pool->pending, 1, memory_order_acq_rel); + continue; + } + + ray_pool_task_t* t = &pool->tasks[idx & (pool->task_cap - 1)]; + t->fn(t->ctx, 0, t->start, t->end); + + atomic_fetch_sub_explicit(&pool->pending, 1, memory_order_acq_rel); + } + + /* Spin-wait for workers to finish remaining tasks */ + { + unsigned spin_count = 0; + while (atomic_load_explicit(&pool->pending, memory_order_acquire) > 0) { +#if defined(__x86_64__) || defined(__i386__) + __builtin_ia32_pause(); +#elif defined(__aarch64__) + __asm__ volatile("yield" ::: "memory"); +#endif + if (++spin_count % 1024 == 0) sched_yield(); + } + } + + atomic_store_explicit(&ray_parallel_flag, 0, memory_order_release); + atomic_thread_fence(memory_order_seq_cst); + ray_rc_sync = false; +} + +/* -------------------------------------------------------------------------- + * Global pool singleton (lazy init) + * -------------------------------------------------------------------------- */ + +/* L4: Global singleton; not destroyed at program exit (OS reclaims resources). + * May cause ASan leak reports — suppress via LSAN_OPTIONS=detect_leaks=0 or + * an explicit ray_pool_destroy() call before exit. */ +static ray_pool_t g_pool; +static _Atomic(uint32_t) g_pool_init_state = 0; /* 0=uninit, 1=initializing, 2=ready */ + +ray_pool_t* ray_pool_get(void) { + uint32_t state = atomic_load_explicit(&g_pool_init_state, memory_order_acquire); + if (state == 2) return &g_pool; + if (state == 0) { + uint32_t expected = 0; + if (atomic_compare_exchange_strong_explicit(&g_pool_init_state, &expected, 1, + memory_order_acq_rel, + memory_order_acquire)) { + ray_err_t err = ray_pool_create(&g_pool, 0); + if (err == RAY_OK) { + atomic_store_explicit(&g_pool_init_state, 2, memory_order_release); + return &g_pool; + } + /* Failed — allow retry */ + atomic_store_explicit(&g_pool_init_state, 0, memory_order_release); + return NULL; + } + } + /* Spin while another thread initializes or destroys. 
+ * M7: state==3 means the pool is being destroyed — treat as unavailable + * and wait for it to return to state 0 (then return NULL), or become + * state 2 if re-initialized by another thread. */ + { + unsigned spin_count = 0; + for (;;) { + uint32_t s = atomic_load_explicit(&g_pool_init_state, memory_order_acquire); + if (s == 2) return &g_pool; + if (s == 0) return NULL; /* init failed, not started, or destroy completed */ + /* s == 1: still initializing, s == 3: destroying — spin */ +#if defined(__x86_64__) || defined(__i386__) + __builtin_ia32_pause(); +#elif defined(__aarch64__) + __asm__ volatile("yield" ::: "memory"); +#endif + if (++spin_count % 1024 == 0) sched_yield(); + } + } +} + +/* -------------------------------------------------------------------------- + * Public API wrappers (declared in rayforce.h) + * -------------------------------------------------------------------------- */ + +/* conc-L4: If ray_pool_init() is called when the pool is already initialized + * (state==2), the n_workers parameter is silently ignored and the existing + * pool configuration is preserved. This is by design — the pool is a + * singleton and reconfiguration requires ray_pool_destroy() + ray_pool_init(). 
*/ +ray_err_t ray_pool_init(uint32_t n_workers) { + uint32_t expected = 0; + if (!atomic_compare_exchange_strong_explicit(&g_pool_init_state, &expected, 1, + memory_order_acq_rel, + memory_order_acquire)) { + /* Another thread is currently initializing (state==1); spin until ready */ + if (expected == 1) { + while (atomic_load_explicit(&g_pool_init_state, memory_order_acquire) == 1) { +#if defined(__x86_64__) || defined(__i386__) + __builtin_ia32_pause(); +#elif defined(__aarch64__) + __asm__ volatile("yield" ::: "memory"); +#endif + } + } + return RAY_OK; /* already initialized or completed during our spin */ + } + ray_err_t err = ray_pool_create(&g_pool, n_workers); + if (err == RAY_OK) { + atomic_store_explicit(&g_pool_init_state, 2, memory_order_release); + } else { + atomic_store_explicit(&g_pool_init_state, 0, memory_order_release); + } + return err; +} + +void ray_pool_destroy(void) { + uint32_t expected = 2; + if (!atomic_compare_exchange_strong_explicit(&g_pool_init_state, &expected, 3, + memory_order_acq_rel, + memory_order_acquire)) + return; /* not ready, or another thread is already destroying */ + ray_pool_free(&g_pool); + atomic_store_explicit(&g_pool_init_state, 0, memory_order_release); +} + +void ray_cancel(void) { + ray_pool_t* pool = ray_pool_get(); + if (pool) + atomic_store_explicit(&pool->cancelled, 1, memory_order_release); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/pool.h b/crates/rayforce-sys/vendor/rayforce/src/core/pool.h new file mode 100644 index 0000000..3252755 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/pool.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_POOL_H +#define RAY_POOL_H + +/* + * pool.h -- Persistent thread pool for parallel morsel dispatch. + * + * Workers sleep on a semaphore and wake when ray_pool_dispatch() submits tasks. + * The main thread participates as worker 0 (no thread spawned for it). + * Each worker initializes its own thread-local heap via ray_heap_init(). 
+ */ + +#include "core/platform.h" +#include "ops/ops.h" + +/* Callback: process elements [start, end) with the given worker_id */ +typedef void (*ray_pool_fn)(void* ctx, uint32_t worker_id, int64_t start, int64_t end); + +/* A single work item in the task ring */ +typedef struct { + ray_pool_fn fn; + void* ctx; + int64_t start; + int64_t end; +} ray_pool_task_t; + +/* Thread pool */ +struct ray_pool { + ray_thread_t* threads; /* worker thread handles [n_workers] */ + uint32_t n_workers; /* number of background threads (nproc - 1) */ + _Atomic(uint32_t) shutdown; + + /* SPMC task ring (single producer = main, multi consumer = workers + main) */ + ray_pool_task_t* tasks; /* ring buffer [task_cap] */ + uint32_t task_cap; /* power of 2 */ + uint32_t task_head; /* next to write (main only, no atomic needed) */ + _Atomic(uint32_t) task_tail; /* next to claim (workers, atomic_fetch_add) */ + _Atomic(uint32_t) task_count; /* total tasks submitted this dispatch */ + + /* Barrier */ + _Atomic(uint32_t) pending; /* decremented by each task completion */ + ray_sem_t work_ready; /* workers sleep here */ + + /* Query cancellation — set by ray_cancel(), checked per-morsel */ + _Atomic(uint32_t) cancelled; +}; + +/* Total workers = n_workers + 1 (main thread is worker 0) */ +#define ray_pool_total_workers(p) ((p)->n_workers + 1) + +/* Initialize pool with n_workers background threads. + * Pass 0 to auto-detect (nproc - 1). */ +ray_err_t ray_pool_create(ray_pool_t* pool, uint32_t n_workers); + +/* Shutdown and free all resources */ +void ray_pool_free(ray_pool_t* pool); + +/* Dispatch fn over [0, total_elems) partitioned into morsel-sized tasks. + * Blocks until all tasks complete. Main thread participates as worker 0. */ +void ray_pool_dispatch(ray_pool_t* pool, ray_pool_fn fn, void* ctx, int64_t total_elems); + +/* Dispatch exactly n_tasks tasks, each with range [i, i+1). + * Used for partition-parallel workloads where each task is one partition. 
*/ +void ray_pool_dispatch_n(ray_pool_t* pool, ray_pool_fn fn, void* ctx, uint32_t n_tasks); + +/* Global pool lifecycle (lazy singleton) */ +ray_pool_t* ray_pool_get(void); + +/* Public pool init/destroy (moved from rayforce.h) */ +ray_err_t ray_pool_init(uint32_t n_workers); +void ray_pool_destroy(void); + +#endif /* RAY_POOL_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/profile.h b/crates/rayforce-sys/vendor/rayforce/src/core/profile.h new file mode 100644 index 0000000..e89c495 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/profile.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_PROFILE_H +#define RAY_PROFILE_H + +#include +#include + +#if defined(RAY_OS_WINDOWS) +#include +#else +#include +/* clock_gettime / CLOCK_MONOTONIC may be hidden under strict -std=c17 + * without _POSIX_C_SOURCE. 
Provide fallback declarations. */ +#ifndef CLOCK_MONOTONIC +#define CLOCK_MONOTONIC 1 +int clock_gettime(int clk_id, struct timespec *tp); +#endif +#endif + +/* ===== Span-based execution profiler ===== + * + * Zero overhead when inactive — every call guards on g_ray_profile.active. + * Activated by REPL :t command; lives entirely outside hot morsel loops. + */ + +#define RAY_PROFILE_SPANS_MAX 2048 + +typedef enum { + RAY_PROF_SPAN_START, + RAY_PROF_SPAN_END, + RAY_PROF_SPAN_TICK +} ray_prof_span_type_t; + +typedef struct { + ray_prof_span_type_t type; + const char* msg; + int64_t ts; /* nanoseconds (monotonic) */ +} ray_prof_span_t; + +/* Progress callback — set by REPL to render progress bar. + * Called at morsel boundaries; receives done/total/label. */ +typedef void (*ray_progress_fn)(int64_t done, int64_t total, const char* label); + +typedef struct { + bool active; + int32_t n; + /* Progress tracking */ + int64_t progress_total; + int64_t progress_done; + const char* progress_label; + int64_t progress_last_render; /* ns timestamp of last render */ + ray_progress_fn progress_cb; /* set by REPL; NULL = no-op */ + ray_prof_span_t spans[RAY_PROFILE_SPANS_MAX]; +} ray_profile_t; + +/* Single global instance */ +extern ray_profile_t g_ray_profile; + +static inline int64_t ray_profile_now_ns(void) { +#if defined(RAY_OS_WINDOWS) + LARGE_INTEGER freq, cnt; + QueryPerformanceFrequency(&freq); + QueryPerformanceCounter(&cnt); + return (int64_t)((double)cnt.QuadPart / (double)freq.QuadPart * 1e9); +#else + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (int64_t)ts.tv_sec * 1000000000LL + (int64_t)ts.tv_nsec; +#endif +} + +static inline void ray_profile_reset(void) { + g_ray_profile.n = 0; + g_ray_profile.progress_total = 0; + g_ray_profile.progress_done = 0; + g_ray_profile.progress_label = NULL; + g_ray_profile.progress_last_render = 0; +} + +static inline void ray_profile_span_start(const char* name) { + if (!g_ray_profile.active) return; + if 
(g_ray_profile.n >= RAY_PROFILE_SPANS_MAX) return; + ray_prof_span_t* s = &g_ray_profile.spans[g_ray_profile.n++]; + s->type = RAY_PROF_SPAN_START; + s->msg = name; + s->ts = ray_profile_now_ns(); +} + +static inline void ray_profile_span_end(const char* name) { + if (!g_ray_profile.active) return; + if (g_ray_profile.n >= RAY_PROFILE_SPANS_MAX) return; + ray_prof_span_t* s = &g_ray_profile.spans[g_ray_profile.n++]; + s->type = RAY_PROF_SPAN_END; + s->msg = name; + s->ts = ray_profile_now_ns(); +} + +static inline void ray_profile_tick(const char* msg) { + if (!g_ray_profile.active) return; + if (g_ray_profile.n >= RAY_PROFILE_SPANS_MAX) return; + ray_prof_span_t* s = &g_ray_profile.spans[g_ray_profile.n++]; + s->type = RAY_PROF_SPAN_TICK; + s->msg = msg; + s->ts = ray_profile_now_ns(); +} + +/* Progress bar — called between morsels / pipeline stages */ +static inline void ray_profile_progress_begin(const char* label, int64_t total) { + if (!g_ray_profile.active) return; + g_ray_profile.progress_label = label; + g_ray_profile.progress_total = total; + g_ray_profile.progress_done = 0; +} + +/* Throttled: renders at most every 100ms to avoid terminal spam */ +#define RAY_PROGRESS_RENDER_INTERVAL_NS (100 * 1000000LL) + +static inline void ray_profile_progress_advance(int64_t delta) { + if (!g_ray_profile.active) return; + g_ray_profile.progress_done += delta; + if (g_ray_profile.progress_cb && g_ray_profile.progress_total > 0) { + int64_t now = ray_profile_now_ns(); + if (now - g_ray_profile.progress_last_render > RAY_PROGRESS_RENDER_INTERVAL_NS) { + g_ray_profile.progress_last_render = now; + g_ray_profile.progress_cb(g_ray_profile.progress_done, + g_ray_profile.progress_total, + g_ray_profile.progress_label); + } + } +} + +static inline void ray_profile_progress_end(void) { + if (!g_ray_profile.active) return; + g_ray_profile.progress_label = NULL; + g_ray_profile.progress_total = 0; + g_ray_profile.progress_done = 0; +} + +#endif /* RAY_PROFILE_H */ diff --git 
a/crates/rayforce-sys/vendor/rayforce/src/core/progress.c b/crates/rayforce-sys/vendor/rayforce/src/core/progress.c new file mode 100644 index 0000000..0b00f0a --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/progress.c @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Pull-based progress reporting. Zero cost when no callback is + * registered; single main-thread pointer/int stores at sync points + * otherwise. Workers never touch this state. + */ + +#if !defined(_WIN32) && !defined(_POSIX_C_SOURCE) +#define _POSIX_C_SOURCE 200809L +#endif + +#include "rayforce.h" +#include "mem/heap.h" +#include +#include + +static ray_progress_cb g_cb; +static void* g_user; +static uint64_t g_min_ms = 2000; +static uint64_t g_tick_ms = 100; + +/* Active-query state — only touched by the main executor thread. 
+ * A dedicated thread would need atomics, but since every writer is + * the main thread we can use plain loads/stores. */ +static const char* g_op_name; +static const char* g_phase; +static uint64_t g_rows_done; +static uint64_t g_rows_total; +static uint64_t g_start_ns; +static uint64_t g_last_fire_ns; +static bool g_showing; + +static inline uint64_t mono_ns(void) { + struct timespec ts; +#ifdef CLOCK_MONOTONIC_COARSE + clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); +#else + clock_gettime(CLOCK_MONOTONIC, &ts); +#endif + return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec; +} + +void ray_progress_set_callback(ray_progress_cb cb, void* user, + uint64_t min_ms, uint64_t tick_interval_ms) { + g_cb = cb; + g_user = user; + if (min_ms) g_min_ms = min_ms; + if (tick_interval_ms) g_tick_ms = tick_interval_ms; +} + +static void fire(uint64_t now_ns, bool final) { + ray_mem_stats_t ms; + ray_mem_stats(&ms); + ray_progress_t snap = { + .op_name = g_op_name ? g_op_name : "", + .phase = g_phase ? g_phase : "", + .rows_done = g_rows_done, + .rows_total = g_rows_total, + .elapsed_sec = (double)(now_ns - g_start_ns) / 1e9, + .mem_used = (int64_t)(ms.bytes_allocated + ms.direct_bytes), + .mem_budget = ray_mem_budget(), + .final = final, + }; + g_cb(&snap, g_user); + g_last_fire_ns = now_ns; + g_showing = true; +} + +void ray_progress_update(const char* op_name, const char* phase, + uint64_t rows_done, uint64_t rows_total) { + if (!g_cb) return; + + /* Lazy-start the query clock on first call after ray_progress_end + * (or on very first call). Callers don't need a separate begin + * hook — the first update sets the query start time. */ + if (g_start_ns == 0) { + g_start_ns = mono_ns(); + g_last_fire_ns = 0; + g_showing = false; + } + + /* Name/phase follow "NULL = keep previous" so callers can tick + * without relabeling. 
Counters always overwrite — 0 is a valid + * "starting fresh" value and must reset stale totals from the + * prior op/phase (otherwise a new pivot phase would carry the + * previous phase's rows_total forward and render wrong percentages). */ + if (op_name) g_op_name = op_name; + if (phase) g_phase = phase; + g_rows_done = rows_done; + g_rows_total = rows_total; + + uint64_t now = mono_ns(); + uint64_t elapsed_ms = (now - g_start_ns) / 1000000ull; + if (elapsed_ms < g_min_ms) return; + + uint64_t since_last = g_last_fire_ns ? (now - g_last_fire_ns) / 1000000ull : g_tick_ms; + if (since_last < g_tick_ms) return; + + fire(now, false); +} + +void ray_progress_label(const char* op_name, const char* phase) { + if (!g_cb) return; + if (g_start_ns == 0) { + g_start_ns = mono_ns(); + g_last_fire_ns = 0; + g_showing = false; + } + if (op_name) g_op_name = op_name; + /* phase is always overwritten — label() marks a new op boundary + * so any stale phase string from the previous op (e.g. "pivot: + * dedupe") must not leak into the next op's render. Callers pass + * NULL when the new op has no phase of its own. */ + g_phase = phase; + /* Reset counters so a freshly-entered op that doesn't know its + * row total shows an indeterminate bar instead of the previous + * op's percentages. The first ray_progress_update from inside + * the op will fill them in. */ + g_rows_done = 0; + g_rows_total = 0; + + uint64_t now = mono_ns(); + uint64_t elapsed_ms = (now - g_start_ns) / 1000000ull; + if (elapsed_ms < g_min_ms) return; + uint64_t since_last = g_last_fire_ns ? (now - g_last_fire_ns) / 1000000ull : g_tick_ms; + if (since_last < g_tick_ms) return; + fire(now, false); +} + +void ray_progress_end(void) { + if (!g_cb) { + g_start_ns = 0; + return; + } + if (g_showing) { + /* Final 100% tick — only if the bar was actually shown, so + * short queries don't flash anything at all. 
*/ + uint64_t now = mono_ns(); + if (g_rows_total) g_rows_done = g_rows_total; + fire(now, true); + } + g_op_name = NULL; + g_phase = NULL; + g_rows_done = 0; + g_rows_total = 0; + g_start_ns = 0; + g_last_fire_ns = 0; + g_showing = false; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/runtime.c b/crates/rayforce-sys/vendor/rayforce/src/core/runtime.c new file mode 100644 index 0000000..0aeab15 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/runtime.c @@ -0,0 +1,367 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "runtime.h" +#include "mem/heap.h" +#include "mem/sys.h" +#include +#include +#include +#include +#include +#ifdef RAY_OS_WINDOWS +#include +#else +#include +#endif + +/* Forward-declare lang init/destroy to avoid eval.h ray_vm_t conflict */ +extern ray_err_t ray_lang_init(void); +extern void ray_lang_destroy(void); + +/* ===== Global state ===== */ + +ray_runtime_t *__RUNTIME = NULL; +_Thread_local ray_vm_t *__VM = NULL; + +/* Static null singleton — type RAY_NULL, ARENA flag makes retain/release no-ops */ +ray_t __ray_null = { .type = RAY_NULL, .attrs = RAY_ATTR_ARENA, .rc = 0, .len = 0 }; + +/* Static last-resort OOM error — used when ray_error itself can't allocate + * the small block it needs to construct a fresh error. Tagged with + * RAY_ATTR_ARENA so retain/release are no-ops, matching __ray_null. Code + * is "oom" inline (slen=3). Per-VM message is dropped under deep OOM since + * we have no heap to format anything new — callers get the bare type/code. + * + * Without this sentinel, hard OOM (heap can't satisfy even the 32-byte + * error header) makes ray_error return NULL, which silently bypasses + * every `if (RAY_IS_ERR(x)) return x;` guard upstream and reintroduces + * exactly the silent-failure pathology the wrapper-level fixes were + * meant to close. */ +ray_t __ray_oom = { + .type = RAY_ERROR, + .attrs = RAY_ATTR_ARENA, + .rc = 0, + /* slen / sdata share a union with len / i64 / etc. (bytes 16-31) — + * pick one designated path for that union or clang's + * -Winitializer-overrides flags it under -Werror. 
*/ + .slen = 3, + .sdata = { 'o', 'o', 'm', 0, 0, 0, 0 }, +}; + +/* ===== Error code to string ===== */ + +const char* ray_err_code_str(ray_err_t e) { + static const char* codes[] = { + [RAY_OK] = "ok", + [RAY_ERR_OOM] = "oom", + [RAY_ERR_TYPE] = "type", + [RAY_ERR_RANGE] = "range", + [RAY_ERR_LENGTH] = "length", + [RAY_ERR_RANK] = "rank", + [RAY_ERR_DOMAIN] = "domain", + [RAY_ERR_NYI] = "nyi", + [RAY_ERR_IO] = "io", + [RAY_ERR_SCHEMA] = "schema", + [RAY_ERR_CORRUPT] = "corrupt", + [RAY_ERR_CANCEL] = "cancel", + [RAY_ERR_PARSE] = "parse", + [RAY_ERR_NAME] = "name", + [RAY_ERR_LIMIT] = "limit", + /* "reserve" (not "reserved") because the err->sdata inline field + * is capped at 7 bytes — the past-tense form would truncate. */ + [RAY_ERR_RESERVED] = "reserve", + }; + if ((unsigned)e >= sizeof(codes)/sizeof(codes[0])) return "error"; + return codes[e]; +} + +ray_err_t ray_err_from_obj(ray_t* err) { + if (!err || err->type != RAY_ERROR) return RAY_ERR_DOMAIN; + const char* s = err->sdata; + int n = err->slen; + static const struct { const char* s; int len; ray_err_t e; } map[] = { + {"oom", 3, RAY_ERR_OOM}, {"type", 4, RAY_ERR_TYPE}, + {"range", 5, RAY_ERR_RANGE}, {"length", 6, RAY_ERR_LENGTH}, + {"rank", 4, RAY_ERR_RANK}, {"domain", 6, RAY_ERR_DOMAIN}, + {"nyi", 3, RAY_ERR_NYI}, {"io", 2, RAY_ERR_IO}, + {"schema", 6, RAY_ERR_SCHEMA}, {"corrupt", 7, RAY_ERR_CORRUPT}, + {"cancel", 6, RAY_ERR_CANCEL}, {"parse", 5, RAY_ERR_PARSE}, + {"name", 4, RAY_ERR_NAME}, {"limit", 5, RAY_ERR_LIMIT}, + {"reserve", 7, RAY_ERR_RESERVED}, + }; + for (int i = 0; i < (int)(sizeof(map)/sizeof(map[0])); i++) + if (n == map[i].len && memcmp(s, map[i].s, n) == 0) return map[i].e; + return RAY_ERR_DOMAIN; +} + +/* ===== Error API ===== */ + +static ray_t* ray_verror(const char* code, const char* fmt, va_list ap) { + /* Populate / clear the per-VM message buffer FIRST. 
On the deep-OOM + * path below we return the static __ray_oom sentinel, but that path + * still has to leave __VM->err.msg consistent with this call — + * otherwise ray_error_msg() returns text from whatever earlier error + * happened to land in the buffer last, which a user would naturally + * read as the message for THIS error. The vsnprintf target is a + * fixed-size member of __VM (allocated at runtime-init), so this + * step does not depend on the heap and stays valid even when + * ray_alloc below fails. */ + if (__VM) { + if (fmt) vsnprintf(__VM->err.msg, sizeof(__VM->err.msg), fmt, ap); + else __VM->err.msg[0] = '\0'; + } + + ray_t* err = ray_alloc(0); + if (!err) return &__ray_oom; /* sentinel — see __ray_oom comment */ + err->type = RAY_ERROR; + err->slen = 0; + memset(err->sdata, 0, 7); + if (code) { + size_t len = strlen(code); + if (len > 7) len = 7; + memcpy(err->sdata, code, len); + err->slen = (uint8_t)len; + } + return err; +} + +ray_t* ray_error(const char* code, const char* fmt, ...) { + if (fmt) { + va_list ap; + va_start(ap, fmt); + ray_t* err = ray_verror(code, fmt, ap); + va_end(ap); + return err; + } + /* No format string — skip va_list entirely for portability. Clear + * the per-VM message buffer FIRST so the deep-OOM sentinel path + * doesn't leave stale text from an earlier error visible. */ + if (__VM) __VM->err.msg[0] = '\0'; + ray_t* err = ray_alloc(0); + if (!err) return &__ray_oom; /* sentinel — see __ray_oom comment */ + err->type = RAY_ERROR; + err->slen = 0; + memset(err->sdata, 0, 7); + if (code) { + size_t len = strlen(code); + if (len > 7) len = 7; + memcpy(err->sdata, code, len); + err->slen = (uint8_t)len; + } + return err; +} + +void ray_error_free(ray_t* err) { + /* Skip NULL and anything that isn't actually a RAY_ERROR — callers + * often pass a result that might be either an error or a real value. */ + if (!err || !RAY_IS_ERR(err)) return; + /* The static OOM sentinel lives in BSS, not the heap. 
Freeing it + * would corrupt the buddy allocator's bookkeeping. */ + if (err == RAY_OOM_OBJ) return; + /* Both ray_free and ray_release_owned_refs short-circuit on RAY_IS_ERR + * as a safety default (the refcount system deliberately does not track + * error objects). Retype the block to a leaf atom (-RAY_I64) so those + * guards don't fire — an atom with no owned children is the safest + * shape to pass through the standard free path. The rc was already + * 1 from ray_alloc, so ray_free will reclaim the block via the buddy + * allocator. From this point the caller must not touch err again. */ + err->type = -RAY_I64; + ray_free(err); +} + +const char* ray_err_code(ray_t* err) { + if (!err || err->type != RAY_ERROR) return NULL; + /* sdata is 7 bytes and may not be null-terminated when full */ + static _Thread_local char buf[8]; + memcpy(buf, err->sdata, err->slen); + buf[err->slen] = '\0'; + return buf; +} + +const char* ray_error_msg(void) { + if (!__VM || !__VM->err.msg[0]) return NULL; + return __VM->err.msg; +} + +void ray_error_clear(void) { + if (__VM) __VM->err.msg[0] = '\0'; +} + +/* ===== Lifecycle ===== */ + +static ray_runtime_t* runtime_create_impl(const char* sym_path, + ray_err_t* out_sym_err) { + if (out_sym_err) *out_sym_err = RAY_OK; + + /* Init subsystems */ + ray_heap_init(); + ray_sym_init(); + + /* Allocate runtime and set __VM + mem_budget BEFORE any file I/O so + * that ray_error() has a live VM to record diagnostics against and + * allocations are bounded by the budget. 
*/ + ray_runtime_t* rt = (ray_runtime_t*)ray_sys_alloc(sizeof(ray_runtime_t)); + if (!rt) return NULL; + memset(rt, 0, sizeof(*rt)); + + /* Create main VM (id=0) */ + rt->n_vms = 1; + rt->vms = (ray_vm_t**)ray_sys_alloc(sizeof(ray_vm_t*)); + if (!rt->vms) { ray_sys_free(rt); return NULL; } + rt->vms[0] = (ray_vm_t*)ray_sys_alloc(sizeof(ray_vm_t)); + if (!rt->vms[0]) { ray_sys_free(rt->vms); ray_sys_free(rt); return NULL; } + memset(rt->vms[0], 0, sizeof(ray_vm_t)); + rt->vms[0]->id = 0; + __VM = rt->vms[0]; + + /* Detect memory budget: 80% of physical RAM */ +#ifdef RAY_OS_WINDOWS + MEMORYSTATUSEX ms; + ms.dwLength = sizeof(ms); + if (GlobalMemoryStatusEx(&ms)) + rt->mem_budget = (int64_t)(ms.ullTotalPhys * 0.8); + else + rt->mem_budget = (int64_t)(4ULL << 30); +#else + long pages = sysconf(_SC_PHYS_PAGES); + long psize = sysconf(_SC_PAGESIZE); + if (pages > 0 && psize > 0) + rt->mem_budget = (int64_t)((double)pages * (double)psize * 0.8); + else + rt->mem_budget = (int64_t)(4ULL << 30); +#endif + + /* __RUNTIME must be visible before ray_sym_load so mem_budget checks + * and ray_error() both operate against the live runtime. */ + __RUNTIME = rt; + + /* Load persisted symbol table BEFORE ray_lang_init interns builtins. + * Ordering: __VM + mem_budget are live so file I/O errors surface via + * ray_error() and allocations are budget-bounded. Still before + * ray_lang_init so persisted user symbol IDs keep their slots and + * builtins append afterwards. */ + if (sym_path) { + /* Pre-flight size check: reject files that would blow past the + * memory budget before ever touching ray_col_load. + * + * errno handling: ENOENT is the normal first-run case and stays + * RAY_OK; any *other* stat failure (EACCES, ENOTDIR, EIO, …) is + * a real problem and must be surfaced as RAY_ERR_IO, otherwise + * the caller would silently continue with an empty sym table + * and later hit the "divergence" class of bugs this entrypoint + * was added to avoid. 
*/ + struct stat st; + if (stat(sym_path, &st) == 0) { + /* Allow the sym file itself plus some working headroom (2x). + * A well-formed sym file is a list of interned strings; the + * in-memory footprint is bounded by file size within a small + * constant factor. */ + if (st.st_size > 0 && + (int64_t)st.st_size > rt->mem_budget / 2) { + if (out_sym_err) *out_sym_err = RAY_ERR_OOM; + /* Continue startup with empty sym table; caller decides + * whether to treat this as fatal. */ + } else { + ray_err_t sym_err = ray_sym_load(sym_path); + if (out_sym_err) *out_sym_err = sym_err; + /* RAY_ERR_CORRUPT and I/O errors are non-fatal here: + * caller inspects out_sym_err to decide recovery. */ + } + } else if (errno != ENOENT) { + if (out_sym_err) *out_sym_err = RAY_ERR_IO; + } + /* ENOENT: leave out_sym_err = RAY_OK — absent sym file is the + * normal first-run case. */ + } + + /* Init language (env + builtins) — must be after __VM is set and + * after sym_load so persisted user IDs keep their slots. */ + ray_lang_init(); + + return rt; +} + +ray_runtime_t* ray_runtime_create(int argc, char** argv) { + (void)argc; (void)argv; + return runtime_create_impl(NULL, NULL); +} + +ray_runtime_t* ray_runtime_create_with_sym(const char* sym_path) { + return runtime_create_impl(sym_path, NULL); +} + +ray_runtime_t* ray_runtime_create_with_sym_err(const char* sym_path, + ray_err_t* out_sym_err) { + return runtime_create_impl(sym_path, out_sym_err); +} + +/* ===== Main event loop accessors ===== + * The poll is opaque to runtime.h (stored as `void*`) so adding it + * doesn't drag poll.h into every TU that includes runtime.h. Set + * once by main.c after ray_poll_create; read by runtime-level + * builtins (.sys.listen, .sys.cmd "listen N"). */ + +void ray_runtime_set_poll(void* poll) { + if (__RUNTIME) __RUNTIME->poll = poll; +} + +void* ray_runtime_get_poll(void) { + return __RUNTIME ? 
/* Tear down a runtime created by one of the ray_runtime_create* entry
 * points.
 *
 * rt : runtime to destroy; NULL is accepted and ignored.
 *
 * Teardown sequence as written below:
 *   1. language layer (ray_lang_destroy)
 *   2. per-VM retained objects, then the VM structs themselves
 *   3. the global __RUNTIME / thread-local __VM pointers
 *   4. symbol table, then heap
 *   5. the runtime struct itself
 *
 * NOTE(review): __VM and __RUNTIME are cleared unconditionally, without
 * checking that `rt` is the runtime they point at — presumably only one
 * runtime exists per process; confirm against callers. */
void ray_runtime_destroy(ray_runtime_t* rt) {
    if (!rt) return;

    ray_lang_destroy();

    /* Free VMs */
    for (int32_t i = 0; i < rt->n_vms; i++) {
        ray_vm_t* vm = rt->vms[i];
        /* Drop the references the VM still holds before freeing it. */
        if (vm->raise_val) ray_release(vm->raise_val);
        if (vm->trace) { ray_release(vm->trace); vm->trace = NULL; }
        ray_sys_free(vm);
    }
    ray_sys_free(rt->vms);

    /* Clear the globals before the sym table and heap go away, so nothing
     * after this point sees a dangling VM/runtime pointer. */
    __VM = NULL;
    __RUNTIME = NULL;

    ray_sym_destroy();
    ray_heap_destroy();

    /* rt is released with ray_sys_free after the heap is destroyed —
     * presumably the ray_sys_* allocator is independent of the heap;
     * confirm in mem/sys.h. */
    ray_sys_free(rt);
}
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef RAY_RUNTIME_H +#define RAY_RUNTIME_H + +#include + +/* ===== Error Info (per-VM, ephemeral) ===== */ + +typedef struct { + char msg[256]; +} ray_err_info_t; + +/* ===== Scope Frame (moved from env.c) ===== */ + +#define RAY_SCOPE_CAP 64 +#define RAY_FRAME_CAP 64 + +typedef struct { + int64_t keys[RAY_FRAME_CAP]; + ray_t* vals[RAY_FRAME_CAP]; + int32_t count; +} ray_scope_frame_t; + +/* ===== VM sub-types ===== */ + +#define RAY_VM_STACK_SIZE 1024 +#define RAY_VM_TRAP_SIZE 16 + +typedef struct { + ray_t *fn; + int32_t fp; + int32_t ip; +} ray_vm_ctx_t; + +typedef struct { + int32_t rp; + int32_t sp; + int32_t handler_ip; + ray_t *fn; + int32_t fp; + int32_t n_locals; +} ray_vm_trap_t; + +/* ===== Per-thread VM ===== */ + +typedef struct { + /* hot path */ + int32_t sp; + int32_t fp; + int32_t rp; + int32_t id; + ray_t *fn; + void *heap; + int32_t tp; + /* stacks */ + ray_t *ps[RAY_VM_STACK_SIZE]; + ray_vm_ctx_t rs[RAY_VM_STACK_SIZE]; + ray_vm_trap_t ts[RAY_VM_TRAP_SIZE]; + /* cold — error/debug */ + ray_err_info_t err; + ray_t *nfo; + ray_t *trace; + ray_t *raise_val; + /* scope */ + ray_scope_frame_t scope_stack[RAY_SCOPE_CAP]; + int32_t scope_depth; +} ray_vm_t; + +/* ===== Runtime ===== */ + +typedef struct ray_runtime_s { + ray_vm_t **vms; + int32_t n_vms; + int64_t mem_budget; /* 80% of physical RAM, bytes */ + void *poll; /* opaque ray_poll_t* — see ray_runtime_(set|get)_poll */ +} ray_runtime_t; + +/* Global runtime + per-thread VM */ +extern ray_runtime_t *__RUNTIME; +extern _Thread_local ray_vm_t *__VM; + +/* Lifecycle */ +ray_runtime_t* ray_runtime_create(int argc, char** argv); +void ray_runtime_destroy(ray_runtime_t* rt); + +/* Main event-loop accessors. 
The host (main.c) registers the poll it + * created; runtime-level builtins read it back through these to avoid + * pulling poll.h into runtime.h (and to keep TUs that include + * runtime.h decoupled from the eval-VM definition that conflicts with + * the unrelated `ray_vm_t` declared above). */ +void ray_runtime_set_poll(void* poll); +void* ray_runtime_get_poll(void); + +/* Persistent-consumer lifecycle: load the sym table from `sym_path` (if + * present) before builtins register, so user-interned IDs keep the same + * slots across process restarts. The _err variant surfaces the load + * result via `out_sym_err` (RAY_OK / RAY_ERR_CORRUPT / I/O errors) so + * callers can decide recovery policy; the plain variant discards it. */ +ray_runtime_t* ray_runtime_create_with_sym(const char* sym_path); +ray_runtime_t* ray_runtime_create_with_sym_err(const char* sym_path, + ray_err_t* out_sym_err); + +/* Error API — allocates ray_t with type=RAY_ERROR, sets __VM->err.msg */ +ray_t* ray_error(const char* code, const char* fmt, ...); +/* Read error code from a RAY_ERROR object (returns pointer to sdata) */ +const char* ray_err_code(ray_t* err); +/* ray_error_free() is published in include/rayforce.h */ + +/* Read VM error detail message (NULL if empty) */ +const char* ray_error_msg(void); + +/* Clear VM error detail */ +void ray_error_clear(void); + +#endif /* RAY_RUNTIME_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/sock.c b/crates/rayforce-sys/vendor/rayforce/src/core/sock.c new file mode 100644 index 0000000..2983b9d --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/sock.c @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
/* Open a TCP connection to host:port.
 *
 * host       : name or numeric address passed to getaddrinfo.
 * port       : TCP port, host byte order.
 * timeout_ms : when > 0, installed as the socket's SO_RCVTIMEO and
 *              SO_SNDTIMEO before connecting; <= 0 leaves the defaults.
 *
 * Returns a connected socket with TCP_NODELAY requested, or
 * RAY_INVALID_SOCK if resolution fails or no resolved address accepts the
 * connection.
 *
 * Every address returned by getaddrinfo is tried in order. The previous
 * version tried only the first: with AF_UNSPEC a name such as "localhost"
 * may resolve to an IPv6 address first, while ray_sock_listen binds
 * AF_INET only, so the connect failed although a later result would have
 * succeeded. */
ray_sock_t ray_sock_connect(const char* host, uint16_t port, int timeout_ms)
{
    struct addrinfo hints, *res = NULL;
    memset(&hints, 0, sizeof(hints));
    hints.ai_family   = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;

    char port_str[8];
    snprintf(port_str, sizeof(port_str), "%u", (unsigned)port);

    if (getaddrinfo(host, port_str, &hints, &res) != 0 || !res)
        return RAY_INVALID_SOCK;

    ray_sock_t fd = RAY_INVALID_SOCK;
    for (struct addrinfo* ai = res; ai; ai = ai->ai_next) {
        fd = (ray_sock_t)socket(ai->ai_family, ai->ai_socktype,
                                ai->ai_protocol);
        if (fd == RAY_INVALID_SOCK) continue; /* family unsupported: try next */

        /* Set send/recv timeout if requested */
        if (timeout_ms > 0) {
#ifdef RAY_OS_WINDOWS
            DWORD tv = (DWORD)timeout_ms;
            setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof(tv));
            setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, (const char*)&tv, sizeof(tv));
#else
            struct timeval tv;
            tv.tv_sec  = timeout_ms / 1000;
            tv.tv_usec = (timeout_ms % 1000) * 1000;
            setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
            setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
#endif
        }

        if (connect(fd, ai->ai_addr, (socklen_t)ai->ai_addrlen) == 0)
            break; /* connected */

        /* This address refused or was unreachable: release the socket and
         * fall through to the next candidate. */
        ray_sock_close(fd);
        fd = RAY_INVALID_SOCK;
    }
    freeaddrinfo(res);

    if (fd == RAY_INVALID_SOCK) return RAY_INVALID_SOCK;

    /* Best effort: a failure to set TCP_NODELAY is ignored. */
    int yes = 1;
    setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (const char*)&yes, sizeof(yes));
    return fd;
}
O_NONBLOCK) < 0) + return RAY_ERR_IO; +#endif + return RAY_OK; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/sock.h b/crates/rayforce-sys/vendor/rayforce/src/core/sock.h new file mode 100644 index 0000000..0e6575c --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/core/sock.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#ifndef RAY_SOCK_H
+#define RAY_SOCK_H
+
+#include
+
+/* ===== Socket Abstraction ===== */
+
+#ifdef RAY_OS_WINDOWS
+  typedef intptr_t ray_sock_t; /* pointer-width handle on Windows */
+  #define RAY_INVALID_SOCK ((ray_sock_t)-1)
+#else
+  typedef int ray_sock_t; /* plain file descriptor elsewhere */
+  #define RAY_INVALID_SOCK (-1)
+#endif
+
+ray_sock_t ray_sock_listen(uint16_t port); /* listening TCP socket on `port`, or RAY_INVALID_SOCK */
+ray_sock_t ray_sock_accept(ray_sock_t srv); /* accepted connection, or RAY_INVALID_SOCK */
+ray_sock_t ray_sock_connect(const char* host, uint16_t port, int timeout_ms); /* timeout_ms > 0 sets send/recv timeouts; RAY_INVALID_SOCK on failure */
+int64_t ray_sock_send(ray_sock_t s, const void* buf, size_t len); /* sends all of buf: returns len, or -1 on error */
+int64_t ray_sock_recv(ray_sock_t s, void* buf, size_t len); /* one read: bytes received, 0 = peer closed, -1 on error */
+void ray_sock_close(ray_sock_t s); /* no-op for RAY_INVALID_SOCK */
+ray_err_t ray_sock_set_nonblocking(ray_sock_t s); /* RAY_OK, or RAY_ERR_IO on failure */
+
+#endif /* RAY_SOCK_H */
diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/types.c b/crates/rayforce-sys/vendor/rayforce/src/core/types.c
new file mode 100644
index 0000000..e811bac
--- /dev/null
+++ b/crates/rayforce-sys/vendor/rayforce/src/core/types.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2025-2026 Anton Kundenko
+ * All rights reserved.
+
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "core/types.h"
+
+/* Element sizes in bytes, indexed by type tag. Tags 0-13 have non-zero
+ * entries; RAY_SEL (14) is listed explicitly as 0 and every index past the
+ * initializer list is zero-initialized as well (safe for non-vector types). */
+const uint8_t ray_type_sizes[256] = {
+    /* [RAY_LIST] = 0 */ 8, /* pointer-sized (ray_t*) */
+    /* [RAY_BOOL] = 1 */ 1,
+    /* [RAY_U8] = 2 */ 1,
+    /* [RAY_I16] = 3 */ 2,
+    /* [RAY_I32] = 4 */ 4,
+    /* [RAY_I64] = 5 */ 8,
+    /* [RAY_F32] = 6 */ 4,
+    /* [RAY_F64] = 7 */ 8,
+    /* [RAY_DATE] = 8 */ 4,
+    /* [RAY_TIME] = 9 */ 4,
+    /* [RAY_TIMESTAMP] = 10 */ 8,
+    /* [RAY_GUID] = 11 */ 16,
+    /* [RAY_SYM] = 12 */ 8, /* W64 default; narrow widths use ray_sym_elem_size */
+    /* [RAY_STR] = 13 */ 16, /* sizeof(ray_str_t) */
+    /* [RAY_SEL] = 14 */ 0, /* variable-size layout, no elem_size */
+};
+
+/* ===== Semantic Version API ===== */
+
+/* Stringify helpers to build version string from header macros.
+ * Two levels are needed so the RAY_VERSION_* macros are expanded to their
+ * values before `#` turns them into string literals. */
+#define RAY_VER_STR_(x) #x
+#define RAY_VER_STR(x) RAY_VER_STR_(x)
+#define RAY_VERSION_STRING_ \
+    RAY_VER_STR(RAY_VERSION_MAJOR) "." RAY_VER_STR(RAY_VERSION_MINOR) "." RAY_VER_STR(RAY_VERSION_PATCH)
+
+int ray_version_major(void) { return RAY_VERSION_MAJOR; } /* compile-time major component */
+int ray_version_minor(void) { return RAY_VERSION_MINOR; } /* compile-time minor component */
+int ray_version_patch(void) { return RAY_VERSION_PATCH; } /* compile-time patch component */
+const char* ray_version_string(void) { return RAY_VERSION_STRING_; } /* "MAJOR.MINOR.PATCH", static storage */
diff --git a/crates/rayforce-sys/vendor/rayforce/src/core/types.h b/crates/rayforce-sys/vendor/rayforce/src/core/types.h
new file mode 100644
index 0000000..18b5231
--- /dev/null
+++ b/crates/rayforce-sys/vendor/rayforce/src/core/types.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2025-2026 Anton Kundenko
+ * All rights reserved.
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_TYPES_H +#define RAY_TYPES_H + +/* + * types.h — Internal types header. + * + * The canonical type definitions (ray_t, type constants, attribute flags) + * live in (the public header). + * Internal .c files can include either rayforce.h directly or types.h. 
+ */ +#include + +/* Number of types (positive range): must be > max type ID */ +#define RAY_TYPE_COUNT 15 + +/* Type sizes lookup table (defined in types.c) */ +extern const uint8_t ray_type_sizes[256]; + +/* Element size for a given type tag */ +#define ray_elem_size(t) (ray_type_sizes[(t)]) + +#endif /* RAY_TYPES_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/io/csv.c b/crates/rayforce-sys/vendor/rayforce/src/io/csv.c new file mode 100644 index 0000000..499db1c --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/io/csv.c @@ -0,0 +1,1821 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* ============================================================================ + * csv.c — Fast parallel CSV reader + * + * Design: + * 1. mmap + MAP_POPULATE for zero-copy file access + * 2. memchr-based newline scan for row offset discovery + * 3. 
Single-pass: sample-based type inference, then parallel value parsing + * 4. Inline integer/float parsers (bypass strtoll/strtod overhead) + * 5. Parallel row parsing via ray_pool_dispatch + * 6. Per-worker local sym tables, merged post-parse on main thread + * ============================================================================ */ + +#if defined(__linux__) + #define _GNU_SOURCE +#endif + +#include "csv.h" +#include "mem/heap.h" +#include "mem/sys.h" +#include "core/numparse.h" +#include "core/pool.h" +#include "lang/format.h" +#include "ops/hash.h" +#include "store/fileio.h" +#include "table/sym.h" +#include "vec/str.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#ifndef RAY_OS_WINDOWS +#include +#endif +#include + +/* -------------------------------------------------------------------------- + * Constants + * -------------------------------------------------------------------------- */ + +#define CSV_MAX_COLS 256 +#define CSV_SAMPLE_ROWS 100 + +/* -------------------------------------------------------------------------- + * mmap flags + * -------------------------------------------------------------------------- */ + +#ifdef __linux__ + #define MMAP_FLAGS (MAP_PRIVATE | MAP_POPULATE) +#else + #define MMAP_FLAGS MAP_PRIVATE +#endif + +/* -------------------------------------------------------------------------- + * Scratch memory helpers (same pattern as exec.c). + * Uses ray_alloc/ray_free (buddy allocator) instead of malloc/free. 
+ * -------------------------------------------------------------------------- */
+
+static inline void* scratch_alloc(ray_t** hdr_out, size_t nbytes) { /* returns the data pointer; *hdr_out gets the owning header (NULL on failure) */
+    ray_t* h = ray_alloc(nbytes);
+    if (!h) { *hdr_out = NULL; return NULL; }
+    *hdr_out = h;
+    return ray_data(h);
+}
+
+static inline void* scratch_realloc(ray_t** hdr_out, size_t old_bytes, size_t new_bytes) { /* allocate-copy-free; on failure returns NULL and leaves *hdr_out (the old block) untouched */
+    ray_t* old_h = *hdr_out;
+    ray_t* new_h = ray_alloc(new_bytes);
+    if (!new_h) return NULL;
+    void* new_p = ray_data(new_h);
+    if (old_h) {
+        memcpy(new_p, ray_data(old_h), old_bytes < new_bytes ? old_bytes : new_bytes); /* copy min(old, new) bytes */
+        ray_free(old_h);
+    }
+    *hdr_out = new_h;
+    return new_p;
+}
+
+static inline void scratch_free(ray_t* hdr) { /* NULL-tolerant release of a scratch header */
+    if (hdr) ray_free(hdr);
+}
+
+/* Hash uses wyhash from ops/hash.h (ray_hash_bytes) — much faster than FNV-1a
+ * for short strings typical in CSV columns. */
+
+/* String reference — raw pointer into mmap'd buffer + length.
+ * Used during parse phase; interned into sym table after parse. */
+typedef struct {
+    const char* ptr;
+    uint32_t len;
+} csv_strref_t;
+
+/* --------------------------------------------------------------------------
+ * Type inference
+ *
+ * detect_type() classifies a single field; promote_csv_type() folds one
+ * observation into a column's running type. The order of the checks in
+ * detect_type() matters: null sentinels are tested before the NaN/Inf and
+ * numeric scans, and the date/time shapes are only tried once the numeric
+ * scan has rejected the field.
+ * -------------------------------------------------------------------------- */
+
+typedef enum {
+    CSV_TYPE_UNKNOWN = 0, /* empty field or null sentinel: carries no type information */
+    CSV_TYPE_BOOL,
+    CSV_TYPE_I64,
+    CSV_TYPE_F64, /* BOOL < I64 < F64 ordering is relied on by promote_csv_type() */
+    CSV_TYPE_STR,
+    CSV_TYPE_DATE,
+    CSV_TYPE_TIME,
+    CSV_TYPE_TIMESTAMP,
+    CSV_TYPE_GUID /* never returned by detect_type() below */
+} csv_type_t;
+
+static csv_type_t detect_type(const char* f, size_t len) { /* f need not be NUL-terminated; only f[0..len) is read */
+    if (len == 0) return CSV_TYPE_UNKNOWN;
+
+    /* Common null sentinel strings → UNKNOWN (will become NULL) */
+    if ((len == 3 && (memcmp(f, "N/A", 3) == 0 || memcmp(f, "n/a", 3) == 0)) ||
+        (len == 2 && (memcmp(f, "NA", 2) == 0 || memcmp(f, "na", 2) == 0)) ||
+        (len == 4 && (memcmp(f, "null", 4) == 0 || memcmp(f, "NULL", 4) == 0 ||
+                      memcmp(f, "None", 4) == 0 || memcmp(f, "none", 4) == 0)) ||
+        (len == 1 && f[0] == '.')) /* bare dot — not a valid value */
+        return CSV_TYPE_UNKNOWN;
+
+    /* NaN/Inf literals → float */
+    if (len == 3) {
+        if ((f[0]=='n'||f[0]=='N') && (f[1]=='a'||f[1]=='A') && (f[2]=='n'||f[2]=='N'))
+            return CSV_TYPE_F64;
+        if ((f[0]=='i'||f[0]=='I') && (f[1]=='n'||f[1]=='N') && (f[2]=='f'||f[2]=='F'))
+            return CSV_TYPE_F64;
+    }
+    if ((len == 4 && (f[0]=='+' || f[0]=='-')) &&
+        (f[1]=='i'||f[1]=='I') && (f[2]=='n'||f[2]=='N') && (f[3]=='f'||f[3]=='F'))
+        return CSV_TYPE_F64;
+
+    /* Boolean */
+    if ((len == 4 && memcmp(f, "true", 4) == 0) ||
+        (len == 5 && memcmp(f, "false", 5) == 0) ||
+        (len == 4 && memcmp(f, "TRUE", 4) == 0) ||
+        (len == 5 && memcmp(f, "FALSE", 5) == 0))
+        return CSV_TYPE_BOOL;
+
+    /* Numeric scan */
+    const char* p = f;
+    const char* end = f + len;
+    if (*p == '-' || *p == '+') p++;
+    bool has_dot = false, has_e = false, has_digit = false;
+    while (p < end) {
+        unsigned char c = (unsigned char)*p;
+        if (c >= '0' && c <= '9') { has_digit = true; p++; continue; }
+        if (c == '.' && !has_dot) { has_dot = true; p++; continue; }
+        if ((c == 'e' || c == 'E') && !has_e) {
+            has_e = true; p++;
+            if (p < end && (*p == '-' || *p == '+')) p++;
+            continue;
+        }
+        break;
+    }
+    if (p == end && has_digit) { /* whole field consumed and at least one digit seen */
+        if (!has_dot && !has_e) return CSV_TYPE_I64;
+        return CSV_TYPE_F64;
+    }
+
+    /* Date: YYYY-MM-DD (exactly 10 chars) or Timestamp: YYYY-MM-DD{T| }HH:MM:SS */
+    if (len >= 10 && f[4] == '-' && f[7] == '-') {
+        bool is_date = true;
+        for (int i = 0; i < 10; i++) {
+            if (i == 4 || i == 7) continue;
+            if ((unsigned)(f[i] - '0') > 9) { is_date = false; break; } /* unsigned wrap makes this a single digit-range test */
+        }
+        if (is_date) {
+            if (len == 10) return CSV_TYPE_DATE;
+            if (len >= 19 && (f[10] == 'T' || f[10] == ' ') &&
+                f[13] == ':' && f[16] == ':') {
+                const int tp[] = {11,12,14,15,17,18}; /* digit positions of HH, MM, SS */
+                bool is_ts = true;
+                for (int i = 0; i < 6; i++) {
+                    if ((unsigned)(f[tp[i]] - '0') > 9) { is_ts = false; break; }
+                }
+                if (is_ts) return CSV_TYPE_TIMESTAMP;
+            }
+        }
+    }
+
+    /* Time: HH:MM:SS[.ffffff] (at least 8 chars) */
+    if (len >= 8 && f[2] == ':' && f[5] == ':') {
+        const int tp[] = {0,1,3,4,6,7}; /* digit positions of HH, MM, SS */
+        bool is_time = true;
+        for (int i = 0; i < 6; i++) {
+            if ((unsigned)(f[tp[i]] - '0') > 9) { is_time = false; break; }
+        }
+        if (is_time) return CSV_TYPE_TIME;
+    }
+
+    return CSV_TYPE_STR; /* anything unrecognised is a string */
+}
+
+static csv_type_t promote_csv_type(csv_type_t cur, csv_type_t obs) { /* cur = column type so far, obs = type of the newly seen field */
+    if (cur == CSV_TYPE_UNKNOWN) return obs;
+    if (obs == CSV_TYPE_UNKNOWN) return cur;
+    if (cur == obs) return cur;
+    if (cur == CSV_TYPE_STR || obs == CSV_TYPE_STR) return CSV_TYPE_STR;
+    /* DATE + TIMESTAMP → TIMESTAMP */
+    if ((cur == CSV_TYPE_DATE && obs == CSV_TYPE_TIMESTAMP) ||
+        (cur == CSV_TYPE_TIMESTAMP && obs == CSV_TYPE_DATE))
+        return CSV_TYPE_TIMESTAMP;
+    /* Numeric promotion: BOOL ⊂ I64 ⊂ F64 (enum values 1 < 2 < 3) */
+    if (cur <= CSV_TYPE_F64 && obs <= CSV_TYPE_F64) {
+        if (cur == CSV_TYPE_F64 || obs == CSV_TYPE_F64) return CSV_TYPE_F64;
+        if (cur == CSV_TYPE_I64 || obs == CSV_TYPE_I64) return CSV_TYPE_I64;
+        return cur;
+    }
+    /* All other mixed types (e.g. DATE+I64, TIME+BOOL) → STR */
+    return CSV_TYPE_STR;
+}
+
+/* --------------------------------------------------------------------------
+ * Zero-copy field scanner
+ *
+ * Returns pointer past the field's trailing delimiter (or at newline/end).
+ * Sets *out and *out_len to the field content. For unquoted fields, *out
+ * points directly into the mmap buffer. For quoted fields with escaped
+ * quotes, content is unescaped into esc_buf.
+ * -------------------------------------------------------------------------- */ + +static const char* scan_field_quoted(const char* p, const char* buf_end, + char delim, + const char** out, size_t* out_len, + char* esc_buf, char** dyn_esc) { + p++; /* skip opening quote */ + const char* fld_start = p; + bool has_escape = false; + + while (p < buf_end) { + if (*p == '"') { + if (p + 1 < buf_end && *(p + 1) == '"') { + has_escape = true; + p += 2; + } else { + break; /* closing quote */ + } + } else { + p++; + } + } + size_t raw_len = (size_t)(p - fld_start); + if (p < buf_end && *p == '"') p++; /* skip closing quote */ + + if (has_escape) { + /* raw_len >= output length (quotes are collapsed); no overflow. */ + char* dest = esc_buf; + if (RAY_UNLIKELY(raw_len > 8192)) { + /* Field too large for stack buffer — dynamically allocate */ + dest = (char*)ray_sys_alloc(raw_len); + if (!dest) { + /* OOM: fall back to raw (quotes remain) */ + *out = fld_start; + *out_len = raw_len; + goto advance; + } + *dyn_esc = dest; + } + size_t olen = 0; + for (const char* s = fld_start; s < fld_start + raw_len; s++) { + if (*s == '"' && s + 1 < fld_start + raw_len && *(s + 1) == '"') { + dest[olen++] = '"'; + s++; + } else { + dest[olen++] = *s; + } + } + *out = dest; + *out_len = olen; + } else { + *out = fld_start; + *out_len = raw_len; + } + +advance: + /* Advance past delimiter */ + if (p < buf_end && *p == delim) p++; + /* Don't advance past newline — caller handles row boundaries */ + return p; +} + +RAY_INLINE const char* scan_field(const char* p, const char* buf_end, + char delim, + const char** out, size_t* out_len, + char* esc_buf, char** dyn_esc) { + if (RAY_UNLIKELY(p >= buf_end)) { + *out = p; + *out_len = 0; + return p; + } + + if (RAY_LIKELY(*p != '"')) { + /* Unquoted field — fast path */ + const char* s = p; + while (p < buf_end && *p != delim && *p != '\n' && *p != '\r') p++; + *out = s; + *out_len = (size_t)(p - s); + if (p < buf_end && *p == delim) return p + 1; + 
return p; + } + + return scan_field_quoted(p, buf_end, delim, out, out_len, esc_buf, dyn_esc); +} + +/* -------------------------------------------------------------------------- + * Numeric field parsers — thin wrappers over core/numparse with the + * CSV semantics that the *entire* field must be consumed; otherwise + * the cell is null. + * -------------------------------------------------------------------------- */ + +RAY_INLINE int64_t fast_i64(const char* p, size_t len, bool* is_null) { + int64_t v = 0; + size_t n = ray_parse_i64(p, len, &v); + *is_null = (n == 0 || n != len); + return *is_null ? 0 : v; +} + +RAY_INLINE double fast_f64(const char* p, size_t len, bool* is_null) { + double v = 0.0; + size_t n = ray_parse_f64(p, len, &v); + *is_null = (n == 0 || n != len); + return *is_null ? 0.0 : v; +} + +/* -------------------------------------------------------------------------- + * Fast inline date/time parsers + * + * DATE: YYYY-MM-DD → int32_t (days since 2000-01-01) + * TIME: HH:MM:SS[.fff] → int32_t (milliseconds since midnight) + * TIMESTAMP: YYYY-MM-DD{T| }HH:MM:SS[.ffffff] → int64_t (µs since 2000-01-01) + * + * Uses Howard Hinnant's civil-calendar algorithm (public domain) for the + * date→days conversion — O(1), no tables, no branches. + * -------------------------------------------------------------------------- */ + +RAY_INLINE int32_t civil_to_days(int y, int m, int d) { + /* Shift Jan/Feb to months 10/11 of the previous year */ + if (m <= 2) { y--; m += 9; } else { m -= 3; } + int era = (y >= 0 ? 
y : y - 399) / 400; + int yoe = y - era * 400; + int doy = (153 * m + 2) / 5 + d - 1; + int doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; + return (int32_t)(era * 146097 + doe - 719468 - 10957); +} + +RAY_INLINE int32_t fast_date(const char* p, size_t len, bool* is_null) { + if (RAY_UNLIKELY(len < 10)) { *is_null = true; return 0; } + *is_null = false; + int y = (p[0]-'0')*1000 + (p[1]-'0')*100 + (p[2]-'0')*10 + (p[3]-'0'); + int m = (p[5]-'0')*10 + (p[6]-'0'); + int d = (p[8]-'0')*10 + (p[9]-'0'); + if (RAY_UNLIKELY(m < 1 || m > 12 || d < 1 || d > 31)) { *is_null = true; return 0; } + return civil_to_days(y, m, d); +} + +/* TIME → int32_t milliseconds since midnight */ +RAY_INLINE int32_t fast_time(const char* p, size_t len, bool* is_null) { + if (RAY_UNLIKELY(len < 8)) { *is_null = true; return 0; } + *is_null = false; + int h = (p[0]-'0')*10 + (p[1]-'0'); + int mi = (p[3]-'0')*10 + (p[4]-'0'); + int s = (p[6]-'0')*10 + (p[7]-'0'); + if (RAY_UNLIKELY(h > 23 || mi > 59 || s > 59)) { *is_null = true; return 0; } + int32_t ms = h * 3600000 + mi * 60000 + s * 1000; + /* Fractional seconds → milliseconds */ + if (len > 8 && p[8] == '.') { + int frac = 0, digits = 0; + for (size_t i = 9; i < len && digits < 3; i++, digits++) { + unsigned di = (unsigned char)p[i] - '0'; + if (di > 9) break; + frac = frac * 10 + (int)di; + } + while (digits < 3) { frac *= 10; digits++; } + ms += (int32_t)frac; + } + return ms; +} + +/* Timestamp time component → int64_t nanoseconds. + * RAY_TIMESTAMP is nanoseconds since 2000-01-01 (matching + * src/lang/format.c:ts_to_parts and csv_write_timestamp). Accept up + * to 9 fractional digits; shorter fractions are right-padded with + * zeros, longer ones are truncated. 
*/ +RAY_INLINE int64_t fast_time_ns(const char* p, size_t len, bool* is_null) { + if (RAY_UNLIKELY(len < 8)) { *is_null = true; return 0; } + *is_null = false; + int h = (p[0]-'0')*10 + (p[1]-'0'); + int mi = (p[3]-'0')*10 + (p[4]-'0'); + int s = (p[6]-'0')*10 + (p[7]-'0'); + if (RAY_UNLIKELY(h > 23 || mi > 59 || s > 59)) { *is_null = true; return 0; } + int64_t ns = (int64_t)h * 3600000000000LL + (int64_t)mi * 60000000000LL + + (int64_t)s * 1000000000LL; + if (len > 8 && p[8] == '.') { + int64_t frac = 0; + int digits = 0; + for (size_t i = 9; i < len && digits < 9; i++, digits++) { + unsigned di = (unsigned char)p[i] - '0'; + if (di > 9) break; + frac = frac * 10 + (int64_t)di; + } + while (digits < 9) { frac *= 10; digits++; } + ns += frac; + } + return ns; +} + +RAY_INLINE int64_t fast_timestamp(const char* p, size_t len, bool* is_null) { + if (RAY_UNLIKELY(len < 19)) { *is_null = true; return 0; } + *is_null = false; + int32_t days = fast_date(p, 10, is_null); + if (*is_null) return 0; + bool time_null = false; + int64_t time_ns = fast_time_ns(p + 11, len - 11, &time_null); + if (time_null) { *is_null = true; return 0; } + const int64_t NS_PER_DAY = 86400000000000LL; + return (int64_t)days * NS_PER_DAY + time_ns; +} + +/* -------------------------------------------------------------------------- + * Null-aware boolean parser + * -------------------------------------------------------------------------- */ + +RAY_INLINE uint8_t fast_bool(const char* s, size_t len, bool* is_null) { + if (len == 0) { *is_null = true; return 0; } + *is_null = false; + if ((len == 4 && (memcmp(s, "true", 4) == 0 || memcmp(s, "TRUE", 4) == 0)) || + (len == 1 && s[0] == '1')) + return 1; + if ((len == 5 && (memcmp(s, "false", 5) == 0 || memcmp(s, "FALSE", 5) == 0)) || + (len == 1 && s[0] == '0')) + return 0; + *is_null = true; + return 0; +} + +/* -------------------------------------------------------------------------- + * GUID parser (mirrors csv_write_guid: 8-4-4-4-12 hex, 36 
chars). + * Writes 16 bytes to `dst`. Sets *is_null on shape or hex mismatch. + * -------------------------------------------------------------------------- */ + +RAY_INLINE int hex_nibble(unsigned char c) { + if (c >= '0' && c <= '9') return c - '0'; + if (c >= 'a' && c <= 'f') return c - 'a' + 10; + if (c >= 'A' && c <= 'F') return c - 'A' + 10; + return -1; +} + +RAY_INLINE void fast_guid(const char* p, size_t len, uint8_t* dst, bool* is_null) { + if (RAY_UNLIKELY(len != 36 || + p[8] != '-' || p[13] != '-' || + p[18] != '-' || p[23] != '-')) { + *is_null = true; + return; + } + /* Layout: bytes 0..3 from chars 0..7, then 4..5 from 9..12, + * 6..7 from 14..17, 8..9 from 19..22, 10..15 from 24..35. */ + static const uint8_t pos[16] = { 0,2,4,6, 9,11, 14,16, 19,21, 24,26,28,30,32,34 }; + for (int i = 0; i < 16; i++) { + int hi = hex_nibble((unsigned char)p[pos[i]]); + int lo = hex_nibble((unsigned char)p[pos[i] + 1]); + if (RAY_UNLIKELY((hi | lo) < 0)) { *is_null = true; return; } + dst[i] = (uint8_t)((hi << 4) | lo); + } + *is_null = false; +} + +/* -------------------------------------------------------------------------- + * Row offsets builder — memchr-accelerated + * + * Uses memchr (glibc: SIMD-accelerated ~15-20 GB/s) for newline scanning. + * Fast path for quote-free files; falls back to byte-by-byte for quoted + * fields with embedded newlines. Returns exact row count. + * + * Allocates offsets via scratch_alloc. Caller frees with scratch_free. + * -------------------------------------------------------------------------- */ + +static int64_t build_row_offsets(const char* buf, size_t buf_size, + size_t data_offset, + int64_t** offsets_out, ray_t** hdr_out) { + const char* p = buf + data_offset; + const char* end = buf + buf_size; + + /* Do NOT skip leading blank lines: empty lines in the data section + * are null rows (they were written out by write-csv for null-valued + * single-column tables). 
Header-level whitespace is consumed by the + * header parser before we reach data_offset. */ + if (p >= end) { *offsets_out = NULL; *hdr_out = NULL; return 0; } + + /* Estimate capacity: ~40 bytes per row + headroom. + * 40 bytes/row is conservative for typical CSVs; realloc path handles + * underestimates. */ + size_t remaining = (size_t)(end - p); + int64_t est = (int64_t)(remaining / 40) + 16; + ray_t* hdr = NULL; + int64_t* offs = (int64_t*)scratch_alloc(&hdr, (size_t)est * sizeof(int64_t)); + if (!offs) { *offsets_out = NULL; *hdr_out = NULL; return 0; } + + int64_t n = 0; + offs[n++] = (int64_t)(p - buf); + + /* Check if file has any quotes — determines fast vs slow path */ + bool has_quotes = (memchr(p, '"', remaining) != NULL); + + if (RAY_LIKELY(!has_quotes)) { + /* Fast path: no quotes, use memchr for newlines. + * Only scans for \n; pure \r line endings (old Mac) treated as single row. + * Empty lines are preserved as rows (for NULL handling). */ + for (;;) { + const char* nl = (const char*)memchr(p, '\n', (size_t)(end - p)); + if (!nl) break; + p = nl + 1; + /* Skip optional \r after \n (unusual \n\r endings) */ + if (p < end && *p == '\r') p++; + if (p >= end) break; + + if (n >= est) { + est *= 2; + offs = (int64_t*)scratch_realloc(&hdr, + (size_t)n * sizeof(int64_t), + (size_t)est * sizeof(int64_t)); + if (!offs) { scratch_free(hdr); *offsets_out = NULL; *hdr_out = NULL; return 0; } + } + offs[n++] = (int64_t)(p - buf); + } + } else { + /* Slow path: track quote parity, byte-by-byte. + * Empty lines preserved as rows (for NULL handling). 
*/ + bool in_quote = false; + while (p < end) { + char c = *p; + if (c == '"') { + in_quote = !in_quote; + p++; + } else if (!in_quote && (c == '\n' || c == '\r')) { + if (c == '\r' && p + 1 < end && *(p + 1) == '\n') p++; + p++; + if (p < end) { + if (n >= est) { + est *= 2; + offs = (int64_t*)scratch_realloc(&hdr, + (size_t)n * sizeof(int64_t), + (size_t)est * sizeof(int64_t)); + if (!offs) { scratch_free(hdr); *offsets_out = NULL; *hdr_out = NULL; return 0; } + } + offs[n++] = (int64_t)(p - buf); + } + } else { + p++; + } + } + } + + *offsets_out = offs; + *hdr_out = hdr; + return n; +} + +/* -------------------------------------------------------------------------- + * Batch-intern string columns after parse. + * Single-threaded — walks each string column, interns into global sym table, + * writes sym IDs into the final uint32_t column. + * -------------------------------------------------------------------------- */ + +static bool csv_intern_strings(csv_strref_t** str_refs, int n_cols, + const csv_type_t* col_types, + const int8_t* resolved_types, + void** col_data, int64_t n_rows, + int64_t* col_max_ids, + uint8_t** col_nullmaps) { + bool ok = true; + for (int c = 0; c < n_cols; c++) { + if (col_types[c] != CSV_TYPE_STR) continue; + /* RAY_STR columns are materialized directly; skip sym interning. */ + if (resolved_types[c] == RAY_STR) continue; + csv_strref_t* refs = str_refs[c]; + uint32_t* ids = (uint32_t*)col_data[c]; + uint8_t* nm = col_nullmaps ? col_nullmaps[c] : NULL; + int64_t max_id = 0; + + /* Pre-grow: upper bound is n_rows unique strings */ + uint32_t current = ray_sym_count(); + if (!ray_sym_ensure_cap(current + (uint32_t)(n_rows < UINT32_MAX ? 
n_rows : UINT32_MAX))) + return false; /* OOM: cannot grow sym table */ + + for (int64_t r = 0; r < n_rows; r++) { + if (nm && (nm[r >> 3] & (1u << (r & 7)))) { + ids[r] = 0; + continue; + } + uint32_t hash = (uint32_t)ray_hash_bytes(refs[r].ptr, refs[r].len); + int64_t id = ray_sym_intern_prehashed(hash, refs[r].ptr, refs[r].len); + if (id < 0) { ok = false; id = 0; } + ids[r] = (uint32_t)id; + if (id > max_id) max_id = id; + } + if (col_max_ids) col_max_ids[c] = max_id; + } + return ok; +} + +/* Free strref pointers that were heap-allocated for escaped CSV fields. + * Any strref whose ptr falls outside the mmap buffer [buf, buf+buf_size) + * was allocated by the parse loop and must be freed here. */ +static void csv_free_escaped_strrefs(csv_strref_t** str_refs, int n_cols, + const csv_type_t* col_types, + int64_t n_rows, + const char* buf, size_t buf_size) { + const char* buf_end = buf + buf_size; + for (int c = 0; c < n_cols; c++) { + if (col_types[c] != CSV_TYPE_STR || !str_refs[c]) continue; + for (int64_t r = 0; r < n_rows; r++) { + const char* p = str_refs[c][r].ptr; + if (p && (p < buf || p >= buf_end)) + ray_sys_free((void*)p); + } + } +} + +/* Materialize RAY_STR columns from parsed strrefs. Two-pass so the per-column + * string pool is sized exactly once — avoids the repeated realloc/COW path + * that ray_str_vec_set would take for a freshly-owned vector. */ +static bool csv_fill_str_cols(csv_strref_t** str_refs, int n_cols, + const int8_t* resolved_types, + ray_t** col_vecs, int64_t n_rows, + uint8_t** col_nullmaps) { + for (int c = 0; c < n_cols; c++) { + if (resolved_types[c] != RAY_STR) continue; + csv_strref_t* refs = str_refs[c]; + uint8_t* nm = col_nullmaps ? col_nullmaps[c] : NULL; + ray_t* vec = col_vecs[c]; + ray_str_t* dst = (ray_str_t*)ray_data(vec); + + /* ray_str_t.pool_off is u32 — the per-column pool is capped at 4 GiB. + * Sum as u64 so the add itself can't wrap, then bail if the total + * wouldn't fit in the u32 offset field. 
*/ + uint64_t pool_bytes = 0; + for (int64_t r = 0; r < n_rows; r++) { + if (nm && (nm[r >> 3] & (1u << (r & 7)))) continue; + uint32_t l = refs[r].len; + if (l > RAY_STR_INLINE_MAX) pool_bytes += l; + } + if (pool_bytes > UINT32_MAX) return false; + + if (pool_bytes > 0) { + ray_t* pool = ray_alloc((size_t)pool_bytes); + if (!pool || RAY_IS_ERR(pool)) return false; + pool->type = RAY_U8; + pool->len = 0; + vec->str_pool = pool; + } + + char* pool_base = vec->str_pool ? (char*)ray_data(vec->str_pool) : NULL; + uint32_t pool_off = 0; + + for (int64_t r = 0; r < n_rows; r++) { + memset(&dst[r], 0, sizeof(ray_str_t)); + if (nm && (nm[r >> 3] & (1u << (r & 7)))) continue; + const char* p = refs[r].ptr; + uint32_t l = refs[r].len; + dst[r].len = l; + if (l <= RAY_STR_INLINE_MAX) { + if (l > 0) memcpy(dst[r].data, p, l); + } else { + memcpy(dst[r].prefix, p, 4); + dst[r].pool_off = pool_off; + memcpy(pool_base + pool_off, p, l); + pool_off += l; /* cannot wrap: pool_bytes <= UINT32_MAX */ + } + } + if (vec->str_pool) vec->str_pool->len = (int64_t)pool_off; + } + return true; +} + +/* -------------------------------------------------------------------------- + * Stage 9b helper: dispatch csv_fill_str_cols and csv_intern_strings on + * separate threads when a pool is available. They write to disjoint + * column data, and intern_strings is the only one that touches the + * global sym table (so it stays single-threaded; we just run it in + * parallel with fill_str_cols). 
+ * -------------------------------------------------------------------------- */ + +typedef struct { + csv_strref_t** str_refs; + int n_cols; + const csv_type_t* parse_types; + const int8_t* resolved_types; + void** col_data; + ray_t** col_vecs; + int64_t n_rows; + int64_t* sym_max_ids; + uint8_t** col_nullmaps; + bool fill_ok; + bool intern_ok; +} csv_finalize_ctx_t; + +static void csv_finalize_task(void* arg, uint32_t worker_id, + int64_t start, int64_t end_idx) { + (void)worker_id; (void)end_idx; + csv_finalize_ctx_t* ctx = (csv_finalize_ctx_t*)arg; + if (start == 0) { + ctx->fill_ok = csv_fill_str_cols(ctx->str_refs, ctx->n_cols, + ctx->resolved_types, ctx->col_vecs, ctx->n_rows, ctx->col_nullmaps); + } else { + ctx->intern_ok = csv_intern_strings(ctx->str_refs, ctx->n_cols, + ctx->parse_types, ctx->resolved_types, ctx->col_data, + ctx->n_rows, ctx->sym_max_ids, ctx->col_nullmaps); + } +} + +/* -------------------------------------------------------------------------- + * Parallel parse context and callback + * -------------------------------------------------------------------------- */ + +typedef struct { + const char* buf; + size_t buf_size; + const int64_t* row_offsets; + int64_t n_rows; + int n_cols; + char delim; + const csv_type_t* col_types; + void** col_data; /* non-const: workers write parsed values into columns */ + csv_strref_t** str_refs; /* [n_cols] — strref arrays for string columns, NULL for others */ + uint8_t** col_nullmaps; + bool* worker_had_null; /* [n_workers * n_cols] */ +} csv_par_ctx_t; + +static void csv_parse_fn(void* arg, uint32_t worker_id, + int64_t start, int64_t end_row) { + csv_par_ctx_t* ctx = (csv_par_ctx_t*)arg; + char esc_buf[8192]; + const char* buf_end = ctx->buf + ctx->buf_size; + bool* my_had_null = &ctx->worker_had_null[(size_t)worker_id * (size_t)ctx->n_cols]; + + for (int64_t row = start; row < end_row; row++) { + const char* p = ctx->buf + ctx->row_offsets[row]; + const char* row_end = (row + 1 < ctx->n_rows) + ? 
ctx->buf + ctx->row_offsets[row + 1] + : buf_end; + + for (int c = 0; c < ctx->n_cols; c++) { + /* Guard: if past row boundary, fill remaining columns with defaults + null */ + if (p >= row_end) { + for (; c < ctx->n_cols; c++) { + switch (ctx->col_types[c]) { + case CSV_TYPE_BOOL: ((uint8_t*)ctx->col_data[c])[row] = 0; break; + case CSV_TYPE_I64: ((int64_t*)ctx->col_data[c])[row] = 0; break; + case CSV_TYPE_F64: ((double*)ctx->col_data[c])[row] = 0.0; break; + case CSV_TYPE_DATE: ((int32_t*)ctx->col_data[c])[row] = 0; break; + case CSV_TYPE_TIME: ((int32_t*)ctx->col_data[c])[row] = 0; break; + case CSV_TYPE_TIMESTAMP: + ((int64_t*)ctx->col_data[c])[row] = 0; break; + case CSV_TYPE_GUID: + memset((uint8_t*)ctx->col_data[c] + (size_t)row * 16, 0, 16); + break; + case CSV_TYPE_STR: + ctx->str_refs[c][row].ptr = NULL; + ctx->str_refs[c][row].len = 0; + break; + default: break; + } + ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); + my_had_null[c] = true; + } + break; + } + + const char* fld; + size_t flen; + char* dyn_esc = NULL; + p = scan_field(p, buf_end, ctx->delim, &fld, &flen, esc_buf, &dyn_esc); + + /* Strip trailing \r from last field of row */ + if (c == ctx->n_cols - 1 && flen > 0 && fld[flen - 1] == '\r') + flen--; + + switch (ctx->col_types[c]) { + case CSV_TYPE_BOOL: { + bool is_null; + uint8_t v = fast_bool(fld, flen, &is_null); + ((uint8_t*)ctx->col_data[c])[row] = v; + if (is_null) { + ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); + my_had_null[c] = true; + } + break; + } + case CSV_TYPE_I64: { + bool is_null; + int64_t v = fast_i64(fld, flen, &is_null); + ((int64_t*)ctx->col_data[c])[row] = v; + if (is_null) { + ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); + my_had_null[c] = true; + } + break; + } + case CSV_TYPE_F64: { + bool is_null; + double v = fast_f64(fld, flen, &is_null); + ((double*)ctx->col_data[c])[row] = v; + if (is_null) { + ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); + 
my_had_null[c] = true; + } + break; + } + case CSV_TYPE_DATE: { + bool is_null; + int32_t v = fast_date(fld, flen, &is_null); + ((int32_t*)ctx->col_data[c])[row] = v; + if (is_null) { + ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); + my_had_null[c] = true; + } + break; + } + case CSV_TYPE_TIME: { + bool is_null; + int32_t v = fast_time(fld, flen, &is_null); + ((int32_t*)ctx->col_data[c])[row] = v; + if (is_null) { + ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); + my_had_null[c] = true; + } + break; + } + case CSV_TYPE_TIMESTAMP: { + bool is_null; + int64_t v = fast_timestamp(fld, flen, &is_null); + ((int64_t*)ctx->col_data[c])[row] = v; + if (is_null) { + ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); + my_had_null[c] = true; + } + break; + } + case CSV_TYPE_GUID: { + bool is_null; + uint8_t* slot = (uint8_t*)ctx->col_data[c] + (size_t)row * 16; + fast_guid(fld, flen, slot, &is_null); + if (is_null) { + memset(slot, 0, 16); + ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); + my_had_null[c] = true; + } + break; + } + case CSV_TYPE_STR: { + if (flen == 0) { + ctx->str_refs[c][row].ptr = NULL; + ctx->str_refs[c][row].len = 0; + ctx->col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); + my_had_null[c] = true; + } else { + /* fld may point into esc_buf (stack) or dyn_esc + * (freed below) — both die before csv_fill_str_cols + * reads the strref. Persist escaped fields. 
*/ + if (fld < ctx->buf || fld >= buf_end) { + if (dyn_esc && fld == dyn_esc) { + dyn_esc = NULL; /* transfer ownership */ + } else { + char* cp = (char*)ray_sys_alloc(flen); + if (cp) { memcpy(cp, fld, flen); fld = cp; } + } + } + ctx->str_refs[c][row].ptr = fld; + ctx->str_refs[c][row].len = (uint32_t)flen; + } + break; + } + default: + break; + } + if (RAY_UNLIKELY(dyn_esc != NULL)) ray_sys_free(dyn_esc); + } + } +} + +/* -------------------------------------------------------------------------- + * Serial parse fallback (small files or no thread pool) + * -------------------------------------------------------------------------- */ + +static void csv_parse_serial(const char* buf, size_t buf_size, + const int64_t* row_offsets, int64_t n_rows, + int n_cols, char delim, + const csv_type_t* col_types, void** col_data, + csv_strref_t** str_refs, + uint8_t** col_nullmaps, bool* col_had_null) { + char esc_buf[8192]; + const char* buf_end = buf + buf_size; + + for (int64_t row = 0; row < n_rows; row++) { + const char* p = buf + row_offsets[row]; + const char* row_end = (row + 1 < n_rows) + ? 
buf + row_offsets[row + 1] + : buf_end; + + for (int c = 0; c < n_cols; c++) { + /* Guard: if past row boundary, fill remaining columns with defaults + null */ + if (p >= row_end) { + for (; c < n_cols; c++) { + switch (col_types[c]) { + case CSV_TYPE_BOOL: ((uint8_t*)col_data[c])[row] = 0; break; + case CSV_TYPE_I64: ((int64_t*)col_data[c])[row] = 0; break; + case CSV_TYPE_F64: ((double*)col_data[c])[row] = 0.0; break; + case CSV_TYPE_DATE: ((int32_t*)col_data[c])[row] = 0; break; + case CSV_TYPE_TIME: ((int32_t*)col_data[c])[row] = 0; break; + case CSV_TYPE_TIMESTAMP: + ((int64_t*)col_data[c])[row] = 0; break; + case CSV_TYPE_GUID: + memset((uint8_t*)col_data[c] + (size_t)row * 16, 0, 16); + break; + case CSV_TYPE_STR: + str_refs[c][row].ptr = NULL; + str_refs[c][row].len = 0; + break; + default: break; + } + col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); + col_had_null[c] = true; + } + break; + } + + const char* fld; + size_t flen; + char* dyn_esc = NULL; + p = scan_field(p, buf_end, delim, &fld, &flen, esc_buf, &dyn_esc); + + /* Strip trailing \r from last field of row */ + if (c == n_cols - 1 && flen > 0 && fld[flen - 1] == '\r') + flen--; + + switch (col_types[c]) { + case CSV_TYPE_BOOL: { + bool is_null; + uint8_t v = fast_bool(fld, flen, &is_null); + ((uint8_t*)col_data[c])[row] = v; + if (is_null) { + col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); + col_had_null[c] = true; + } + break; + } + case CSV_TYPE_I64: { + bool is_null; + int64_t v = fast_i64(fld, flen, &is_null); + ((int64_t*)col_data[c])[row] = v; + if (is_null) { + col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); + col_had_null[c] = true; + } + break; + } + case CSV_TYPE_F64: { + bool is_null; + double v = fast_f64(fld, flen, &is_null); + ((double*)col_data[c])[row] = v; + if (is_null) { + col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); + col_had_null[c] = true; + } + break; + } + case CSV_TYPE_DATE: { + bool is_null; + int32_t v = fast_date(fld, flen, 
&is_null); + ((int32_t*)col_data[c])[row] = v; + if (is_null) { + col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); + col_had_null[c] = true; + } + break; + } + case CSV_TYPE_TIME: { + bool is_null; + int32_t v = fast_time(fld, flen, &is_null); + ((int32_t*)col_data[c])[row] = v; + if (is_null) { + col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); + col_had_null[c] = true; + } + break; + } + case CSV_TYPE_TIMESTAMP: { + bool is_null; + int64_t v = fast_timestamp(fld, flen, &is_null); + ((int64_t*)col_data[c])[row] = v; + if (is_null) { + col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); + col_had_null[c] = true; + } + break; + } + case CSV_TYPE_GUID: { + bool is_null; + uint8_t* slot = (uint8_t*)col_data[c] + (size_t)row * 16; + fast_guid(fld, flen, slot, &is_null); + if (is_null) { + memset(slot, 0, 16); + col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); + col_had_null[c] = true; + } + break; + } + case CSV_TYPE_STR: { + if (flen == 0) { + str_refs[c][row].ptr = NULL; + str_refs[c][row].len = 0; + col_nullmaps[c][row >> 3] |= (uint8_t)(1u << (row & 7)); + col_had_null[c] = true; + } else { + /* fld may point into esc_buf (stack) or dyn_esc + * (freed below) — both die before csv_fill_str_cols + * reads the strref. Persist escaped fields. 
*/ + if (fld < buf || fld >= buf_end) { + if (dyn_esc && fld == dyn_esc) { + dyn_esc = NULL; /* transfer ownership */ + } else { + char* cp = (char*)ray_sys_alloc(flen); + if (cp) { memcpy(cp, fld, flen); fld = cp; } + } + } + str_refs[c][row].ptr = fld; + str_refs[c][row].len = (uint32_t)flen; + } + break; + } + default: + break; + } + if (RAY_UNLIKELY(dyn_esc != NULL)) ray_sys_free(dyn_esc); + } + } +} + +/* -------------------------------------------------------------------------- + * ray_read_csv_opts — main CSV parser + * -------------------------------------------------------------------------- */ + +ray_t* ray_read_csv_opts(const char* path, char delimiter, bool header, + const int8_t* col_types_in, int32_t n_types) { + /* ---- 1. Open file and get size ---- */ + int fd = open(path, O_RDONLY); + if (fd < 0) return ray_error("io", NULL); + + struct stat st; + if (fstat(fd, &st) != 0 || st.st_size <= 0) { + close(fd); + return ray_error("io", NULL); + } + size_t file_size = (size_t)st.st_size; + + /* ---- 2. mmap the file ---- */ + char* buf = (char*)mmap(NULL, file_size, PROT_READ, MMAP_FLAGS, fd, 0); + close(fd); + if (buf == MAP_FAILED) return ray_error("io", NULL); + +#ifdef __APPLE__ + madvise(buf, file_size, MADV_SEQUENTIAL); +#endif + + const char* buf_end = buf + file_size; + ray_t* result = NULL; + + /* ---- 3. Detect delimiter ---- */ + /* Delimiter auto-detected from header row only. Files where the header + * has a different delimiter distribution than data rows may be misdetected; + * pass an explicit delimiter for such files. Scanning additional data rows + * was considered but adds complexity for a rare edge case. */ + if (delimiter == 0) { + int commas = 0, tabs = 0; + for (const char* p = buf; p < buf_end && *p != '\n'; p++) { + if (*p == ',') commas++; + if (*p == '\t') tabs++; + } + delimiter = (tabs > commas) ? '\t' : ','; + } + + /* ---- 4. 
Count columns from first line ---- */ + int ncols = 1; + { + const char* p = buf; + bool in_quote = false; + while (p < buf_end && (in_quote || (*p != '\n' && *p != '\r'))) { + if (*p == '"') in_quote = !in_quote; + else if (!in_quote && *p == delimiter) ncols++; + p++; + } + } + if (ncols > CSV_MAX_COLS) { + munmap(buf, file_size); + /* fd already closed after mmap (line 1044) — do not close again */ + return ray_error("range", NULL); /* too many columns */ + } + + /* ---- 5. Parse header row ---- */ + const char* p = buf; + char esc_buf[8192]; + int64_t col_name_ids[CSV_MAX_COLS]; + + if (header) { + for (int c = 0; c < ncols; c++) { + const char* fld; + size_t flen; + char* dyn_esc = NULL; + p = scan_field(p, buf_end, delimiter, &fld, &flen, esc_buf, &dyn_esc); + col_name_ids[c] = ray_sym_intern(fld, flen); + if (dyn_esc) ray_sys_free(dyn_esc); + } + /* Consume exactly one line terminator (\r, \n, or \r\n) after the + * header row — NOT a run of newlines, because subsequent empty + * lines are null data rows. */ + if (p < buf_end && *p == '\r') p++; + if (p < buf_end && *p == '\n') p++; + } else { + for (int c = 0; c < ncols; c++) { + char name[32]; + snprintf(name, sizeof(name), "V%d", c + 1); + col_name_ids[c] = ray_sym_intern(name, strlen(name)); + } + } + + size_t data_offset = (size_t)(p - buf); + + /* ---- 6. Build row offsets (memchr-accelerated) ---- */ + ray_t* row_offsets_hdr = NULL; + int64_t* row_offsets = NULL; + int64_t n_rows = build_row_offsets(buf, file_size, data_offset, + &row_offsets, &row_offsets_hdr); + + if (n_rows == 0) { + /* Empty file → empty table */ + ray_t* tbl = ray_table_new(ncols); + if (!tbl || RAY_IS_ERR(tbl)) goto fail_unmap; + for (int c = 0; c < ncols; c++) { + ray_t* empty_vec = ray_vec_new(RAY_F64, 0); + if (empty_vec && !RAY_IS_ERR(empty_vec)) { + tbl = ray_table_add_col(tbl, col_name_ids[c], empty_vec); + ray_release(empty_vec); + } + } + munmap(buf, file_size); + return tbl; + } + + /* ---- 7. 
Resolve column types ---- */ + int8_t resolved_types[CSV_MAX_COLS]; + if (col_types_in && n_types >= ncols) { + /* Explicit types provided by caller — validate against known types */ + for (int c = 0; c < ncols; c++) { + int8_t t = col_types_in[c]; + if (t < RAY_BOOL || t >= RAY_TYPE_COUNT || t == RAY_TABLE) { + /* Invalid type constant — fall through to error */ + goto fail_offsets; + } + resolved_types[c] = t; + } + } else if (!col_types_in) { + /* Auto-infer from sample rows */ + csv_type_t col_types[CSV_MAX_COLS]; + memset(col_types, 0, (size_t)ncols * sizeof(csv_type_t)); + /* Type inference from first 100 rows. Heterogeneous CSVs with type + * changes after row 100 will be mistyped. Use explicit schema + * (col_types_in) for such files. */ + int64_t sample_n = (n_rows < CSV_SAMPLE_ROWS) ? n_rows : CSV_SAMPLE_ROWS; + for (int64_t r = 0; r < sample_n; r++) { + const char* rp = buf + row_offsets[r]; + for (int c = 0; c < ncols; c++) { + const char* fld; + size_t flen; + char* dyn_esc = NULL; + rp = scan_field(rp, buf_end, delimiter, &fld, &flen, esc_buf, &dyn_esc); + csv_type_t t = detect_type(fld, flen); + if (dyn_esc) ray_sys_free(dyn_esc); + col_types[c] = promote_csv_type(col_types[c], t); + } + } + for (int c = 0; c < ncols; c++) { + switch (col_types[c]) { + case CSV_TYPE_BOOL: resolved_types[c] = RAY_BOOL; break; + case CSV_TYPE_I64: resolved_types[c] = RAY_I64; break; + case CSV_TYPE_F64: resolved_types[c] = RAY_F64; break; + case CSV_TYPE_DATE: resolved_types[c] = RAY_DATE; break; + case CSV_TYPE_TIME: resolved_types[c] = RAY_TIME; break; + case CSV_TYPE_TIMESTAMP: resolved_types[c] = RAY_TIMESTAMP; break; + default: resolved_types[c] = RAY_SYM; break; + } + } + } else { + /* col_types_in provided but too short — error */ + goto fail_offsets; + } + + /* ---- 8. 
Allocate column vectors ---- */ + ray_t* col_vecs[CSV_MAX_COLS]; + void* col_data[CSV_MAX_COLS]; + + for (int c = 0; c < ncols; c++) { + int8_t type = resolved_types[c]; + /* String columns: allocate RAY_SYM at W32 (4B/elem) for sym IDs. + * After intern, narrow to W8/W16 if max sym ID permits. */ + col_vecs[c] = (type == RAY_SYM) ? ray_sym_vec_new(RAY_SYM_W32, n_rows) + : ray_vec_new(type, n_rows); + if (!col_vecs[c] || RAY_IS_ERR(col_vecs[c])) { + for (int j = 0; j < c; j++) ray_release(col_vecs[j]); + goto fail_offsets; + } + /* len set early so parallel workers can write to full extent; + * parse errors return before table is used. */ + col_vecs[c]->len = n_rows; + col_data[c] = ray_data(col_vecs[c]); + } + + /* ---- 8b. Pre-allocate nullmaps for all columns ---- */ + uint8_t* col_nullmaps[CSV_MAX_COLS]; + bool col_had_null[CSV_MAX_COLS]; + if (ncols > 0) memset(col_had_null, 0, (size_t)ncols * sizeof(bool)); + + for (int c = 0; c < ncols; c++) { + ray_t* vec = col_vecs[c]; + /* RAY_STR aliases bytes 8-15 of the header with str_pool — inline + * nullmap would corrupt the pool pointer, so force external. 
*/ + bool force_ext = (resolved_types[c] == RAY_STR); + if (n_rows <= 128 && !force_ext) { + vec->attrs |= RAY_ATTR_HAS_NULLS; + memset(vec->nullmap, 0, 16); + col_nullmaps[c] = vec->nullmap; + } else { + size_t bmp_bytes = ((size_t)n_rows + 7) / 8; + ray_t* ext = ray_vec_new(RAY_U8, (int64_t)bmp_bytes); + if (!ext || RAY_IS_ERR(ext)) { + for (int j = 0; j <= c; j++) ray_release(col_vecs[j]); + goto fail_offsets; + } + ext->len = (int64_t)bmp_bytes; + memset(ray_data(ext), 0, bmp_bytes); + vec->ext_nullmap = ext; + vec->attrs |= RAY_ATTR_HAS_NULLS | RAY_ATTR_NULLMAP_EXT; + col_nullmaps[c] = (uint8_t*)ray_data(ext); + } + } + + /* Build csv_type_t array for parse functions (maps td types → csv types) */ + csv_type_t parse_types[CSV_MAX_COLS]; + for (int c = 0; c < ncols; c++) { + switch (resolved_types[c]) { + case RAY_BOOL: parse_types[c] = CSV_TYPE_BOOL; break; + case RAY_I64: parse_types[c] = CSV_TYPE_I64; break; + case RAY_F64: parse_types[c] = CSV_TYPE_F64; break; + case RAY_DATE: parse_types[c] = CSV_TYPE_DATE; break; + case RAY_TIME: parse_types[c] = CSV_TYPE_TIME; break; + case RAY_TIMESTAMP: parse_types[c] = CSV_TYPE_TIMESTAMP; break; + case RAY_GUID: parse_types[c] = CSV_TYPE_GUID; break; + default: parse_types[c] = CSV_TYPE_STR; break; + } + } + + /* ---- 9. 
Parse data ---- */ + int64_t sym_max_ids[CSV_MAX_COLS]; + memset(sym_max_ids, 0, (size_t)ncols * sizeof(int64_t)); + + /* Check if any string columns exist */ + int has_str_cols = 0; + for (int c = 0; c < ncols; c++) { + if (parse_types[c] == CSV_TYPE_STR) { has_str_cols = 1; break; } + } + + /* Allocate strref arrays for string columns (temporary, freed after intern) */ + csv_strref_t* str_ref_bufs[CSV_MAX_COLS]; + ray_t* str_ref_hdrs[CSV_MAX_COLS]; + memset(str_ref_bufs, 0, sizeof(str_ref_bufs)); + memset(str_ref_hdrs, 0, sizeof(str_ref_hdrs)); + for (int c = 0; c < ncols; c++) { + if (parse_types[c] == CSV_TYPE_STR) { + size_t sz = (size_t)n_rows * sizeof(csv_strref_t); + str_ref_bufs[c] = (csv_strref_t*)scratch_alloc(&str_ref_hdrs[c], sz); + if (!str_ref_bufs[c]) { + for (int j = 0; j < ncols; j++) ray_release(col_vecs[j]); + for (int j = 0; j < c; j++) scratch_free(str_ref_hdrs[j]); + goto fail_offsets; + } + } + } + + { + ray_pool_t* pool = ray_pool_get(); + bool use_parallel = pool && n_rows > 8192; + + if (use_parallel) { + uint32_t n_workers = ray_pool_total_workers(pool); + size_t whn_sz = (size_t)n_workers * (size_t)ncols * sizeof(bool); + bool* worker_had_null_buf = (bool*)ray_sys_alloc(whn_sz); + if (!worker_had_null_buf) { + use_parallel = false; + } else { + memset(worker_had_null_buf, 0, whn_sz); + + csv_par_ctx_t ctx = { + .buf = buf, + .buf_size = file_size, + .row_offsets = row_offsets, + .n_rows = n_rows, + .n_cols = ncols, + .delim = delimiter, + .col_types = parse_types, + .col_data = col_data, + .str_refs = str_ref_bufs, + .col_nullmaps = col_nullmaps, + .worker_had_null = worker_had_null_buf, + }; + + ray_pool_dispatch(pool, csv_parse_fn, &ctx, n_rows); + + /* OR worker null flags into col_had_null */ + for (uint32_t w = 0; w < n_workers; w++) { + for (int c = 0; c < ncols; c++) { + if (worker_had_null_buf[(size_t)w * (size_t)ncols + (size_t)c]) + col_had_null[c] = true; + } + } + ray_sys_free(worker_had_null_buf); + } + } + + if 
(!use_parallel) { + csv_parse_serial(buf, file_size, row_offsets, n_rows, + ncols, delimiter, parse_types, col_data, + str_ref_bufs, col_nullmaps, col_had_null); + } + } + + /* ---- 9b. Materialize RAY_STR columns AND batch-intern sym columns ---- + * These two phases touch disjoint columns and (after the GUID fix) + * intern_strings is the only one that mutates the global sym table. + * Dispatch them as two thread-pool tasks so they overlap in wall time + * — typically saves the smaller of the two phases. */ + if (has_str_cols) { + csv_finalize_ctx_t fctx = { + .str_refs = str_ref_bufs, + .n_cols = ncols, + .parse_types = parse_types, + .resolved_types = resolved_types, + .col_data = col_data, + .col_vecs = col_vecs, + .n_rows = n_rows, + .sym_max_ids = sym_max_ids, + .col_nullmaps = col_nullmaps, + .fill_ok = true, + .intern_ok = true, + }; + ray_pool_t* fpool = ray_pool_get(); + if (fpool && ray_pool_total_workers(fpool) >= 2) { + ray_pool_dispatch_n(fpool, csv_finalize_task, &fctx, 2); + } else { + csv_finalize_task(&fctx, 0, 0, 1); + csv_finalize_task(&fctx, 0, 1, 2); + } + if (!fctx.fill_ok || !fctx.intern_ok) { + csv_free_escaped_strrefs(str_ref_bufs, ncols, parse_types, n_rows, buf, file_size); + for (int c = 0; c < ncols; c++) scratch_free(str_ref_hdrs[c]); + for (int c = 0; c < ncols; c++) ray_release(col_vecs[c]); + goto fail_offsets; + } + } + + /* Free heap-allocated escaped string copies, then strref buffers */ + csv_free_escaped_strrefs(str_ref_bufs, ncols, parse_types, n_rows, buf, file_size); + for (int c = 0; c < ncols; c++) scratch_free(str_ref_hdrs[c]); + + /* ---- 9c. 
Strip nullmaps from all-valid columns ---- */ + for (int c = 0; c < ncols; c++) { + if (col_had_null[c]) continue; + ray_t* vec = col_vecs[c]; + if (vec->attrs & RAY_ATTR_NULLMAP_EXT) { + ray_release(vec->ext_nullmap); + vec->ext_nullmap = NULL; + } + vec->attrs &= (uint8_t)~(RAY_ATTR_HAS_NULLS | RAY_ATTR_NULLMAP_EXT); + /* RAY_STR stores str_pool in bytes 8-15 of the header — don't wipe. */ + if (vec->type != RAY_STR) memset(vec->nullmap, 0, 16); + } + + /* ---- 10. Narrow sym columns to optimal width ---- */ + for (int c = 0; c < ncols; c++) { + if (resolved_types[c] != RAY_SYM) continue; + uint8_t new_w = ray_sym_dict_width(sym_max_ids[c]); + if (new_w >= RAY_SYM_W32) continue; /* already at W32, no savings */ + ray_t* narrow = ray_sym_vec_new(new_w, n_rows); + if (!narrow || RAY_IS_ERR(narrow)) continue; + narrow->len = n_rows; + const uint32_t* src = (const uint32_t*)col_data[c]; + void* dst = ray_data(narrow); + if (new_w == RAY_SYM_W8) { + uint8_t* d = (uint8_t*)dst; + for (int64_t r = 0; r < n_rows; r++) d[r] = (uint8_t)src[r]; + } else { /* RAY_SYM_W16 */ + uint16_t* d = (uint16_t*)dst; + for (int64_t r = 0; r < n_rows; r++) d[r] = (uint16_t)src[r]; + } + /* Transfer nullmap to narrowed vector */ + if (col_vecs[c]->attrs & RAY_ATTR_HAS_NULLS) { + narrow->attrs |= (col_vecs[c]->attrs & (RAY_ATTR_HAS_NULLS | RAY_ATTR_NULLMAP_EXT)); + if (col_vecs[c]->attrs & RAY_ATTR_NULLMAP_EXT) { + narrow->ext_nullmap = col_vecs[c]->ext_nullmap; + ray_retain(narrow->ext_nullmap); + } else { + memcpy(narrow->nullmap, col_vecs[c]->nullmap, 16); + } + } + ray_release(col_vecs[c]); + col_vecs[c] = narrow; + col_data[c] = dst; + } + + /* ---- 11. 
Build table ---- */ + { + ray_t* tbl = ray_table_new(ncols); + if (!tbl || RAY_IS_ERR(tbl)) { + for (int c = 0; c < ncols; c++) ray_release(col_vecs[c]); + goto fail_offsets; + } + + for (int c = 0; c < ncols; c++) { + tbl = ray_table_add_col(tbl, col_name_ids[c], col_vecs[c]); + ray_release(col_vecs[c]); + } + + result = tbl; + } + + /* ---- 12. Cleanup ---- */ + scratch_free(row_offsets_hdr); + munmap(buf, file_size); + return result; + + /* Error paths */ +fail_offsets: + scratch_free(row_offsets_hdr); +fail_unmap: + munmap(buf, file_size); + return ray_error("oom", NULL); +} + +/* -------------------------------------------------------------------------- + * ray_read_csv — convenience wrapper with default options + * -------------------------------------------------------------------------- */ + +ray_t* ray_read_csv(const char* path) { + return ray_read_csv_opts(path, 0, true, NULL, 0); +} + +/* ============================================================================ + * ray_write_csv — Write a table to a CSV file (RFC 4180) + * + * Writes header row with column names, then data rows. + * Strings containing commas, quotes, or newlines are quoted. + * Returns RAY_OK on success, error code on failure. + * ============================================================================ */ + +/* ----------------------------------------------------------------------------- + * write-csv writer state + * + * Wraps FILE* with a sticky error flag so the dispatch loop can stay flat + * and still report the first I/O error. On any write failure subsequent + * writes are skipped and the final ray_write_csv returns RAY_ERR_IO. 
+ * --------------------------------------------------------------------------- */ + +typedef struct csv_writer_t { + FILE* fp; + int err; /* 0 = OK, non-zero = sticky error */ +} csv_writer_t; + +static inline void cw_putc(csv_writer_t* w, int c) { + if (w->err) return; + if (fputc(c, w->fp) == EOF) w->err = 1; +} + +static inline void cw_write(csv_writer_t* w, const char* s, size_t len) { + if (w->err || len == 0) return; + if (fwrite(s, 1, len, w->fp) != len) w->err = 1; +} + +static inline void cw_puts(csv_writer_t* w, const char* s) { + if (!s) return; + cw_write(w, s, strlen(s)); +} + +/* bounded, error-propagating fprintf replacement */ +static void cw_printf(csv_writer_t* w, const char* fmt, ...) { + if (w->err) return; + char buf[64]; + va_list ap; + va_start(ap, fmt); + int n = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + if (n < 0) { w->err = 1; return; } + if ((size_t)n >= sizeof(buf)) { w->err = 1; return; } + cw_write(w, buf, (size_t)n); +} + +/* Write a string value, quoting if it contains special chars */ +static void csv_write_str(csv_writer_t* w, const char* s, size_t len) { + int need_quote = 0; + for (size_t i = 0; i < len; i++) { + if (s[i] == ',' || s[i] == '"' || s[i] == '\n' || s[i] == '\r') { + need_quote = 1; + break; + } + } + if (need_quote) { + cw_putc(w, '"'); + size_t start = 0; + for (size_t i = 0; i < len; i++) { + if (s[i] == '"') { + cw_write(w, s + start, i - start); + cw_putc(w, '"'); /* escaped quote */ + start = i; + } + } + cw_write(w, s + start, len - start); + cw_putc(w, '"'); + } else { + cw_write(w, s, len); + } +} + +static void csv_write_date(csv_writer_t* w, int32_t v) { + /* days since 2000-01-01 → YYYY-MM-DD, civil_from_days (Hinnant) */ + int32_t z = v + 10957 + 719468; + int32_t era = (z >= 0 ? 
z : z - 146096) / 146097; + uint32_t doe = (uint32_t)(z - era * 146097); + uint32_t yoe = (doe - doe/1460 + doe/36524 - doe/146096) / 365; + int32_t y = (int32_t)yoe + era * 400; + uint32_t doy = doe - (365*yoe + yoe/4 - yoe/100); + uint32_t mp = (5*doy + 2) / 153; + int32_t d = (int32_t)(doy - (153*mp + 2)/5 + 1); + int32_t m = (int32_t)(mp < 10 ? mp + 3 : mp - 9); + if (m <= 2) y++; + cw_printf(w, "%04d-%02d-%02d", y, m, d); +} + +static void csv_write_time(csv_writer_t* w, int32_t ms) { + /* RAY_TIME is a signed ms-of-day. Negative values represent + * negative durations (Rayforce convention); render them + * with a leading "-" and the absolute magnitude rather than + * wrapping modulo one day, which would lose the sign. */ + int32_t sign = ms < 0 ? -1 : 1; + /* Absolute value: handle INT32_MIN by widening. */ + uint32_t u = (ms == INT32_MIN) ? (uint32_t)INT32_MAX + 1u : (uint32_t)(sign == -1 ? -ms : ms); + uint32_t h = u / 3600000u; + uint32_t mi = (u % 3600000u) / 60000u; + uint32_t s = (u % 60000u) / 1000u; + uint32_t frac = u % 1000u; + if (sign == -1) cw_putc(w, '-'); + if (frac) cw_printf(w, "%02u:%02u:%02u.%03u", h, mi, s, frac); + else cw_printf(w, "%02u:%02u:%02u", h, mi, s); +} + +static void csv_write_timestamp(csv_writer_t* w, int64_t ns) { + /* RAY_TIMESTAMP stores *nanoseconds* since 2000-01-01, matching + * the language-level formatter (src/lang/format.c:ts_to_parts). + * Splitting with C's truncating / and % rounds toward zero, so + * fix up after the fact for negative values. */ + const int64_t NS_PER_DAY = 86400000000000LL; + int64_t days = ns / NS_PER_DAY; + int64_t ns_in = ns % NS_PER_DAY; + if (ns_in < 0) { days--; ns_in += NS_PER_DAY; } + /* int64 ns / NS_PER_DAY is bounded by ±~106,752 days above INT32, + * so even INT64_MIN fits once converted to days. 
Still, use + * int64 through csv_write_date by taking the low bits — any + * timestamp that actually fits in an int64 ns count produces a + * days value well within int32 range (~±5.88M years). */ + csv_write_date(w, (int32_t)days); + cw_putc(w, 'T'); + uint64_t tns = (uint64_t)ns_in; + uint32_t h = (uint32_t)(tns / 3600000000000ULL); + uint32_t mi = (uint32_t)((tns % 3600000000000ULL) / 60000000000ULL); + uint32_t s = (uint32_t)((tns % 60000000000ULL) / 1000000000ULL); + uint32_t frac = (uint32_t)(tns % 1000000000ULL); + if (frac) cw_printf(w, "%02u:%02u:%02u.%09u", h, mi, s, frac); + else cw_printf(w, "%02u:%02u:%02u", h, mi, s); +} + +static void csv_write_f64(csv_writer_t* w, double v) { + if (isnan(v)) { cw_puts(w, "nan"); return; } + if (isinf(v)) { cw_puts(w, v < 0 ? "-inf" : "inf"); return; } + /* %.17g is the standard round-trip format; wrap in cw_printf so + * a 64-byte buffer stack overflow guards the write. */ + cw_printf(w, "%.17g", v); +} + +static void csv_write_guid(csv_writer_t* w, const uint8_t* g) { + /* RFC 4122 canonical: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx */ + cw_printf(w, + "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", + g[0], g[1], g[2], g[3], g[4], g[5], g[6], g[7], + g[8], g[9], g[10], g[11], g[12], g[13], g[14], g[15]); +} + +/* Per-column resolution: slice-aware data pointer, base row offset, + * underlying parent (for ray_vec_is_null), and a cached null flag. 
*/ +typedef struct csv_col_info_t { + ray_t* col; /* original column (may be sliced) */ + ray_t* data_owner; /* slice_parent or col */ + int64_t base_row; /* slice_offset or 0 */ + const void* data; /* ray_data(data_owner) */ + int8_t type; + uint8_t attrs; /* of data_owner */ + bool has_nulls; /* requires per-row ray_vec_is_null probe */ +} csv_col_info_t; + +static void csv_col_info_init(csv_col_info_t* ci, ray_t* col) { + ci->col = col; + ci->data_owner = col; + ci->base_row = 0; + if (col && (col->attrs & RAY_ATTR_SLICE) && col->slice_parent) { + ci->data_owner = col->slice_parent; + ci->base_row = col->slice_offset; + } + ci->type = col ? col->type : 0; + ci->attrs = ci->data_owner ? ci->data_owner->attrs : 0; + ci->data = ci->data_owner ? ray_data(ci->data_owner) : NULL; + /* has_nulls must consult the slice_parent, since a slice view + * never carries its own nullmap — ray_vec_is_null handles the + * redirect but we still want a fast bypass when neither has nulls. */ + ci->has_nulls = false; + if (col && (col->attrs & RAY_ATTR_HAS_NULLS)) ci->has_nulls = true; + if (ci->data_owner && (ci->data_owner->attrs & RAY_ATTR_HAS_NULLS)) + ci->has_nulls = true; +} + +static void csv_write_cell(csv_writer_t* w, const csv_col_info_t* ci, int64_t r) { + if (!ci->col) return; + /* Null cell -> empty field (consistent with read-csv). */ + if (ci->has_nulls && ray_vec_is_null(ci->col, r)) return; + + int64_t dr = ci->base_row + r; + int8_t t = ci->type; + const void* d = ci->data; + + switch (t) { + case RAY_I64: case RAY_TIMESTAMP: break; /* handled below */ + default: break; + } + + switch (t) { + case RAY_I64: + cw_printf(w, "%" PRId64, ((const int64_t*)d)[dr]); + break; + case RAY_I32: + cw_printf(w, "%" PRId32, ((const int32_t*)d)[dr]); + break; + case RAY_I16: + cw_printf(w, "%d", (int)((const int16_t*)d)[dr]); + break; + case RAY_BOOL: + cw_puts(w, ((const uint8_t*)d)[dr] ? 
"true" : "false"); + break; + case RAY_U8: + cw_printf(w, "%u", (unsigned)((const uint8_t*)d)[dr]); + break; + case RAY_F64: + csv_write_f64(w, ((const double*)d)[dr]); + break; + case RAY_DATE: + csv_write_date(w, ((const int32_t*)d)[dr]); + break; + case RAY_TIME: + csv_write_time(w, ((const int32_t*)d)[dr]); + break; + case RAY_TIMESTAMP: + csv_write_timestamp(w, ((const int64_t*)d)[dr]); + break; + case RAY_SYM: { + int64_t sym = ray_read_sym(d, dr, t, ci->attrs); + ray_t* s = ray_sym_str(sym); + if (s) csv_write_str(w, ray_str_ptr(s), ray_str_len(s)); + /* unknown sym id -> empty field rather than a phantom value */ + break; + } + case RAY_STR: { + /* ray_str_vec_get accepts the original (possibly sliced) col and + * resolves the parent+offset internally. It returns NULL for + * nulls, which we already filtered above, so treat NULL as + * empty-but-valid (e.g. a 0-length inline string). */ + size_t slen = 0; + const char* sp = ray_str_vec_get(ci->col, r, &slen); + csv_write_str(w, sp ? sp : "", slen); + break; + } + case RAY_GUID: + csv_write_guid(w, (const uint8_t*)d + dr * 16); + break; + case RAY_LIST: { + /* LIST cells: recursively format each element as a string via + * the atom's printable representation. For nested tables / + * lists-of-lists this produces a best-effort flat string; the + * whole list field is quoted to keep commas inside from + * breaking column alignment. A LIST element is itself a + * ray_t*, so reuse ray_fmt to get a string form. */ + ray_t** elems = (ray_t**)d; + ray_t* e = elems[dr]; + if (!e || RAY_IS_ERR(e)) return; + ray_t* fmt = ray_fmt(e, false); + if (!fmt || RAY_IS_ERR(fmt)) return; + csv_write_str(w, ray_str_ptr(fmt), ray_str_len(fmt)); + ray_release(fmt); + break; + } + default: + /* Unhandled type: emit an empty field rather than corrupting + * downstream columns. Callers can inspect the file and see + * the missing data explicitly. 
*/ + break; + } +} + +ray_err_t ray_write_csv(ray_t* table, const char* path) { + if (!table || !path || path[0] == '\0') return RAY_ERR_TYPE; + + int64_t ncols = ray_table_ncols(table); + int64_t nrows = ray_table_nrows(table); + if (ncols <= 0) return RAY_ERR_TYPE; + + /* Crash-safe atomic write: tmp -> fsync -> rename. Mirrors + * ray_col_save so an interrupted write never replaces the + * destination with a partial file. */ + char tmp_path[1024]; + if (snprintf(tmp_path, sizeof(tmp_path), "%s.tmp", path) >= (int)sizeof(tmp_path)) + return RAY_ERR_IO; + + FILE* fp = fopen(tmp_path, "wb"); + if (!fp) return RAY_ERR_IO; + + csv_writer_t w = { .fp = fp, .err = 0 }; + + /* Resolve every column once (slice parent, nullability, type) so + * the hot loop just indexes into pre-computed pointers. */ + ray_t* col_info_block = ray_alloc((size_t)ncols * sizeof(csv_col_info_t)); + if (!col_info_block || RAY_IS_ERR(col_info_block)) { + fclose(fp); + remove(tmp_path); + return RAY_ERR_OOM; + } + csv_col_info_t* ci = (csv_col_info_t*)ray_data(col_info_block); + for (int64_t c = 0; c < ncols; c++) + csv_col_info_init(&ci[c], ray_table_get_col_idx(table, c)); + + /* Header row: column names */ + for (int64_t c = 0; c < ncols; c++) { + if (c > 0) cw_putc(&w, ','); + int64_t name_id = ray_table_col_name(table, c); + ray_t* name_atom = ray_sym_str(name_id); + if (name_atom) + csv_write_str(&w, ray_str_ptr(name_atom), ray_str_len(name_atom)); + } + cw_putc(&w, '\n'); + + /* Data rows */ + for (int64_t r = 0; r < nrows && !w.err; r++) { + for (int64_t c = 0; c < ncols; c++) { + if (c > 0) cw_putc(&w, ','); + csv_write_cell(&w, &ci[c], r); + } + cw_putc(&w, '\n'); + } + + ray_free(col_info_block); + + /* Flush user-space buffer before fsync/rename. */ + if (fflush(fp) != 0) w.err = 1; + int close_err = (fclose(fp) != 0); + if (close_err) w.err = 1; + + if (w.err) { + remove(tmp_path); + return RAY_ERR_IO; + } + + /* fsync the temp file so the rename is backed by durable bytes. 
*/ + ray_fd_t fd = ray_file_open(tmp_path, RAY_OPEN_READ | RAY_OPEN_WRITE); + if (fd == RAY_FD_INVALID) { remove(tmp_path); return RAY_ERR_IO; } + ray_err_t sync_err = ray_file_sync(fd); + ray_file_close(fd); + if (sync_err != RAY_OK) { remove(tmp_path); return sync_err; } + + ray_err_t rn_err = ray_file_rename(tmp_path, path); + if (rn_err != RAY_OK) { remove(tmp_path); return rn_err; } + + return RAY_OK; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/io/csv.h b/crates/rayforce-sys/vendor/rayforce/src/io/csv.h new file mode 100644 index 0000000..2240ae4 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/io/csv.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAY_CSV_H +#define RAY_CSV_H + +#include + +ray_t* ray_read_csv(const char* path); +ray_t* ray_read_csv_opts(const char* path, char delimiter, bool header, + const int8_t* col_types, int32_t n_types); +ray_err_t ray_write_csv(ray_t* table, const char* path); + +#endif /* RAY_CSV_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/lang/cal.h b/crates/rayforce-sys/vendor/rayforce/src/lang/cal.h new file mode 100644 index 0000000..a36aee8 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/lang/cal.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_CAL_H +#define RAY_CAL_H + +#include + +/* ===== Calendar primitives shared by format.c and parse.c ===== */ + +#define RAY_DATE_EPOCH 2000 + +/* Cumulative days-in-month lookup: [leap][month]. + * Index 0 = Jan start (0 days), index 12 = Dec end (365 or 366). 
*/
+static const uint32_t MONTHDAYS[2][13] = {
+    {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
+    {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366},
+};
+
+/* Gregorian leap-year test: non-zero when `year` is a leap year. */
+static inline int date_leap_year(int year) {
+    return (year % 4 == 0 && year % 100 != 0) || year % 400 == 0;
+}
+
+/* Total number of days in the first `yy` Gregorian years: 365 per year
+ * plus one per leap year under the 4/100/400 rule.  The multiply is done
+ * in 64 bits before narrowing back to int32_t. */
+static inline int32_t date_years_by_days(int yy) {
+    return (int32_t)((int64_t)yy * 365 + yy / 4 - yy / 100 + yy / 400);
+}
+
+/* Decode: days-since-epoch → year/month/day
+ *
+ * `days` is relative to 1 January RAY_DATE_EPOCH.  Results are written
+ * through `y`, `m` (1-based) and `d` (1-based); all three must be
+ * non-NULL. */
+static inline void date_to_ymd(int32_t days, int* y, int* m, int* d) {
+    /* Re-base onto a day count that starts at year 1. */
+    int32_t offset = days + date_years_by_days(RAY_DATE_EPOCH - 1);
+
+    /* Estimate the number of whole years by rounding to nearest, then
+     * step back one year if the estimate overshoots the day count. */
+    double approx = (double)offset / 365.2425;
+    int32_t years = (int32_t)(approx >= 0.0 ? approx + 0.5 : approx - 0.5);
+
+    if (date_years_by_days(years) > offset)
+        years -= 1;
+
+    int32_t rem = offset - date_years_by_days(years); /* day within the year */
+    int yy = years + 1;
+    int leap = date_leap_year(yy);
+    int mid = 0;
+
+    /* Scan from the year-end entry downwards for the first cumulative
+     * total that `rem` reaches; mid ends at 0 when none does (January). */
+    for (mid = 12; mid > 0; mid--)
+        if (MONTHDAYS[leap][mid] != 0 && rem / (int32_t)MONTHDAYS[leap][mid] != 0)
+            break;
+
+    /* Defensive: a remainder at or past the year-end entry falls back to
+     * the January slot instead of reporting a 13th month. */
+    if (mid == 12 || mid < 0)
+        mid = 0;
+
+    *y = yy;
+    *m = 1 + mid % 12;
+    *d = 1 + rem - (int32_t)MONTHDAYS[leap][mid];
+}
+
+/* Encode: year/month/day → days-since-epoch
+ *
+ * `year` <= 0 is treated as year 1 for the year-offset term and
+ * `month` <= 0 as January.  Months beyond the table are clamped to the
+ * year-end entry (index 12) so the lookup can never read past the
+ * 13-element MONTHDAYS row; results for months 1..13 are unchanged.
+ * `day` is not range-checked. */
+static inline int32_t ymd_to_date(int year, int month, int day) {
+    int yy = (year > 0) ? year - 1 : 0;
+    int32_t ydays = date_years_by_days(yy);
+    int leap = date_leap_year(year);
+    int mm = (month > 0) ? month - 1 : 0;
+    if (mm > 12)
+        mm = 12; /* previously an out-of-bounds read */
+    int32_t mdays = (int32_t)MONTHDAYS[leap][mm];
+    return ydays - date_years_by_days(RAY_DATE_EPOCH - 1) + mdays + day - 1;
+}
+
+#endif /* RAY_CAL_H */
diff --git a/crates/rayforce-sys/vendor/rayforce/src/lang/compile.c b/crates/rayforce-sys/vendor/rayforce/src/lang/compile.c
new file mode 100644
index 0000000..61bc2cf
--- /dev/null
+++ b/crates/rayforce-sys/vendor/rayforce/src/lang/compile.c
@@ -0,0 +1,518 @@
+/*
+ * Copyright (c) 2025-2026 Anton Kundenko
+ * All rights reserved.
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "lang/eval.h" +#include "lang/env.h" +#include "lang/nfo.h" +#include +#include + +/* ── Compiler state ── + * Internal buffers are ray_t objects whose data area holds the raw + * bytes / pointers. This avoids calling malloc/free. 
*/
+typedef struct {
+    ray_t   *code_obj;    /* RAY_U8 vector used as growable byte buffer */
+    uint8_t *code;        /* == ray_data(code_obj) */
+    int32_t  code_len;
+    int32_t  code_cap;
+
+    ray_t   *consts_obj;  /* RAY_LIST used as growable pointer array */
+    ray_t  **consts;      /* == ray_data(consts_obj) */
+    int32_t  n_consts;
+    int32_t  consts_cap;
+
+    struct { int64_t sym_id; int32_t slot; } locals[256];
+    int32_t  n_locals;
+    int32_t  max_locals;
+    bool     error;
+    ray_t   *lambda;      /* the lambda being compiled (for 'self' resolution) */
+
+    ray_t   *dbg_obj;     /* I64 vector: pairs of [offset, span.id] */
+    int32_t  dbg_len;
+} compiler_t;
+
+static void compile_expr(compiler_t *c, ray_t *ast);
+
+/* Zero the compiler state and allocate the initial code buffer (256
+ * bytes) and constant pool (16 slots).  Returns false, owning nothing,
+ * when either allocation fails.  Allocation results are checked with
+ * both NULL and RAY_IS_ERR, matching add_constant below. */
+static bool compiler_init(compiler_t *c) {
+    memset(c, 0, sizeof(*c));
+    c->code_cap = 256;
+    c->code_obj = ray_alloc(c->code_cap);
+    if (!c->code_obj || RAY_IS_ERR(c->code_obj)) {
+        c->code_obj = NULL;
+        return false;
+    }
+    c->code_obj->type = RAY_U8;
+    c->code_obj->len = 0;
+    c->code = (uint8_t *)ray_data(c->code_obj);
+
+    c->consts_cap = 16;
+    c->consts_obj = ray_alloc(c->consts_cap * sizeof(ray_t *));
+    if (!c->consts_obj || RAY_IS_ERR(c->consts_obj)) {
+        c->consts_obj = NULL;
+        ray_release(c->code_obj);
+        c->code_obj = NULL;
+        return false;
+    }
+    c->consts_obj->type = RAY_LIST;
+    c->consts_obj->len = 0;
+    c->consts = (ray_t **)ray_data(c->consts_obj);
+    memset(c->consts, 0, c->consts_cap * sizeof(ray_t *));
+    return true;
+}
+
+/* Drop the references held by the constant pool, then the pool and code
+ * buffers themselves.  dbg_obj is deliberately not touched here; the
+ * caller decides whether it is released or handed to the lambda. */
+static void compiler_destroy(compiler_t *c) {
+    for (int32_t i = 0; i < c->n_consts; i++)
+        if (c->consts[i]) ray_release(c->consts[i]);
+    ray_release(c->consts_obj);
+    ray_release(c->code_obj);
+}
+
+/* ── Debug info helpers ── */
+/* Append one [bytecode offset, span id] pair to the debug vector,
+ * creating the vector on first use.  Failure to create it is silent:
+ * debug info is best-effort and never aborts compilation. */
+static void dbg_append(compiler_t* c, int32_t offset, int64_t span_id) {
+    if (!c->dbg_obj) {
+        c->dbg_obj = ray_vec_new(RAY_I64, 0);
+        if (!c->dbg_obj || RAY_IS_ERR(c->dbg_obj)) {
+            c->dbg_obj = NULL;
+            return;
+        }
+    }
+    int64_t off64 = (int64_t)offset;
+    /* NOTE(review): the result of ray_vec_append is stored unchecked;
+     * confirm its failure convention before relying on dbg_obj here. */
+    c->dbg_obj = ray_vec_append(c->dbg_obj, &off64);
+    c->dbg_obj = ray_vec_append(c->dbg_obj, &span_id);
+}
+
+/* Record the source span of `ast` at the current code offset when the
+ * lambda carries span info and the node has a non-zero span id. */
+#define EMIT_DBG(c, ast) do { \
+    if ((c)->lambda && LAMBDA_NFO((c)->lambda)) { \
+        ray_span_t _sp = ray_nfo_get(LAMBDA_NFO((c)->lambda), (ast)); \
+        if (_sp.id != 0) dbg_append((c), (c)->code_len, _sp.id); \
+    } \
+} while(0)
+
+/* ── Emit helpers ── */
+/* Append one byte to the code buffer, doubling its capacity when full.
+ * On allocation failure the byte is dropped and c->error is set. */
+static void emit(compiler_t *c, uint8_t byte) {
+    if (c->code_len >= c->code_cap) {
+        int32_t new_cap = c->code_cap * 2;
+        ray_t *new_obj = ray_alloc(new_cap);
+        if (!new_obj || RAY_IS_ERR(new_obj)) { c->error = true; return; }
+        new_obj->type = RAY_U8;
+        new_obj->len = 0;
+        memcpy(ray_data(new_obj), c->code, c->code_len);
+        ray_release(c->code_obj);
+        c->code_obj = new_obj;
+        c->code = (uint8_t *)ray_data(new_obj);
+        c->code_cap = new_cap;
+    }
+    c->code[c->code_len++] = byte;
+}
+
+/* Emit a constant load: one-byte operand for indices below 256, a
+ * big-endian two-byte operand otherwise.  Indices that do not fit in
+ * 16 bits (or are negative) flag a compile error instead of being
+ * silently truncated to a different constant. */
+static void emit_const(compiler_t *c, int32_t idx) {
+    if (idx < 0 || idx > 0xFFFF) { c->error = true; return; }
+    if (idx < 256) {
+        emit(c, OP_LOADCONST);
+        emit(c, (uint8_t)idx);
+    } else {
+        emit(c, OP_LOADCONST_W);
+        emit(c, (uint8_t)(idx >> 8));
+        emit(c, (uint8_t)(idx & 0xFF));
+    }
+}
+
+/* ── Constant pool ── */
+/* Intern `value` in the constant pool and return its index.  An entry
+ * is reused when it is the same object, or an atom of the same type
+ * with an equal payload (I64, F64, BOOL, or SYM with matching attrs).
+ * F64 payloads are compared by bit pattern so that -0.0 is not folded
+ * into +0.0.  A newly stored value is retained by the pool.  If the
+ * pool cannot grow, c->error is set and the returned index is unusable. */
+static int32_t add_constant(compiler_t *c, ray_t *value) {
+    for (int32_t i = 0; i < c->n_consts; i++) {
+        ray_t *v = c->consts[i];
+        if (v == value) return i;
+        if (v->type == value->type && ray_is_atom(v)) {
+            if (v->type == -RAY_I64 && v->i64 == value->i64) return i;
+            if (v->type == -RAY_F64 &&
+                memcmp(&v->f64, &value->f64, sizeof(v->f64)) == 0) return i;
+            if (v->type == -RAY_BOOL && v->b8 == value->b8) return i;
+            if (v->type == -RAY_SYM && v->i64 == value->i64 &&
+                v->attrs == value->attrs) return i;
+        }
+    }
+    if (c->n_consts >= c->consts_cap) {
+        int32_t new_cap = c->consts_cap * 2;
+        ray_t *new_obj = ray_alloc(new_cap * sizeof(ray_t *));
+        if (!new_obj || RAY_IS_ERR(new_obj)) { c->error = true; return c->n_consts; }
+        new_obj->type = RAY_LIST;
+        new_obj->len = 0;
+        ray_t **new_arr = (ray_t **)ray_data(new_obj);
+        memcpy(new_arr, c->consts, c->n_consts * sizeof(ray_t *));
+        memset(new_arr + c->n_consts, 0, (new_cap - c->n_consts) * sizeof(ray_t *));
+        ray_release(c->consts_obj);
+        c->consts_obj = new_obj;
+        c->consts = new_arr;
+        c->consts_cap = new_cap;
+    }
+    ray_retain(value);
+    c->consts[c->n_consts] = value;
+    return c->n_consts++;
+}
+
+/* ── Local variable tracking ── */
+static
int32_t find_local(compiler_t *c, int64_t sym_id) { + for (int32_t i = c->n_locals - 1; i >= 0; i--) + if (c->locals[i].sym_id == sym_id) return c->locals[i].slot; + return -1; +} + +static int32_t add_local(compiler_t *c, int64_t sym_id) { + if (c->n_locals >= 256) return -1; + int32_t slot = c->n_locals; + c->locals[c->n_locals].sym_id = sym_id; + c->locals[c->n_locals].slot = slot; + c->n_locals++; + if (c->n_locals > c->max_locals) c->max_locals = c->n_locals; + return slot; +} + +/* ── Jump helpers ── */ +static int32_t emit_jump(compiler_t *c, uint8_t opcode) { + emit(c, opcode); + int32_t patch_pos = c->code_len; + emit(c, 0); + emit(c, 0); + return patch_pos; +} + +static void patch_jump(compiler_t *c, int32_t pos) { + int32_t raw = c->code_len - pos - 2; + if (raw > 32767 || raw < -32768) { c->error = true; return; } + int16_t offset = (int16_t)raw; + c->code[pos] = (uint8_t)(offset >> 8); + c->code[pos + 1] = (uint8_t)(offset & 0xFF); +} + +/* Cached sym IDs for special forms */ +static _Thread_local int64_t sf_set = -1, sf_let = -1, sf_if = -1, sf_do = -1, sf_fn = -1, sf_self = -1, sf_try = -1; + +static void init_sf_syms(void) { + if (sf_set >= 0) return; + sf_set = ray_sym_intern("set", 3); + sf_let = ray_sym_intern("let", 3); + sf_if = ray_sym_intern("if", 2); + sf_do = ray_sym_intern("do", 2); + sf_fn = ray_sym_intern("fn", 2); + sf_self = ray_sym_intern("self", 4); + sf_try = ray_sym_intern("try", 3); +} + +/* ── Compile a list (special form or function call) ── */ +static void compile_list(compiler_t *c, ray_t *ast) { + if (c->error) return; + EMIT_DBG(c, ast); + int64_t n = ray_len(ast); + if (n == 0) { c->error = true; return; } + ray_t **elems = (ray_t **)ray_data(ast); + ray_t *head = elems[0]; + + init_sf_syms(); + + /* Check for special forms by name */ + if (head->type == -RAY_SYM && (head->attrs & RAY_ATTR_NAME)) { + int64_t sym_id = head->i64; + + /* (set name value) — dynamic eval (set modifies global env) */ + if (sym_id == sf_set && n 
== 3) { + int32_t idx = add_constant(c, ast); + emit_const(c, idx); + emit(c, OP_CALLD); + emit(c, 0); + return; + } + + /* (let name value) — compile value, store in local slot. + * Reserved names (`.sys.*`, `.os.*`, `.csv.*`, `.ipc.*`) are + * refused here so a compiled lambda can't shadow a builtin + * through its local-slot table — the same guard + * ray_env_set_local enforces on the tree-walking path. + * Setting c->error aborts bytecode emission; call_lambda + * then falls back to the tree-walking interpreter which + * raises the proper `reserve` error via ray_let_fn. */ + if (sym_id == sf_let && n == 3) { + ray_t *name_obj = elems[1]; + if (name_obj->type != -RAY_SYM || + ray_sym_is_reserved(name_obj->i64)) { + c->error = true; + return; + } + compile_expr(c, elems[2]); + emit(c, OP_DUP); + int32_t slot = find_local(c, name_obj->i64); + if (slot < 0) slot = add_local(c, name_obj->i64); + if (slot < 0) { c->error = true; return; } + emit(c, OP_STOREENV); + emit(c, (uint8_t)slot); + return; + } + + /* (if cond then else?) */ + if (sym_id == sf_if && n >= 3) { + compile_expr(c, elems[1]); + int32_t jmpf_pos = emit_jump(c, OP_JMPF); + compile_expr(c, elems[2]); + if (n >= 4) { + int32_t jmp_pos = emit_jump(c, OP_JMP); + patch_jump(c, jmpf_pos); + compile_expr(c, elems[3]); + patch_jump(c, jmp_pos); + } else { + int32_t jmp_pos = emit_jump(c, OP_JMP); + patch_jump(c, jmpf_pos); + ray_t *zero = ray_alloc(0); + zero->type = -RAY_I64; + zero->i64 = 0; + int32_t idx = add_constant(c, zero); + ray_release(zero); + emit_const(c, idx); + patch_jump(c, jmp_pos); + } + return; + } + + /* (do expr1 expr2 ...) */ + if (sym_id == sf_do && n >= 2) { + for (int64_t i = 1; i < n; i++) { + if (i > 1) emit(c, OP_POP); + compile_expr(c, elems[i]); + } + return; + } + + /* (fn [params] body...) 
— nested lambda via dynamic eval */ + if (sym_id == sf_fn && n >= 3) { + int32_t idx = add_constant(c, ast); + emit_const(c, idx); + emit(c, OP_CALLD); + emit(c, 0); + return; + } + + /* (try body handler) — compile to OP_TRAP/OP_TRAP_END */ + if (sym_id == sf_try && n == 3) { + /* Reserve a hidden local for err_val */ + int32_t err_slot = add_local(c, -1); + if (err_slot < 0) { c->error = true; return; } + + int32_t trap_pos = emit_jump(c, OP_TRAP); + compile_expr(c, elems[1]); /* body */ + emit(c, OP_TRAP_END); + int32_t jmp_pos = emit_jump(c, OP_JMP); + patch_jump(c, trap_pos); /* handler starts here */ + /* err_val is on stack (pushed by vm_error_cleanup). + * Stash it, compile handler fn, reload err_val, call. */ + emit(c, OP_STOREENV); + emit(c, (uint8_t)err_slot); + compile_expr(c, elems[2]); /* handler fn */ + emit(c, OP_LOADENV); + emit(c, (uint8_t)err_slot); + emit(c, OP_CALLF); + emit(c, 1); /* call handler(err_val) */ + patch_jump(c, jmp_pos); /* end */ + return; + } + } + + /* Self-recursive call: emit OP_CALLS (lean frame reuse, no fn object) */ + if (head->type == -RAY_SYM && (head->attrs & RAY_ATTR_NAME) && + head->i64 == sf_self) { + int64_t argc = n - 1; + if (argc > 64) { c->error = true; return; } + for (int64_t i = 1; i < n; i++) + compile_expr(c, elems[i]); + emit(c, OP_CALLS); + emit(c, (uint8_t)argc); + return; + } + + /* Look up head at compile time to determine call type */ + ray_t *fn = NULL; + if (head->type == -RAY_SYM && (head->attrs & RAY_ATTR_NAME)) + fn = ray_env_get(head->i64); + + /* Unrecognized special form: dynamic eval on entire form */ + if (fn && (fn->attrs & RAY_FN_SPECIAL_FORM)) { + int32_t idx = add_constant(c, ast); + emit_const(c, idx); + emit(c, OP_CALLD); + emit(c, 0); + return; + } + + /* General function call: compile head, args, then dispatch. + * If head resolved to a builtin at compile time, emit LOADCONST + * instead of RESOLVE to skip the runtime hash lookup. 
*/
+    if (fn && (fn->type == RAY_UNARY || fn->type == RAY_BINARY || fn->type == RAY_VARY)) {
+        int32_t idx = add_constant(c, fn);
+        emit_const(c, idx);
+    } else {
+        compile_expr(c, head);
+    }
+    int64_t argc = n - 1;
+    if (argc > 64) { c->error = true; return; }
+    for (int64_t i = 1; i < n; i++)
+        compile_expr(c, elems[i]);
+
+    /* Record call-site span so errors point to the call expression, not the last arg */
+    EMIT_DBG(c, ast);
+
+    if (fn) {
+        switch (fn->type) {
+        case RAY_UNARY:
+            if (argc == 1) { emit(c, OP_CALL1); return; }
+            break;
+        case RAY_BINARY:
+            if (argc == 2) { emit(c, OP_CALL2); return; }
+            break;
+        case RAY_VARY:
+            emit(c, OP_CALLN);
+            emit(c, (uint8_t)argc);
+            return;
+        case RAY_LAMBDA:
+            emit(c, OP_CALLF);
+            emit(c, (uint8_t)argc);
+            return;
+        default:
+            break;
+        }
+    }
+
+    emit(c, OP_CALLF);
+    emit(c, (uint8_t)argc);
+}
+
+/* ── Compile expression ── */
+/* Emit code that leaves the value of `ast` on the stack.
+ *   - name symbol bound to a local slot -> OP_LOADENV
+ *   - any other name symbol             -> OP_RESOLVE / OP_RESOLVE_W on
+ *                                          the interned symbol constant
+ *   - other atoms, non-list values and
+ *     empty lists                       -> constant load
+ *   - non-empty lists                   -> compile_list
+ * Does nothing once c->error is set, or for NULL / error nodes. */
+static void compile_expr(compiler_t *c, ray_t *ast) {
+    if (c->error) return;
+    if (!ast || RAY_IS_ERR(ast)) return;
+    EMIT_DBG(c, ast);
+
+    if (ray_is_atom(ast) &&
+        ast->type == -RAY_SYM && (ast->attrs & RAY_ATTR_NAME)) {
+        int32_t slot = find_local(c, ast->i64);
+        if (slot >= 0) {
+            emit(c, OP_LOADENV);
+            emit(c, (uint8_t)slot);
+            return;
+        }
+        int32_t idx = add_constant(c, ast);
+        if (idx < 256) {
+            emit(c, OP_RESOLVE);
+            emit(c, (uint8_t)idx);
+        } else if (idx <= 0xFFFF) {
+            emit(c, OP_RESOLVE_W);
+            emit(c, (uint8_t)(idx >> 8));
+            emit(c, (uint8_t)(idx & 0xFF));
+        } else {
+            /* Index does not fit the 16-bit operand: refuse rather than
+             * resolve a different symbol after truncation. */
+            c->error = true;
+        }
+        return;
+    }
+
+    /* Literal atoms, non-list values and the empty list are constants. */
+    if (ray_is_atom(ast) || ast->type != RAY_LIST || ray_len(ast) == 0) {
+        int32_t idx = add_constant(c, ast);
+        emit_const(c, idx);
+        return;
+    }
+
+    compile_list(c, ast);
+}
+
+/* ── Public API ── */
+/* Compile `lambda` to bytecode in place.  No-op when it is already
+ * compiled.  On success the lambda receives the bytecode vector, the
+ * constants list, the local-slot count and (if any was recorded) the
+ * debug vector, and RAY_FN_COMPILED is set.  On any failure the lambda
+ * is left untouched and every temporary, including the debug vector,
+ * is released. */
+void ray_compile(ray_t *lambda) {
+    if (LAMBDA_IS_COMPILED(lambda)) return;
+
+    compiler_t c;
+    if (!compiler_init(&c)) return;
+    c.lambda = lambda;
+
+    /* Register params as locals */
+    ray_t *params_list = LAMBDA_PARAMS(lambda);
+    int64_t param_count = ray_len(params_list);
+    int64_t *param_ids = (int64_t*)ray_data(params_list);
+    for (int64_t i = 0; i < param_count; i++) {
+        if (add_local(&c, param_ids[i]) < 0) { c.error = true; break; }
+    }
+
+    /* Compile body expressions */
+    ray_t *body = LAMBDA_BODY(lambda);
+    int64_t body_count = ray_len(body);
+    ray_t **body_exprs = (ray_t **)ray_data(body);
+    for (int64_t i = 0; i < body_count; i++) {
+        if (i > 0) emit(&c, OP_POP);
+        compile_expr(&c, body_exprs[i]);
+    }
+    emit(&c, OP_RET);
+
+    if (c.error) {
+        if (c.dbg_obj) ray_release(c.dbg_obj);
+        compiler_destroy(&c);
+        return;
+    }
+
+    /* Build bytecode vector */
+    ray_t *bc = ray_alloc(c.code_len);
+    if (!bc || RAY_IS_ERR(bc)) {
+        /* dbg_obj is still owned by the compiler on this path */
+        if (c.dbg_obj) ray_release(c.dbg_obj);
+        compiler_destroy(&c);
+        return;
+    }
+    bc->type = RAY_U8;
+    bc->len = c.code_len;
+    memcpy(ray_data(bc), c.code, c.code_len);
+
+    /* Build constants list */
+    ray_t *consts = ray_alloc(c.n_consts * sizeof(ray_t *));
+    if (!consts || RAY_IS_ERR(consts)) {
+        ray_release(bc);
+        if (c.dbg_obj) ray_release(c.dbg_obj);
+        compiler_destroy(&c);
+        return;
+    }
+    consts->type = RAY_LIST;
+    consts->len = c.n_consts;
+    ray_t **cpool = (ray_t **)ray_data(consts);
+    for (int32_t i = 0; i < c.n_consts; i++) {
+        ray_retain(c.consts[i]);
+        cpool[i] = c.consts[i];
+    }
+
+    LAMBDA_BC(lambda) = bc;
+    LAMBDA_CONSTS(lambda) = consts;
+    LAMBDA_NLOCALS(lambda) = c.max_locals;
+    lambda->attrs |= RAY_FN_COMPILED;
+
+    if (c.dbg_obj) {
+        LAMBDA_DBG(lambda) = c.dbg_obj;
+        /* dbg_obj is now owned by the lambda, don't release it */
+    }
+
+    compiler_destroy(&c);
+}
+
+/* Look up the span recorded for instruction pointer `ip`: among the
+ * [offset, span id] pairs in `dbg`, pick the one with the greatest
+ * offset that is <= ip.  Returns a zeroed span when `dbg` is NULL or
+ * empty, or when no pair qualifies.  A trailing unpaired element is
+ * ignored instead of being read past the end of the vector. */
+ray_span_t ray_bc_dbg_get(ray_t* dbg, int32_t ip) {
+    ray_span_t span = {0};
+    if (!dbg || dbg->len == 0) return span;
+    int64_t* data = (int64_t*)ray_data(dbg);
+    int64_t n = dbg->len;
+    int64_t best_offset = -1;
+    for (int64_t i = 0; i + 1 < n; i += 2) {
+        int64_t offset = data[i];
+        if (offset <= ip && offset > best_offset) {
+            best_offset = offset;
+            span.id = data[i + 1];
+        }
+    }
+    return span;
+}
+
+void ray_compile_reset(void) {
+    sf_set = sf_let = sf_if = sf_do = sf_fn = sf_self = sf_try =
-1; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/lang/env.c b/crates/rayforce-sys/vendor/rayforce/src/lang/env.c new file mode 100644 index 0000000..8bb2a50 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/lang/env.c @@ -0,0 +1,658 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "lang/env.h" +#include "table/sym.h" +#include "table/dict.h" +#include "ops/temporal.h" +#include "ops/linkop.h" +#include +#include +#include + +/* ---- Function constructors ---- */ + +/* Builtin name stored inline in nullmap[2..15] (max 13 chars + null). + * Bytes 0-1 reserved for DAG opcode (any type, not just binary). 
*/ +static void fn_set_name(ray_t* obj, const char* name) { + memset(obj->nullmap, 0, 16); + size_t len = strlen(name); + if (len > 13) len = 13; + memcpy(obj->nullmap + 2, name, len); +} + +ray_t* ray_fn_unary(const char* name, uint8_t fn_attrs, ray_unary_fn fn) { + ray_t* obj = ray_alloc(0); + if (!obj) return ray_error("oom", NULL); + obj->type = RAY_UNARY; + obj->attrs = fn_attrs; + obj->i64 = (int64_t)(uintptr_t)fn; + fn_set_name(obj, name); + return obj; +} + +ray_t* ray_fn_binary(const char* name, uint8_t fn_attrs, ray_binary_fn fn) { + ray_t* obj = ray_alloc(0); + if (!obj) return ray_error("oom", NULL); + obj->type = RAY_BINARY; + obj->attrs = fn_attrs; + obj->i64 = (int64_t)(uintptr_t)fn; + fn_set_name(obj, name); + return obj; +} + +ray_t* ray_fn_vary(const char* name, uint8_t fn_attrs, ray_vary_fn fn) { + ray_t* obj = ray_alloc(0); + if (!obj) return ray_error("oom", NULL); + obj->type = RAY_VARY; + obj->attrs = fn_attrs; + obj->i64 = (int64_t)(uintptr_t)fn; + fn_set_name(obj, name); + return obj; +} + +/* ---- Global environment ---- */ + +/* Spinlock protecting g_env mutations in ray_env_set */ +static _Atomic(int) g_env_lock = 0; +static inline void env_lock(void) { + while (atomic_exchange_explicit(&g_env_lock, 1, memory_order_acquire)) { +#if defined(__x86_64__) || defined(__i386__) + __builtin_ia32_pause(); +#endif + } +} +static inline void env_unlock(void) { + atomic_store_explicit(&g_env_lock, 0, memory_order_release); +} + +#define ENV_CAP 1024 + +static struct { + int64_t keys[ENV_CAP]; + ray_t* vals[ENV_CAP]; + /* Per-slot flag: 1 iff this binding was last written by user code + * (ray_env_set / ray_env_set_local-promoted-to-global), 0 if the + * latest writer was builtin registration (ray_env_bind / _flat). + * Powers ray_env_list_user, which the journal snapshot uses to + * pick which globals to dump to .qdb. A user `(set + 42)` + * over a builtin flips the slot to user=1 so the override is + * preserved across snapshot/restore. 
*/ + uint8_t user[ENV_CAP]; + int32_t count; +} g_env; + +/* ---- Local scope stack ---- */ + +#define SCOPE_CAP 64 +#define FRAME_CAP 64 + +typedef struct { + int64_t keys[FRAME_CAP]; + ray_t* vals[FRAME_CAP]; + int32_t count; +} ray_scope_frame_t; + +static _Thread_local ray_scope_frame_t scope_stack[SCOPE_CAP]; +static _Thread_local int32_t scope_depth = 0; + +int32_t ray_env_scope_depth(void) { return scope_depth; } +int32_t ray_env_global_count(void) { return g_env.count; } + +ray_err_t ray_env_init(void) { + memset(&g_env, 0, sizeof(g_env)); + scope_depth = 0; + return RAY_OK; +} + +void ray_env_destroy(void) { + /* Pop any remaining scopes */ + while (scope_depth > 0) ray_env_pop_scope(); + for (int32_t i = 0; i < g_env.count; i++) { + if (g_env.vals[i]) ray_release(g_env.vals[i]); + } + memset(&g_env, 0, sizeof(g_env)); +} + +/* Flat (non-dotted) lookup — scope stack top-down, then global env. + * Returns NULL if not bound. Always used as the head-segment resolver + * for dotted paths, and as the fast path for plain names. */ +static ray_t* env_lookup_flat(int64_t sym_id) { + for (int32_t d = scope_depth - 1; d >= 0; d--) { + ray_scope_frame_t* f = &scope_stack[d]; + for (int32_t i = 0; i < f->count; i++) { + if (f->keys[i] == sym_id) return f->vals[i]; + } + } + for (int32_t i = 0; i < g_env.count; i++) { + if (g_env.keys[i] == sym_id) return g_env.vals[i]; + } + return NULL; +} + +ray_t* ray_env_get(int64_t sym_id) { + /* Flat lookup first — covers every non-dotted name AND every + * reserved builtin like `.sys.gc` which is bound both flat (for + * O(1) resolution + prefix enumeration) and inside the `.sys` + * namespace dict (for REPL introspection). 
*/ + ray_t* flat = env_lookup_flat(sym_id); + if (flat) return flat; + if (!ray_sym_is_dotted(sym_id)) return NULL; + + /* Dotted walk: head resolves via scope+global, rest are sym-keyed + * container probes — dicts probe the keys SYM vec and read the + * matching slot from the vals LIST, tables look up by schema sym + * id, anything else is surfaced as "undefined" (NULL). Missing + * intermediate keys also return NULL so the evaluator's name-error + * reporting stays consistent with plain names. Returning env-owned + * pointers (never fresh allocations) keeps the caller's retain/release + * balance correct. */ + const int64_t* segs; + int n = ray_sym_segs(sym_id, &segs); + if (n < 2) return NULL; /* defensive — dotted bit without segments */ + + ray_t* v = env_lookup_flat(segs[0]); + for (int i = 1; v && i < n; i++) { + v = ray_container_probe_sym(v, segs[i]); + } + return v; +} + +/* Owned-ref variant. Always returns rc>=1 on success; caller must + * release. Additionally handles temporal field extraction in the dotted + * walk (e.g. `date.dd`, `ts.hh`) — when the next container-probe step + * would fail and the current value is a RAY_DATE / RAY_TIME / + * RAY_TIMESTAMP vector or atom, we try mapping the segment sym to a + * RAY_EXTRACT_* field and call ray_temporal_extract, which allocates a + * fresh result. Those fresh allocations are exactly why this function + * has a different retain contract from ray_env_get. */ +ray_t* ray_env_resolve(int64_t sym_id) { + /* Flat lookup first — short-circuits dotted reserved builtins + * (`.sys.gc`, `.os.getenv`, …) that are additionally bound flat + * alongside their namespace dict. Non-dotted names take the + * same path. 
*/ + ray_t* flat = env_lookup_flat(sym_id); + if (flat) { ray_retain(flat); return flat; } + if (!ray_sym_is_dotted(sym_id)) return NULL; + + const int64_t* segs; + int n = ray_sym_segs(sym_id, &segs); + if (n < 2) return NULL; + + /* `v` is either a borrowed env/container pointer (fresh=false) or a + * fresh temporal-extract result (fresh=true). When switching between + * the two we must release the previous fresh value to avoid leaks. */ + ray_t* v = env_lookup_flat(segs[0]); + bool fresh = false; + + for (int i = 1; v && i < n; i++) { + ray_t* next = NULL; + bool next_fresh = false; + /* Linked column: deref segs[i] as a target field name (returns + * a fresh owning result, columns the same length as v). Errors + * from ray_link_deref (e.g. "nyi: target table has a parted + * column") must be surfaced to the caller — silently downgrading + * to NULL would convert a real wrong-answer-bug guard into a + * confusing "name undefined" message. */ + if (ray_link_has(v)) { + next = ray_link_deref(v, segs[i]); + if (next && RAY_IS_ERR(next)) { + if (fresh) ray_release(v); + return next; + } + next_fresh = (next != NULL); + } + if (!next) next = ray_container_probe_sym(v, segs[i]); + if (next) { + if (fresh) ray_release(v); + v = next; + fresh = next_fresh; + continue; + } + + /* Container probe miss — try method dispatch: look up the + * segment as a callable in env, and if it's a unary function, + * apply it to the current value. This makes `ts.ss`, `d.dd`, + * or any future `x.some_fn` work the same way, with the + * segment resolution going through the normal function + * registration path instead of a bespoke table. + * + * Walk both scope and global env looking for a RAY_UNARY + * binding — a local non-callable (e.g. a column named `ss` + * pushed into scope by the select fallback) must not shadow + * the globally-registered accessor function. 
*/ + ray_t* fn = NULL; + for (int32_t d = scope_depth - 1; d >= 0 && !fn; d--) { + ray_scope_frame_t* f = &scope_stack[d]; + for (int32_t k = 0; k < f->count; k++) { + if (f->keys[k] == segs[i] && f->vals[k] + && f->vals[k]->type == RAY_UNARY) { + fn = f->vals[k]; + break; + } + } + } + if (!fn) { + for (int32_t k = 0; k < g_env.count; k++) { + if (g_env.keys[k] == segs[i] && g_env.vals[k] + && g_env.vals[k]->type == RAY_UNARY) { + fn = g_env.vals[k]; + break; + } + } + } + if (fn) { + ray_unary_fn f = (ray_unary_fn)(uintptr_t)fn->i64; + ray_t* r = f(v); + if (fresh) ray_release(v); + if (!r || RAY_IS_ERR(r)) return NULL; + v = r; + fresh = true; + continue; + } + + /* Nothing matched — propagate "undefined". */ + if (fresh) ray_release(v); + return NULL; + } + + if (!v) return NULL; + if (!fresh) ray_retain(v); /* hand back an owned ref */ + return v; +} + +/* Flat-binding helpers: mutate a specific scope (global or top frame) by + * sym_id. Used by both the simple and dotted set paths. Passing val=NULL + * means "delete" — if a slot exists, release its value and compact the + * slot out of the array (no-op if the slot doesn't exist). This matches + * ray_del_fn's contract via ray_env_set(sym, NULL) and also covers the + * cascade-up case in env_set_dotted where every dict in a dotted path was + * emptied by the delete. 
*/
+static ray_err_t env_bind_global_impl(int64_t sym_id, ray_t* val, int is_user) {
+    env_lock();
+    for (int32_t i = 0; i < g_env.count; i++) {
+        if (g_env.keys[i] == sym_id) {
+            if (val == NULL) {
+                /* Delete: drop the value and close the gap so the
+                 * three parallel arrays stay dense. */
+                if (g_env.vals[i]) ray_release(g_env.vals[i]);
+                for (int32_t j = i; j + 1 < g_env.count; j++) {
+                    g_env.keys[j] = g_env.keys[j + 1];
+                    g_env.vals[j] = g_env.vals[j + 1];
+                    g_env.user[j] = g_env.user[j + 1];
+                }
+                g_env.count--;
+                env_unlock();
+                return RAY_OK;
+            }
+            /* Overwrite.  Retain the incoming value before releasing
+             * the old one: when both are the same object and the env
+             * holds the only reference, releasing first would free it
+             * before the retain. */
+            ray_t* old = g_env.vals[i];
+            ray_retain(val);
+            g_env.vals[i] = val;
+            if (old) ray_release(old);
+            /* User write upgrades a builtin slot to user-defined, so a
+             * (set + 42) override survives snapshot/restore.  A builtin
+             * re-bind (e.g. theoretical hot reload) leaves the existing
+             * flag alone — once user, always user, until the slot is
+             * deleted. */
+            if (is_user) g_env.user[i] = 1;
+            env_unlock();
+            return RAY_OK;
+        }
+    }
+    if (val == NULL) { /* deleting an absent binding: no-op */
+        env_unlock();
+        return RAY_OK;
+    }
+    if (g_env.count >= ENV_CAP) {
+        env_unlock();
+        return RAY_ERR_OOM;
+    }
+    g_env.keys[g_env.count] = sym_id;
+    ray_retain(val);
+    g_env.vals[g_env.count] = val;
+    g_env.user[g_env.count] = is_user ? 1 : 0;
+    g_env.count++;
+    env_unlock();
+    return RAY_OK;
+}
+
+/* Function-pointer-shaped wrapper used by env_set_dotted's bind_fn
+ * indirection — preserves the existing signature. */
+static ray_err_t env_bind_global(int64_t sym_id, ray_t* val) {
+    return env_bind_global_impl(sym_id, val, 0);
+}
+
+/* User-flagged sibling: identical except the slot is marked user=1.
+ * Used by ray_env_set and the dotted-set path it drives. */
+static ray_err_t env_bind_global_user(int64_t sym_id, ray_t* val) {
+    return env_bind_global_impl(sym_id, val, 1);
+}
+
+/* Bind, overwrite or (val == NULL) delete `sym_id` in the innermost
+ * scope frame of the calling thread.  Returns RAY_ERR_OOM when the
+ * frame already holds FRAME_CAP bindings, and RAY_ERR_TYPE when there
+ * is no active frame to write to (previously an out-of-bounds access
+ * to scope_stack[-1]).  Deleting an absent binding is a no-op. */
+static ray_err_t env_bind_local(int64_t sym_id, ray_t* val) {
+    if (scope_depth <= 0 || scope_depth > SCOPE_CAP) return RAY_ERR_TYPE;
+    ray_scope_frame_t* f = &scope_stack[scope_depth - 1];
+    for (int32_t i = 0; i < f->count; i++) {
+        if (f->keys[i] == sym_id) {
+            if (val == NULL) {
+                if (f->vals[i]) ray_release(f->vals[i]);
+                for (int32_t j = i; j + 1 < f->count; j++) {
+                    f->keys[j] = f->keys[j + 1];
+                    f->vals[j] = f->vals[j + 1];
+                }
+                f->count--;
+                return RAY_OK;
+            }
+            /* Retain before release: safe when rebinding a name to the
+             * object it already holds. */
+            ray_t* old = f->vals[i];
+            ray_retain(val);
+            f->vals[i] = val;
+            if (old) ray_release(old);
+            return RAY_OK;
+        }
+    }
+    if (val == NULL) return RAY_OK;
+    if (f->count >= FRAME_CAP) return RAY_ERR_OOM;
+    f->keys[f->count] = sym_id;
+    ray_retain(val);
+    f->vals[f->count] = val;
+    f->count++;
+    return RAY_OK;
+}
+
+/* Dotted-path write. base_lookup(head_sym) returns the current binding in
+ * the scope we are writing to (global or local frame), or NULL. bind_fn
+ * rebinds the new top-level dict in that same scope. Walks the existing
+ * chain (if any) for intermediate dicts, then COW-rebuilds bottom-up using
+ * dict_upsert. Auto-creates missing intermediates as empty dicts. */
+static ray_err_t env_set_dotted(int64_t sym_id, ray_t* val,
+                                ray_t* (*base_lookup)(int64_t),
+                                ray_err_t (*bind_fn)(int64_t, ray_t*)) {
+    const int64_t* segs;
+    int n = ray_sym_segs(sym_id, &segs);
+    if (n < 2) return RAY_ERR_TYPE; /* dotted flag without segments */
+
+    /* Walk existing chain to the deepest parent that still exists. Record
+     * each level's dict pointer (borrowed) so we can rebuild upward. Any
+     * non-dict intermediate is an error. */
+    ray_t* parents[256];
+    parents[0] = base_lookup(segs[0]);
+    if (parents[0] && parents[0]->type != RAY_DICT)
+        return RAY_ERR_TYPE;
+
+    /* parents[i] is the dict at path prefix segs[0..i]. If an intermediate
+     * key is missing, parents[i+1..n-2] are NULL and ray_dict_upsert will
+     * create fresh dicts on the way back up.
*/ + for (int i = 1; i < n - 1; i++) { + if (!parents[i - 1]) { parents[i] = NULL; continue; } + ray_t* child = ray_dict_probe_sym_borrowed(parents[i - 1], segs[i]); + if (child && child->type != RAY_DICT) + return RAY_ERR_TYPE; + parents[i] = child; + } + + /* Delete path: (del ns.x) lowers to ray_env_set(sym_id, NULL). The + * non-dotted path removes the env slot; the dotted path must actually + * remove the key from the leaf dict and rebuild the chain — otherwise + * the user would see a zombie entry like {:x NULL} instead of the + * key being gone. No-op cleanly if any part of the path is missing. + * If the leaf-removal empties the containing dict, we must not rebind + * {} upward — that would leave a stale empty namespace. Instead + * cascade up: at each level, if `cur` is empty, delete that key from + * its parent instead of upserting it. If the cascade reaches the + * head with an empty dict, we rebind the head to NULL (env_bind_* + * treats NULL as "remove the slot"). */ + int start_i; + ray_t* cur; + bool deleting = (val == NULL); + if (deleting) { + ray_t* leaf_parent = parents[n - 2]; + if (!leaf_parent) return RAY_OK; + if (!ray_dict_probe_sym_borrowed(leaf_parent, segs[n - 1])) return RAY_OK; + ray_retain(leaf_parent); + ray_t* k = ray_sym(segs[n - 1]); + cur = ray_dict_remove(leaf_parent, k); + ray_release(k); + if (!cur || RAY_IS_ERR(cur)) return RAY_ERR_OOM; + start_i = n - 2; /* rebuild from the parent of the deleted key up */ + } else { + ray_retain(val); + cur = val; + start_i = n - 1; + } + + /* Build new chain bottom-up. ray_dict_upsert consumes its `dict` arg, + * so we retain parents before passing. Missing-parent levels are + * created from a fresh empty dict. On failure we release cur and bail + * — parents are env-owned borrowed refs. 
*/ + for (int i = start_i; i >= 1; i--) { + ray_t* parent = parents[i - 1]; + + if (deleting && cur && cur->type == RAY_DICT && ray_dict_len(cur) == 0) { + /* Cascade: the rebuilt child became empty, so remove the key + * at this level rather than storing {}. If parent is absent + * too, nothing more to do. */ + ray_release(cur); + if (!parent) { cur = NULL; break; } + ray_retain(parent); + ray_t* k = ray_sym(segs[i]); + cur = ray_dict_remove(parent, k); + ray_release(k); + if (!cur || RAY_IS_ERR(cur)) return RAY_ERR_OOM; + continue; + } + + ray_t* dict_in; + if (parent) { + ray_retain(parent); + dict_in = parent; + } else { + ray_t* keys = ray_sym_vec_new(RAY_SYM_W64, 1); + ray_t* vals = ray_list_new(1); + dict_in = ray_dict_new(keys, vals); + if (!dict_in || RAY_IS_ERR(dict_in)) { ray_release(cur); return RAY_ERR_OOM; } + } + ray_t* k = ray_sym(segs[i]); + ray_t* next = ray_dict_upsert(dict_in, k, cur); + ray_release(k); + ray_release(cur); + if (!next || RAY_IS_ERR(next)) return RAY_ERR_OOM; + cur = next; + } + + /* If cascade reduced the head-level dict to empty (or propagated up + * past a missing parent), rebind the head as NULL so the stale empty + * namespace disappears from introspection and from future lookups. */ + ray_t* to_bind = cur; + if (deleting && cur && cur->type == RAY_DICT && ray_dict_len(cur) == 0) { + to_bind = NULL; + } + ray_err_t err = bind_fn(segs[0], to_bind); + if (cur) ray_release(cur); + return err; +} + +/* Scope-specific base lookups used by env_set_dotted. 
*/ +static ray_t* lookup_global(int64_t sym_id) { /* Linear scan of g_env. Returns the stored pointer as-is (no retain), or NULL when sym_id is unbound. NOTE(review): reads g_env without env_lock(), unlike env_bind_global_impl — confirm callers serialize access. */ + for (int32_t i = 0; i < g_env.count; i++) { + if (g_env.keys[i] == sym_id) return g_env.vals[i]; + } + return NULL; +} + +static ray_t* lookup_top_frame(int64_t sym_id) { /* Same lookup restricted to the innermost scope frame; outer frames and the global env are not consulted. */ + if (scope_depth <= 0) return NULL; /* no local frame is active */ + ray_scope_frame_t* f = &scope_stack[scope_depth - 1]; + for (int32_t i = 0; i < f->count; i++) { + if (f->keys[i] == sym_id) return f->vals[i]; + } + return NULL; +} + +/* A sym belongs to the reserved system namespace if its name starts with + * a dot (e.g. `.sys.gc`, `.os.getenv`). The leading segment is the + * category tag; builtin registration populates these via ray_env_bind + * and every user-level binder refuses such names so the system + * bindings can't be shadowed in any scope. */ +bool ray_sym_is_reserved(int64_t sym_id) { + ray_t* s = ray_sym_str(sym_id); + if (!s) return false; /* no name string for this id: treated as not reserved */ + const char* p = ray_str_ptr(s); + size_t n = ray_str_len(s); + return n > 0 && p && p[0] == '.'; +} + +ray_err_t ray_env_bind(int64_t sym_id, ray_t* val) { /* Global bind with no reserved-name check and without setting the user flag (env_bind_global passes is_user = 0). Dotted syms are routed through the dict-rebuilding path. */ + if (ray_sym_is_dotted(sym_id)) { + return env_set_dotted(sym_id, val, lookup_global, env_bind_global); + } + return env_bind_global(sym_id, val); +} + +ray_err_t ray_env_bind_flat(int64_t sym_id, ray_t* val) { /* Always writes one flat global slot keyed by sym_id, even when the sym is dotted. */ + return env_bind_global(sym_id, val); +} + +ray_err_t ray_env_set(int64_t sym_id, ray_t* val) { + if (ray_sym_is_reserved(sym_id)) return RAY_ERR_RESERVED; + /* Same machinery as ray_env_bind, but routes through the user-flagged + * binder so the journal snapshot can pick this slot. Without this + * flip, env_bind_global would also be reached via ray_env_bind below + * and the slot would carry user=0 — leaving it out of .qdb. 
*/ + if (ray_sym_is_dotted(sym_id)) + return env_set_dotted(sym_id, val, lookup_global, env_bind_global_user); + return env_bind_global_user(sym_id, val); +} + +ray_err_t ray_env_push_scope(void) { /* Opens a new, empty local frame; RAY_ERR_OOM once SCOPE_CAP frames are already open. */ + if (scope_depth >= SCOPE_CAP) return RAY_ERR_OOM; + scope_stack[scope_depth].count = 0; + scope_depth++; + return RAY_OK; +} + +void ray_env_pop_scope(void) { /* Closes the innermost frame and releases every value it holds; no-op when no frame is open. */ + if (scope_depth <= 0) return; + scope_depth--; + ray_scope_frame_t* f = &scope_stack[scope_depth]; + for (int32_t i = 0; i < f->count; i++) { + if (f->vals[i]) ray_release(f->vals[i]); + } + f->count = 0; +} + +/* ---- Iteration ---- */ + +int32_t ray_env_list(int64_t* sym_ids, ray_t** vals, int32_t max_entries) { /* Copies up to max_entries global bindings into the parallel output arrays and returns how many were written. The value pointers are copied without retaining them. */ + int32_t n = g_env.count < max_entries ? g_env.count : max_entries; + /* A negative max_entries must not leak out as a negative "count written"; + * this also matches ray_env_list_user, which yields 0 in that case. */ + if (n < 0) n = 0; + for (int32_t i = 0; i < n; i++) { + sym_ids[i] = g_env.keys[i]; + vals[i] = g_env.vals[i]; + } + return n; +} + +int32_t ray_env_list_user(int64_t* sym_ids, ray_t** vals, int32_t max_entries) { /* Like ray_env_list, but only slots whose user flag is set are copied. */ + int32_t out = 0; + for (int32_t i = 0; i < g_env.count && out < max_entries; i++) { + if (!g_env.user[i]) continue; + sym_ids[out] = g_env.keys[i]; + vals[out] = g_env.vals[i]; + out++; + } + return out; +} + +/* ---- Prefix lookup ---- */ + +static const char* s_keywords[] = { + "def", "do", "false", "fn", "if", "let", "set", "true", NULL +}; + +/* Compare helper for qsort on const char* */ +static int cmp_str_ptr(const void* a, const void* b) { + return strcmp(*(const char**)a, *(const char**)b); +} + +bool ray_env_has_name(const char* name, int64_t len) { /* Exact match of name[0..len) against every global env key and every keyword; name does not need to be NUL-terminated. */ + if (!name || len <= 0) return false; + for (int32_t i = 0; i < g_env.count; i++) { + ray_t* s = ray_sym_str(g_env.keys[i]); + if (!s) continue; + const char* n = ray_str_ptr(s); + if (!n) continue; + if ((int64_t)strlen(n) == len && memcmp(n, name, (size_t)len) == 0) + return true; + } + for (const char** kw = s_keywords; *kw; kw++) { + if ((int64_t)strlen(*kw) == len && memcmp(*kw, name, (size_t)len) == 0) + return true; + } + return false; +} + +int64_t ray_env_lookup_prefix(const 
char* prefix, int64_t len, + const char** results, int64_t max_results) { /* Collects the distinct global env names and keywords that start with prefix[0..len) into results[] (at most max_results), sorts them with strcmp and returns the count. len == 0 matches every name. */ + int64_t count = 0; + + /* Reject unusable arguments up front, in the same spirit as + * ray_env_has_name: a negative len would turn into a huge size_t in + * strncmp, and results/prefix are dereferenced below. A NULL prefix is + * still accepted for len == 0 and treated as the empty prefix. */ + if (!results || max_results <= 0 || len < 0) return 0; + if (!prefix) { + if (len > 0) return 0; + prefix = ""; + } + + /* Scan global env keys */ + for (int32_t i = 0; i < g_env.count && count < max_results; i++) { + ray_t* s = ray_sym_str(g_env.keys[i]); + if (!s) continue; + const char* name = ray_str_ptr(s); + if (!name) continue; + int64_t nlen = (int64_t)strlen(name); + if (nlen >= len && strncmp(name, prefix, (size_t)len) == 0) { + /* Deduplicate against what we already have */ + int dup = 0; + for (int64_t j = 0; j < count; j++) { + if (strcmp(results[j], name) == 0) { dup = 1; break; } + } + if (!dup) results[count++] = name; + } + } + + /* Scan static keyword list */ + for (const char** kw = s_keywords; *kw && count < max_results; kw++) { + int64_t klen = (int64_t)strlen(*kw); + if (klen >= len && strncmp(*kw, prefix, (size_t)len) == 0) { + int dup = 0; + for (int64_t j = 0; j < count; j++) { + if (strcmp(results[j], *kw) == 0) { dup = 1; break; } + } + if (!dup) results[count++] = *kw; + } + } + + /* Sort alphabetically */ + if (count > 1) { + qsort((void*)results, (size_t)count, sizeof(const char*), cmp_str_ptr); + } + return count; +} + +ray_err_t ray_env_set_local(int64_t sym_id, ray_t* val) { + /* Reserved names (.sys.*, .os.*, .csv.*, .ipc.*) can only be + * populated by builtin registration (ray_env_bind). Refuse at + * every user-reachable binding path so `(let .sys.gc 99)` or a + * lambda parameter named `.sys.gc` cannot shadow the builtin. 
*/ + if (ray_sym_is_reserved(sym_id)) return RAY_ERR_RESERVED; + if (scope_depth <= 0) return ray_env_set(sym_id, val); /* no local frame is active, so the binding goes to the global env */ + if (ray_sym_is_dotted(sym_id)) { + return env_set_dotted(sym_id, val, lookup_top_frame, env_bind_local); + } + return env_bind_local(sym_id, val); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/lang/env.h b/crates/rayforce-sys/vendor/rayforce/src/lang/env.h new file mode 100644 index 0000000..e92b528 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/lang/env.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_ENV_H +#define RAY_ENV_H + +#include +#include "lang/eval.h" + +/* Create function objects. Name stored inline in nullmap[2..15]; bytes + * 0-1 of nullmap are reserved for the DAG opcode. + * The function pointer is in the i64 field. 
*/ +ray_t* ray_fn_unary(const char* name, uint8_t fn_attrs, ray_unary_fn fn); +ray_t* ray_fn_binary(const char* name, uint8_t fn_attrs, ray_binary_fn fn); +ray_t* ray_fn_vary(const char* name, uint8_t fn_attrs, ray_vary_fn fn); + +/* Read builtin name from nullmap[2..15] (null-terminated, max 13 chars). + * Bytes 0-1 reserved for DAG opcode on all function types. + * The returned pointer aliases fn's own storage; fn must be non-NULL. */ +static inline const char* ray_fn_name(const ray_t* fn) { + return (const char*)fn->nullmap + 2; +} + +/* Global environment: symbol -> function object dict */ +ray_err_t ray_env_init(void); +void ray_env_destroy(void); +ray_t* ray_env_get(int64_t sym_id); + +/* User-facing binder. Refuses any name starting with `.` — that root is + * reserved for system namespaces (.sys, .os, .io, .ipc, …) populated by + * builtin registration. Returns RAY_ERR_RESERVED in that case. */ +ray_err_t ray_env_set(int64_t sym_id, ray_t* val); + +/* Internal binder used by builtin registration. Identical to ray_env_set + * but WITHOUT the reserved-namespace guard and without marking the slot + * as user-defined, so a slot written only through this binder is skipped + * by ray_env_list_user. Do NOT call this from user- + * exposed paths; it is the intended way to populate `.sys` / `.os` etc. + * during ray_lang_init. */ +ray_err_t ray_env_bind(int64_t sym_id, ray_t* val); + +/* Flat variant of ray_env_bind: writes the binding directly into the + * global env table without traversing dotted-segment dict upserts. + * Used to register every fully-qualified builtin name (`.sys.gc`, + * `.os.getenv`, …) alongside the root namespace dict, so prefix + * lookup (REPL completion + highlighter) enumerates them all. */ +ray_err_t ray_env_bind_flat(int64_t sym_id, ray_t* val); + +/* True if a symbol's interned name starts with `.` — i.e. it belongs to + * the reserved namespace populated at startup by builtin registration. + * User-level binders (ray_env_set, ray_env_set_local, lambda parameter + * installer) refuse such names so system bindings can't be shadowed. 
*/ +bool ray_sym_is_reserved(int64_t sym_id); + +/* Resolve a name for a Rayfall expression (tree-walking eval or bytecode + * op_resolve): returns an OWNED ref (rc >= 1) that the caller must + * release, or NULL if undefined. Unlike ray_env_get which returns a + * borrowed ref and leaves refcount management to the caller, env_resolve + * retains before returning — so name-resolution sites can drop their + * manual ray_retain and still participate in the dotted-sym temporal + * extraction path (e.g. `trades.Time.dd`), which allocates fresh values + * mid-walk. */ +ray_t* ray_env_resolve(int64_t sym_id); + +/* Prefix lookup: scan global env + keywords for names starting with prefix. + * Fills results[] with pointers to interned name strings (valid until next + * sym table mutation). Returns count of matches (up to max_results). + * Results are sorted alphabetically. */ +int64_t ray_env_lookup_prefix(const char* prefix, int64_t len, + const char** results, int64_t max_results); + +/* True iff `name[0..len)` is an exact-match global env binding or + * keyword. Does NOT intern the probed string (unlike ray_env_get which + * would need a sym_id). Used by the REPL highlighter to decide whether + * to paint the current word green — the prefix-lookup API returns only + * the first-matching entry, which would misclassify `de` as non-builtin + * when an alphabetically-earlier `desc`/`del` hits the same prefix. */ +bool ray_env_has_name(const char* name, int64_t len); + +/* Iterate global environment entries. + * Fills sym_ids[] and vals[] with up to max_entries items. + * Returns count of entries written. */ +int32_t ray_env_list(int64_t* sym_ids, ray_t** vals, int32_t max_entries); + +/* Iterate ONLY user-defined bindings (slots last written via ray_env_set, + * not ray_env_bind). 
Powers the journal snapshot — the .qdb file would + * otherwise carry every builtin, which is wasteful and breaks on reload + * because builtin function objects hold absolute pointers from the prior + * process. A user `(set + 42)` over a builtin flips the slot to user- + * defined, so explicit overrides are preserved. */ +int32_t ray_env_list_user(int64_t* sym_ids, ray_t** vals, int32_t max_entries); + +/* Total number of bindings currently in the global env (builtins + + * user). Useful for sizing buffers before ray_env_list. */ +int32_t ray_env_global_count(void); + +/* Local scope stack for lexical binding (let, do, lambda) */ +ray_err_t ray_env_push_scope(void); +void ray_env_pop_scope(void); +ray_err_t ray_env_set_local(int64_t sym_id, ray_t* val); + +#endif /* RAY_ENV_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/lang/eval.c b/crates/rayforce-sys/vendor/rayforce/src/lang/eval.c new file mode 100644 index 0000000..7d3442e --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/lang/eval.c @@ -0,0 +1,2626 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "lang/eval.h" +#include "lang/internal.h" +#include "lang/env.h" +#include "lang/nfo.h" +#include "lang/parse.h" +#include "core/types.h" +#include "ops/ops.h" +#include "ops/temporal.h" +#include "ops/datalog.h" +#include "ops/idxop.h" +#include "ops/linkop.h" +#include "table/sym.h" +#include "core/profile.h" +#include "table/sym.h" +#include "mem/heap.h" +#include "mem/sys.h" +/* store/serde.h, store/splay.h, store/part.h moved to system.c */ +/* ray_lang_print, ray_cast_fn, etc. moved to ops/builtins.c */ +/* ray_error() is declared in (included via eval.h) */ + +#include +#include +#include +#include +#include +#include +#include + +/* Maximum recursion depth for ray_eval() to prevent stack overflow */ +#define RAY_EVAL_MAX_DEPTH 512 +_Thread_local static int eval_depth = 0; + +/* Thread-local nfo for eval context — tracks source locations during evaluation */ +static _Thread_local ray_t* g_eval_nfo = NULL; + +/* Thread-local error trace — list of [span_i64, filename, fn_name, source] frames */ +static _Thread_local ray_t* g_error_trace = NULL; + +/* Interrupt flag — set by REPL signal handler, checked by eval/VM loops. + * Unlike eval_depth, g_eval_nfo and g_error_trace it is not _Thread_local, + * so a single flag is shared by every thread. The accessors only store or + * load this one volatile sig_atomic_t. */ +static volatile sig_atomic_t g_eval_interrupted = 0; + +void ray_request_interrupt(void) { g_eval_interrupted = 1; } +void ray_clear_interrupt(void) { g_eval_interrupted = 0; } +bool ray_interrupted(void) { return g_eval_interrupted != 0; } + +/* Legacy internal names — thin wrappers kept for existing callers. 
*/ +void ray_eval_request_interrupt(void) { ray_request_interrupt(); } +void ray_eval_clear_interrupt(void) { ray_clear_interrupt(); } +int ray_eval_is_interrupted(void) { return ray_interrupted(); } + +ray_t* ray_eval_get_nfo(void) { return g_eval_nfo; } +void ray_eval_set_nfo(ray_t* nfo) { g_eval_nfo = nfo; } + +ray_t* ray_get_error_trace(void) { return g_error_trace; } +void ray_clear_error_trace(void) { + if (g_error_trace) { ray_release(g_error_trace); g_error_trace = NULL; } +} + +/* ══════════════════════════════════════════ + * Restricted-mode check + * ══════════════════════════════════════════ */ + +static _Thread_local bool g_eval_restricted = false; + +void ray_eval_set_restricted(bool on) { g_eval_restricted = on; } +bool ray_eval_get_restricted(void) { return g_eval_restricted; } + +static inline bool fn_is_restricted(ray_t* fn_obj) { + return g_eval_restricted && (fn_obj->attrs & RAY_FN_RESTRICTED); +} + +/* ══════════════════════════════════════════ + * Error handling: try / raise + * ══════════════════════════════════════════ */ + +static _Thread_local ray_t *__raise_val = NULL; + +/* (raise value) — raise an error with the given value */ +ray_t* ray_raise_fn(ray_t* val) { + if (__raise_val) ray_release(__raise_val); + ray_retain(val); + __raise_val = val; + return ray_error("domain", NULL); +} + +/* (try expr handler) — evaluate expr, if error call handler with error value. + * Special form: receives unevaluated args. 
*/ +ray_t* ray_try_fn(ray_t* expr, ray_t* handler_expr) { + ray_t* result = ray_eval(expr); + if (!RAY_IS_ERR(result)) return result; + + /* Get error value (set by raise, or default for runtime errors) */ + ray_t* err_val = __raise_val; + __raise_val = NULL; + if (!err_val) err_val = make_i64(0); + + /* Evaluate handler expression */ + ray_t* handler = ray_eval(handler_expr); + if (RAY_IS_ERR(handler)) { + ray_release(err_val); + return handler; + } + + /* Call handler with error value */ + ray_t* handler_result; + if (handler->type == RAY_LAMBDA) { + ray_t* args[1] = { err_val }; + handler_result = call_lambda(handler, args, 1); + } else if (handler->type == RAY_UNARY) { + ray_unary_fn fn = (ray_unary_fn)(uintptr_t)handler->i64; + handler_result = fn(err_val); + } else { + handler_result = ray_error("type", NULL); + } + + ray_release(err_val); + ray_release(handler); + return handler_result; +} + +/* ══════════════════════════════════════════ + * FN_ATOMIC auto-mapping helpers + * ══════════════════════════════════════════ */ + +/* Convert a typed vector to a boxed list. If already a list, retains + * and returns it directly. Caller owns the returned object. */ +ray_t* to_boxed_list(ray_t* x) { + if (!x || RAY_IS_ERR(x)) return x; + if (x->type == RAY_LIST) { ray_retain(x); return x; } + if (!ray_is_vec(x)) return ray_error("type", NULL); + + int64_t len = ray_len(x); + ray_t* list = ray_alloc(len * sizeof(ray_t*)); + if (!list) return ray_error("oom", NULL); + list->type = RAY_LIST; + list->len = len; + ray_t** dst = (ray_t**)ray_data(list); + + for (int64_t i = 0; i < len; i++) { + int alloc = 0; + dst[i] = collection_elem(x, i, &alloc); + if (RAY_IS_ERR(dst[i])) { + for (int64_t j = 0; j < i; j++) ray_release(dst[j]); + ray_release(list); + return dst[i]; + } + /* collection_elem always allocates for typed vecs, so ownership transfers */ + } + return list; +} + +/* Unbox a typed vector argument to a boxed list for use in builtins. 
+ * Sets *_bx to the allocated boxed list (caller must release) or NULL. + * Returns the (possibly converted) argument, or an error. */ +ray_t* unbox_vec_arg(ray_t* x, ray_t** _bx) { + *_bx = NULL; + if (x && !RAY_IS_ERR(x) && ray_is_vec(x)) { + *_bx = to_boxed_list(x); /* if conversion fails, the error object is both stored in *_bx and returned */ + return *_bx; + } + return x; /* NULL, error or non-vector input passes through; *_bx stays NULL */ +} + +/* Construct a zero-valued owned atom matching the element type of a + * vector (typed or RAY_LIST). Used only for empty-collection type + * probing by the atomic-map helpers: it lets us invoke a binary or + * unary `fn` with a representative scalar so the result's output + * type is observable even when the input has no elements. + * + * Symbol / string / GUID columns must produce atoms of their own + * element type — falling back to i64(0) for those would make, e.g., + * `(== empty_sym_col 'foo)` probe an integer comparison and return + * I64 instead of the BOOL a non-empty input would yield. Unknown + * element types still fall back to ray_i64(0). */ +static ray_t* zero_atom_for_elem_type(ray_t* coll) { + if (!coll) return ray_i64(0); + if (coll->type == RAY_LIST) return ray_i64(0); /* a boxed list has no single element type to mirror */ + switch (coll->type) { + case RAY_I64: return ray_i64(0); + case RAY_I32: return ray_i32(0); + case RAY_I16: return ray_i16(0); + case RAY_U8: return ray_u8(0); + case RAY_BOOL: return make_bool(0); + case RAY_F64: return make_f64(0.0); + case RAY_DATE: return ray_date(0); + case RAY_TIME: return ray_time(0); + case RAY_TIMESTAMP: return ray_timestamp(0); + case RAY_SYM: return ray_sym(0); + case RAY_STR: return ray_str("", 0); + case RAY_GUID: { + static const uint8_t zero_guid[16] = {0}; /* all-zero 16-byte GUID used as the representative value */ + return ray_guid(zero_guid); + } + default: return ray_i64(0); + } +} + +/* Map a binary function element-wise over collections. + * Both args can be collections (zip-map) or one scalar (broadcast). + * Produces typed vectors when output is numeric/bool, boxed lists otherwise. 
*/ +ray_t* atomic_map_binary_op(ray_binary_fn fn, uint16_t dag_opcode, ray_t* left, ray_t* right) { + int left_coll = is_collection(left); + int right_coll = is_collection(right); + + if (!left_coll && !right_coll) return fn(left, right); + + int64_t len; + if (left_coll && right_coll) { + len = ray_len(left) < ray_len(right) ? ray_len(left) : ray_len(right); + } else { + len = left_coll ? ray_len(left) : ray_len(right); + } + + if (len == 0) { + /* Empty collection — no first element to probe, so fabricate a + * zero-valued atom of each operand's element type and run `fn` + * on it to learn the output type. Without this the result was + * hardcoded to I64 and lost the semantics of type-preserving + * ops (e.g. `(xbar empty_TIME_col 10000)` returned an I64 empty + * vector instead of a TIME one). */ + ray_t* la = left_coll ? zero_atom_for_elem_type(left) : left; + ray_t* ra = right_coll ? zero_atom_for_elem_type(right) : right; + ray_t* probe = (la && ra && !RAY_IS_ERR(la) && !RAY_IS_ERR(ra)) + ? fn(la, ra) : NULL; + if (left_coll && la) ray_release(la); + if (right_coll && ra) ray_release(ra); + if (probe && !RAY_IS_ERR(probe) && probe->type < 0) { + int8_t t = (int8_t)(-probe->type); + ray_release(probe); + return ray_vec_new(t, 0); + } + if (probe && !RAY_IS_ERR(probe)) ray_release(probe); + return ray_vec_new(RAY_I64, 0); + } + + /* Probe first element to determine output type */ + int la0 = 0, ra0 = 0; + ray_t* a0 = left_coll ? collection_elem(left, 0, &la0) : left; + ray_t* b0 = right_coll ? 
collection_elem(right, 0, &ra0) : right; + ray_t* e0; + if (RAY_IS_ERR(a0) || RAY_IS_ERR(b0)) { + e0 = ray_error("type", NULL); + } else if (is_collection(a0) || is_collection(b0)) { + e0 = atomic_map_binary(fn, a0, b0); + } else { + e0 = fn(a0, b0); + } + if (la0) ray_release(a0); + if (ra0) ray_release(b0); + if (RAY_IS_ERR(e0)) return e0; + + int8_t out_type = -(e0->type); /* atom type (-RAY_I64) → vector type (RAY_I64) */ + + /* If either input is a boxed list (mixed types), always use boxed list output + * to preserve type heterogeneity */ + int force_boxed = (left_coll && left->type == RAY_LIST) || + (right_coll && right->type == RAY_LIST); + + /* When the probed result is a null atom, the fn already chose the correct + * result type (e.g., division returns left-operand-typed null). Skip the + * wider-wins promotion so the typed null lands in the right vector type. */ + int e0_null = RAY_ATOM_IS_NULL(e0); + + /* When the probed result is a boolean (from comparison ops like ==, <, etc.), + * preserve the bool output type — do not promote to wider integer type. */ + int e0_bool = (e0->type == -RAY_BOOL); + + /* When LEFT is scalar broadcast to RIGHT vector, the output type follows + * the RIGHT vector's element type for integer types, + * unless float or temporal promotion is involved. 
*/ + if (!e0_null && !e0_bool && !left_coll && right_coll && ray_is_vec(right) && out_type != RAY_F64) { + int8_t vec_type = right->type; + /* Only override for integer family: if probed type is wider int, downcast */ + int out_is_int = (out_type == RAY_I64 || out_type == RAY_I32 || out_type == RAY_I16 || out_type == RAY_U8); + int vec_is_int = (vec_type == RAY_I64 || vec_type == RAY_I32 || vec_type == RAY_I16 || vec_type == RAY_U8); + if (out_is_int && vec_is_int) + out_type = vec_type; + /* For temporal: only override if both are same temporal family */ + if ((vec_type == RAY_DATE || vec_type == RAY_TIME || vec_type == RAY_TIMESTAMP) && + out_type == vec_type) + out_type = vec_type; /* no-op, just keep it */ + } + /* When LEFT is vector and RIGHT is scalar, output follows WIDER integer + * type between left vector and right scalar */ + if (!e0_null && !e0_bool && left_coll && !right_coll && ray_is_vec(left) && out_type != RAY_F64 && + ray_is_atom(right)) { + int8_t vt = left->type, st = -(right->type); + int vt_int = (vt == RAY_I64 || vt == RAY_I32 || vt == RAY_I16 || vt == RAY_U8); + int st_int = (st == RAY_I64 || st == RAY_I32 || st == RAY_I16 || st == RAY_U8); + int out_is_int = (out_type == RAY_I64 || out_type == RAY_I32 || out_type == RAY_I16 || out_type == RAY_U8); + if (out_is_int && vt_int && st_int) + out_type = (vt >= st) ? vt : st; /* wider wins */ + } + /* When both are vectors, output type follows wider integer type */ + if (!e0_null && !e0_bool && left_coll && right_coll && ray_is_vec(left) && ray_is_vec(right) && out_type != RAY_F64) { + int8_t lt = left->type, rt = right->type; + int lt_int = (lt == RAY_I64 || lt == RAY_I32 || lt == RAY_I16 || lt == RAY_U8); + int rt_int = (rt == RAY_I64 || rt == RAY_I32 || rt == RAY_I16 || rt == RAY_U8); + if (lt_int && rt_int) { + /* Pick wider: I64 > I32 > I16 > U8 (using type tag ordering) */ + out_type = (lt >= rt) ? 
lt : rt; + } + } + + /* When LEFT is a vector collection, override i32 output to match the + * left vector type or i64 (e.g., [DATE]-DATE → i64, [i64]-i32 → i64). + * Keeps i32 only when left vector is actually i32. */ + if (!e0_null && !e0_bool && out_type == RAY_I32 && left_coll && ray_is_vec(left) && left->type != RAY_I32) { + out_type = RAY_I64; + } + + /* ══════════════════════════════════════════════════════════════ + * FAST PATH: opcode-driven vectorized execution. + * I64 ops use direct array loops (lowest overhead). + * F64/comparison ops route through DAG executor. + * ══════════════════════════════════════════════════════════════ */ + + /* Direct array loops — only for cross-temporal and mixed-width cases + * that the DAG can't handle. All same-type ops go through DAG. */ + if (0 && !force_boxed && (dag_opcode == OP_DIV || dag_opcode == OP_MOD)) { + int8_t ltype = left_coll ? left->type : -(left->type); + int8_t rtype = right_coll ? right->type : -(right->type); + int esz_l = (ltype == RAY_I64 || ltype == RAY_TIMESTAMP) ? 8 : + (ltype == RAY_I32 || ltype == RAY_DATE || ltype == RAY_TIME) ? 4 : + (ltype == RAY_I16) ? 2 : (ltype == RAY_U8) ? 1 : 0; + int esz_r = (rtype == RAY_I64 || rtype == RAY_TIMESTAMP) ? 8 : + (rtype == RAY_I32 || rtype == RAY_DATE || rtype == RAY_TIME) ? 4 : + (rtype == RAY_I16) ? 2 : (rtype == RAY_U8) ? 1 : 0; + int lv = left_coll && ray_is_vec(left) && esz_l > 0; + int rv = right_coll && ray_is_vec(right) && esz_r > 0; + int ls = !left_coll && esz_l > 0; + int rs = !right_coll && esz_r > 0; + + /* Cross-type temporal arithmetic (DATE+TIME→TIMESTAMP) needs eval-level + * conversion — only use fast path when types are compatible for raw arithmetic */ + int8_t ltype2 = lv ? left->type : -(left->type); + int8_t rtype2 = rv ? 
right->type : -(right->type); + int same_class = (esz_l == esz_r) || /* same storage width */ + (ltype2 == RAY_I64 && rtype2 == RAY_I64) || /* both i64 */ + (ltype2 == RAY_TIMESTAMP && rtype2 == RAY_TIMESTAMP) || + /* scalar int + any integer vec is fine (just adds raw values) */ + (ls && (rtype2 == ltype2 || ltype2 == RAY_I64)) || + (rs && (ltype2 == rtype2 || rtype2 == RAY_I64)); + /* Reject cross-temporal: DATE+TIME, TIMESTAMP+DATE, etc. */ + int l_temporal = (ltype2==RAY_DATE||ltype2==RAY_TIME||ltype2==RAY_TIMESTAMP); + int r_temporal = (rtype2==RAY_DATE||rtype2==RAY_TIME||rtype2==RAY_TIMESTAMP); + if (l_temporal && r_temporal && ltype2 != rtype2) same_class = 0; + + if (same_class && ((ls && rv) || (lv && rs) || (lv && rv))) { + /* Read elements as i64 regardless of storage width */ + #define READ_INT(ptr, esz, i) \ + ((esz)==8 ? ((int64_t*)(ptr))[(i)] : \ + (esz)==4 ? (int64_t)((int32_t*)(ptr))[(i)] : \ + (esz)==2 ? (int64_t)((int16_t*)(ptr))[(i)] : \ + (int64_t)((uint8_t*)(ptr))[(i)]) + #define SCALAR_INT(obj) \ + (((obj)->type==-RAY_I64||(obj)->type==-RAY_TIMESTAMP) ? (obj)->i64 : \ + ((obj)->type==-RAY_I32||(obj)->type==-RAY_DATE||(obj)->type==-RAY_TIME) ? (int64_t)(obj)->i32 : \ + ((obj)->type==-RAY_I16) ? (int64_t)(obj)->i16 : (int64_t)(obj)->u8) + + /* Reuse input buffer when rc==1 and type matches (avoids allocation). + * Retain so the caller's ray_release(left/right) doesn't free our output. */ + ray_t* vec; + if (lv && left->rc == 1 && left->type == out_type) { + vec = left; + ray_retain(vec); /* caller will release left; we keep ownership */ + } else if (rv && right->rc == 1 && right->type == out_type) { + vec = right; + ray_retain(vec); + } else { + vec = ray_vec_new(out_type, len); + } + if (!vec || RAY_IS_ERR(vec)) { ray_release(e0); return vec; } + vec->len = len; + + void* ldata = lv ? ray_data(left) : NULL; + void* rdata = rv ? ray_data(right) : NULL; + int64_t lsv = ls ? SCALAR_INT(left) : 0; + int64_t rsv = rs ? 
SCALAR_INT(right) : 0; + int out_esz = ray_elem_size(out_type); + int l_atom_null = ls && RAY_ATOM_IS_NULL(left); + int r_atom_null = rs && RAY_ATOM_IS_NULL(right); + + #define LA(i) (ldata ? READ_INT(ldata, esz_l, i) : lsv) + #define RA(i) (rdata ? READ_INT(rdata, esz_r, i) : rsv) + + /* Hoist null check: skip per-element null testing when no nulls */ + bool l_has_nulls = l_atom_null || (lv && (left->attrs & RAY_ATTR_HAS_NULLS)); + bool r_has_nulls = r_atom_null || (rv && (right->attrs & RAY_ATTR_HAS_NULLS)); + bool any_nulls = l_has_nulls || r_has_nulls; + void* out_data = ray_data(vec); /* hoist out of loop */ + + if (!any_nulls) { + /* Fast path: no nulls — tight loop, no per-element checks */ + for (int64_t i = 0; i < len; i++) { + int64_t a = LA(i), b = RA(i); + int64_t r; + switch (dag_opcode) { + case OP_ADD: r = (int64_t)((uint64_t)a + (uint64_t)b); break; + case OP_SUB: r = (int64_t)((uint64_t)a - (uint64_t)b); break; + case OP_MUL: r = (int64_t)((uint64_t)a * (uint64_t)b); break; + case OP_DIV: if (b==0) { if (out_esz==8) ((int64_t*)out_data)[i]=0; else if (out_esz==4) ((int32_t*)out_data)[i]=0; else if (out_esz==2) ((int16_t*)out_data)[i]=0; else ((uint8_t*)out_data)[i]=0; ray_vec_set_null(vec,i,true); continue; } + r=a/b; if ((a^b)<0 && r*b!=a) r--; break; + case OP_MOD: if (b==0) { if (out_esz==8) ((int64_t*)out_data)[i]=0; else if (out_esz==4) ((int32_t*)out_data)[i]=0; else if (out_esz==2) ((int16_t*)out_data)[i]=0; else ((uint8_t*)out_data)[i]=0; ray_vec_set_null(vec,i,true); continue; } + r=a%b; if (r && (r^b)<0) r+=b; break; + default: r = 0; break; + } + if (out_esz == 8) ((int64_t*)out_data)[i] = r; + else if (out_esz == 4) ((int32_t*)out_data)[i] = (int32_t)r; + else if (out_esz == 2) ((int16_t*)out_data)[i] = (int16_t)r; + else ((uint8_t*)out_data)[i] = (uint8_t)r; + } + } else { + /* Slow path: check nulls per element */ + #define ISNULL_L(i) (l_atom_null || (lv && ray_vec_is_null(left, i))) + #define ISNULL_R(i) (r_atom_null || (rv && 
ray_vec_is_null(right, i))) + for (int64_t i = 0; i < len; i++) { + int64_t a = LA(i), b = RA(i); + int64_t r; + if (ISNULL_L(i) || ISNULL_R(i)) { + if (out_esz == 8) ((int64_t*)out_data)[i] = 0; + else if (out_esz == 4) ((int32_t*)out_data)[i] = 0; + else if (out_esz == 2) ((int16_t*)out_data)[i] = 0; + else ((uint8_t*)out_data)[i] = 0; + ray_vec_set_null(vec, i, true); + continue; + } + switch (dag_opcode) { + case OP_ADD: r = (int64_t)((uint64_t)a + (uint64_t)b); break; + case OP_SUB: r = (int64_t)((uint64_t)a - (uint64_t)b); break; + case OP_MUL: r = (int64_t)((uint64_t)a * (uint64_t)b); break; + case OP_DIV: if (b==0) { ((int64_t*)out_data)[i]=0; ray_vec_set_null(vec,i,true); continue; } + r=a/b; if ((a^b)<0 && r*b!=a) r--; break; + case OP_MOD: if (b==0) { ((int64_t*)out_data)[i]=0; ray_vec_set_null(vec,i,true); continue; } + r=a%b; if (r && (r^b)<0) r+=b; break; + default: r = 0; break; + } + if (out_esz == 8) ((int64_t*)out_data)[i] = r; + else if (out_esz == 4) ((int32_t*)out_data)[i] = (int32_t)r; + else if (out_esz == 2) ((int16_t*)out_data)[i] = (int16_t)r; + else ((uint8_t*)out_data)[i] = (uint8_t)r; + } + #undef ISNULL_L + #undef ISNULL_R + } + #undef LA + #undef RA + #undef READ_INT + #undef SCALAR_INT + ray_release(e0); + return vec; + } + } + + /* DAG executor — for F64 and comparisons */ + if (!force_boxed && dag_opcode > 0) { + int is_idiv = (dag_opcode == OP_DIV || dag_opcode == OP_MOD); + int is_cmp = (dag_opcode >= OP_EQ && dag_opcode <= OP_GE); + + /* Classify operands: numeric/temporal vectors or scalars */ + int8_t lt = left_coll ? left->type : -(left->type); + int8_t rt = right_coll ? 
right->type : -(right->type); + #define IS_NUM_TYPE(t) ((t)==RAY_I64||(t)==RAY_F64||(t)==RAY_I32||(t)==RAY_I16|| \ + (t)==RAY_U8||(t)==RAY_DATE||(t)==RAY_TIME||(t)==RAY_TIMESTAMP) + int l_num_vec = left_coll && ray_is_vec(left) && IS_NUM_TYPE(lt); + int r_num_vec = right_coll && ray_is_vec(right) && IS_NUM_TYPE(rt); + int l_num_scalar = !left_coll && IS_NUM_TYPE(lt); + int r_num_scalar = !right_coll && IS_NUM_TYPE(rt); + #undef IS_NUM_TYPE + + int can_dag = (l_num_vec || r_num_vec) && + (l_num_vec || l_num_scalar) && (r_num_vec || r_num_scalar); + /* Null scalar atoms lose their null bit in DAG constants — use slow path */ + if (l_num_scalar && RAY_ATOM_IS_NULL(left)) can_dag = 0; + if (r_num_scalar && RAY_ATOM_IS_NULL(right)) can_dag = 0; + /* TODO: migrate expr.c to bitmap nulls and remove this bail-out. + * DAG executor (expr.c) still uses sentinel-based null checks. */ + if (l_num_vec && (left->attrs & RAY_ATTR_HAS_NULLS)) can_dag = 0; + if (r_num_vec && (right->attrs & RAY_ATTR_HAS_NULLS)) can_dag = 0; + + /* Div/mod: only I64×I64 (executor has floor-div semantics for I64) */ + if (is_idiv && !(lt == RAY_I64 && rt == RAY_I64)) can_dag = 0; + /* Comparisons: same-type only (cross-type promotion loses type info) */ + if (is_cmp && lt != rt) can_dag = 0; + /* Cross-type temporal: DAG promote() loses type tag (int+TIMESTAMP→I64 not TIMESTAMP) */ + { int lt_temp = (lt==RAY_DATE||lt==RAY_TIME||lt==RAY_TIMESTAMP); + int rt_temp = (rt==RAY_DATE||rt==RAY_TIME||rt==RAY_TIMESTAMP); + if ((lt_temp || rt_temp) && lt != rt) can_dag = 0; + } + + if (can_dag) { + ray_graph_t* g = ray_graph_new(NULL); + if (g) { + /* Build left operand node */ + ray_op_t* lop = NULL; + if (l_num_scalar) { + if (left->type == -RAY_F64) + lop = ray_const_f64(g, left->f64); + else { + int64_t sv = as_i64(left); + lop = ray_const_i64(g, sv); + if (lop) lop->out_type = -(left->type); + } + } else { + lop = ray_const_vec(g, left); + } + ray_op_t* rop = NULL; + if (r_num_scalar) { + if (right->type 
== -RAY_F64) + rop = ray_const_f64(g, right->f64); + else { + int64_t sv = as_i64(right); + rop = ray_const_i64(g, sv); + if (rop) rop->out_type = -(right->type); + } + } else { + rop = ray_const_vec(g, right); + } + if (lop && rop) { + ray_op_t* root = ray_binop(g, dag_opcode, lop, rop); + if (root) { + /* For integer floor-division: ray_binop sets F64 output + * for OP_DIV; override to I64 for floor-div with null prop */ + if (is_idiv) root->out_type = RAY_I64; + ray_t* result = ray_execute(g, root); + ray_graph_free(g); + if (result && !RAY_IS_ERR(result)) { + /* Restore temporal type tag if promote() collapsed it */ + if (ray_is_vec(result) && result->type != out_type && + ray_elem_size(result->type) == ray_elem_size(out_type)) + result->type = out_type; + /* Floor-div post-pass (OP_DIV only) */ + if (dag_opcode == OP_DIV && ray_is_vec(result) && + result->type == RAY_F64) { + double* d = (double*)ray_data(result); + for (int64_t fi = 0; fi < result->len; fi++) + d[fi] = floor(d[fi]); + } + ray_release(e0); + return result; + } + } else { ray_graph_free(g); } + } else { ray_graph_free(g); } + } + } + } + /* SLOW PATH: per-element scalar loop (fallback for mixed types, temporal, etc.) */ + if (!force_boxed && + (out_type == RAY_I64 || out_type == RAY_F64 || out_type == RAY_I32 || + out_type == RAY_I16 || out_type == RAY_BOOL || out_type == RAY_U8 || + out_type == RAY_DATE || out_type == RAY_TIME || out_type == RAY_TIMESTAMP)) { + ray_t* vec = ray_vec_new(out_type, len); + if (RAY_IS_ERR(vec)) { ray_release(e0); return vec; } + vec->len = len; + store_typed_elem(vec, 0, e0); + ray_release(e0); + + for (int64_t i = 1; i < len; i++) { + int la = 0, ra = 0; + ray_t* a = left_coll ? collection_elem(left, i, &la) : left; + ray_t* b = right_coll ? collection_elem(right, i, &ra) : right; + ray_t* elem = (RAY_IS_ERR(a) || RAY_IS_ERR(b)) + ? 
ray_error("type", NULL) : fn(a, b); + if (la) ray_release(a); + if (ra) ray_release(b); + if (RAY_IS_ERR(elem)) { ray_release(vec); return elem; } + store_typed_elem(vec, i, elem); + ray_release(elem); + } + return vec; + } + + /* Determine scalar int type for list+scalar coercion. + * When a boxed list is combined with a scalar, integer results + * are coerced to the scalar's integer type (K/q semantics). */ + int8_t scalar_int_type = 0; + if (force_boxed) { + ray_t* scalar = (!left_coll) ? left : (!right_coll ? right : NULL); + if (scalar && ray_is_atom(scalar)) { + int8_t st = scalar->type; + if (st == -RAY_I16 || st == -RAY_I32 || st == -RAY_I64 || st == -RAY_U8) + scalar_int_type = st; + } + } + + /* Coerce an integer atom to the scalar's integer type */ + #define COERCE_TO_SCALAR(elem) do { \ + if (scalar_int_type && ray_is_atom(elem) && elem->type != scalar_int_type && \ + elem->type != -RAY_F64 && is_numeric(elem)) { \ + int64_t _v = as_i64(elem); \ + ray_t* _coerced = make_typed_int(scalar_int_type, _v); \ + ray_release(elem); \ + elem = _coerced; \ + } \ + } while(0) + + /* Fallback: boxed list for non-numeric output or mixed-type input */ + COERCE_TO_SCALAR(e0); + ray_t* result = ray_alloc(len * sizeof(ray_t*)); + if (!result) { ray_release(e0); return ray_error("oom", NULL); } + result->type = RAY_LIST; + result->len = len; + ray_t** out = (ray_t**)ray_data(result); + out[0] = e0; /* first element already computed */ + + for (int64_t i = 1; i < len; i++) { + int la = 0, ra = 0; + ray_t* a = left_coll ? collection_elem(left, i, &la) : left; + ray_t* b = right_coll ? 
collection_elem(right, i, &ra) : right; + ray_t* elem; + if (RAY_IS_ERR(a) || RAY_IS_ERR(b)) { + elem = ray_error("type", NULL); + } else if (is_collection(a) || is_collection(b)) { + /* Recursive auto-map when list element is itself a collection */ + elem = atomic_map_binary(fn, a, b); + } else { + elem = fn(a, b); + } + if (la) ray_release(a); + if (ra) ray_release(b); + if (RAY_IS_ERR(elem)) { + for (int64_t j = 0; j < i; j++) ray_release(out[j]); + ray_release(result); + return elem; + } + COERCE_TO_SCALAR(elem); + out[i] = elem; + } + #undef COERCE_TO_SCALAR + return result; +} + +/* Map a unary function element-wise over a collection. + * Produces typed vectors when output is numeric/bool, boxed lists otherwise. */ +ray_t* atomic_map_unary(ray_unary_fn fn, ray_t* arg) { + if (!is_collection(arg)) return fn(arg); + + int64_t len = ray_len(arg); + + if (len == 0) { + /* Empty — fabricate a zero atom of the element type and run + * `fn` to learn the output type; fall back to I64 if the + * probe can't resolve a typed atom. */ + ray_t* z = zero_atom_for_elem_type(arg); + ray_t* probe = (z && !RAY_IS_ERR(z)) ? fn(z) : NULL; + if (z) ray_release(z); + if (probe && !RAY_IS_ERR(probe) && probe->type < 0) { + int8_t t = (int8_t)(-probe->type); + ray_release(probe); + return ray_vec_new(t, 0); + } + if (probe && !RAY_IS_ERR(probe)) ray_release(probe); + return ray_vec_new(RAY_I64, 0); + } + + /* Probe first element to determine output type */ + int alloc0 = 0; + ray_t* e0_in = collection_elem(arg, 0, &alloc0); + ray_t* e0 = RAY_IS_ERR(e0_in) ? 
e0_in : fn(e0_in); + if (alloc0) ray_release(e0_in); + if (RAY_IS_ERR(e0)) return e0; + + int8_t out_type = -(e0->type); + + /* Try typed vector path for numeric/bool/temporal output */ + if (out_type == RAY_I64 || out_type == RAY_F64 || out_type == RAY_I32 || + out_type == RAY_I16 || out_type == RAY_BOOL || out_type == RAY_U8 || + out_type == RAY_DATE || out_type == RAY_TIME || out_type == RAY_TIMESTAMP) { + ray_t* vec = ray_vec_new(out_type, len); + if (RAY_IS_ERR(vec)) { ray_release(e0); return vec; } + vec->len = len; + store_typed_elem(vec, 0, e0); + ray_release(e0); + + for (int64_t i = 1; i < len; i++) { + int alloc = 0; + ray_t* e = collection_elem(arg, i, &alloc); + ray_t* elem = RAY_IS_ERR(e) ? e : fn(e); + if (alloc) ray_release(e); + if (RAY_IS_ERR(elem)) { ray_release(vec); return elem; } + store_typed_elem(vec, i, elem); + ray_release(elem); + } + return vec; + } + + /* Fallback: boxed list for non-numeric output */ + ray_t* result = ray_alloc(len * sizeof(ray_t*)); + if (!result) { ray_release(e0); return ray_error("oom", NULL); } + result->type = RAY_LIST; + result->len = len; + ray_t** out = (ray_t**)ray_data(result); + out[0] = e0; + + for (int64_t i = 1; i < len; i++) { + int alloc = 0; + ray_t* e = collection_elem(arg, i, &alloc); + ray_t* elem = RAY_IS_ERR(e) ? e : fn(e); + if (alloc) ray_release(e); + if (RAY_IS_ERR(elem)) { + for (int64_t j = 0; j < i; j++) ray_release(out[j]); + ray_release(result); + return elem; + } + out[i] = elem; + } + return result; +} + +/* ══════════════════════════════════════════ + * Higher-order functions: map, pmap, fold, scan, filter, apply + * ══════════════════════════════════════════ */ + +/* Helper: call a function object with 1 arg, returning result. + * Handles UNARY, BINARY, LAMBDA types. Does not release fn or arg. 
*/ +ray_t* call_fn1(ray_t* fn, ray_t* arg) { + if (fn_is_restricted(fn)) return ray_error("access", "restricted"); + if (fn->type == RAY_UNARY) { + ray_unary_fn f = (ray_unary_fn)(uintptr_t)fn->i64; + if ((fn->attrs & RAY_FN_ATOMIC) && is_collection(arg)) + return atomic_map_unary(f, arg); + return f(arg); + } + if (fn->type == RAY_LAMBDA) { + ray_t* args[1] = { arg }; + return call_lambda(fn, args, 1); + } + return ray_error("type", NULL); +} + +/* Helper: call a function object with 2 args. Does not release fn or args. */ +ray_t* call_fn2(ray_t* fn, ray_t* a, ray_t* b) { + if (fn_is_restricted(fn)) return ray_error("access", "restricted"); + if (fn->type == RAY_BINARY) { + ray_binary_fn f = (ray_binary_fn)(uintptr_t)fn->i64; + if ((fn->attrs & RAY_FN_ATOMIC) && (is_collection(a) || is_collection(b))) + return atomic_map_binary(f, a, b); + return f(a, b); + } + if (fn->type == RAY_LAMBDA) { + ray_t* args[2] = { a, b }; + return call_lambda(fn, args, 2); + } + if (fn->type == RAY_UNARY) { + /* Partial application not supported, just call with first arg */ + ray_unary_fn f = (ray_unary_fn)(uintptr_t)fn->i64; + return f(a); + } + return ray_error("type", NULL); +} + + +/* ══════════════════════════════════════════ + * Sorting builtins + * ══════════════════════════════════════════ */ + +/* Reorder vector elements by an index array */ +ray_t* gather_by_idx(ray_t* vec, int64_t* idx, int64_t n) { + int8_t type = vec->type; + + /* Check nulls once — resolve through slices */ + bool has_nulls = (vec->attrs & RAY_ATTR_HAS_NULLS) || + ((vec->attrs & RAY_ATTR_SLICE) && vec->slice_parent && + (vec->slice_parent->attrs & RAY_ATTR_HAS_NULLS)); + + if (type == RAY_STR) { + ray_t* result = ray_vec_new(type, n); + if (RAY_IS_ERR(result)) return result; + result->len = n; + for (int64_t i = 0; i < n; i++) { + if (has_nulls && ray_vec_is_null(vec, idx[i])) { + result = ray_str_vec_set(result, i, "", 0); + ray_vec_set_null(result, i, true); + } else { + size_t slen; + const char* s = 
ray_str_vec_get(vec, idx[i], &slen); + result = ray_str_vec_set(result, i, s ? s : "", s ? slen : 0); + } + } + return result; + } + + /* RAY_SYM: use adaptive width, create with matching width */ + if (type == RAY_SYM) { + uint8_t w = vec->attrs & RAY_SYM_W_MASK; + ray_t* result = ray_sym_vec_new(w, n); + if (RAY_IS_ERR(result)) return result; + result->len = n; + uint8_t esz = (uint8_t)RAY_SYM_ELEM(w); + char* src = (char*)ray_data(vec); + char* dst = (char*)ray_data(result); + switch (esz) { + case 8: for (int64_t i = 0; i < n; i++) memcpy(dst + i*8, src + idx[i]*8, 8); break; + case 4: for (int64_t i = 0; i < n; i++) memcpy(dst + i*4, src + idx[i]*4, 4); break; + case 2: for (int64_t i = 0; i < n; i++) memcpy(dst + i*2, src + idx[i]*2, 2); break; + case 1: for (int64_t i = 0; i < n; i++) dst[i] = src[idx[i]]; break; + default: for (int64_t i = 0; i < n; i++) memcpy(dst + i*esz, src + idx[i]*esz, esz); break; + } + if (vec->sym_dict) { + ray_retain(vec->sym_dict); + result->sym_dict = vec->sym_dict; + } + if (has_nulls) { + for (int64_t i = 0; i < n; i++) + if (ray_vec_is_null(vec, idx[i])) + ray_vec_set_null(result, i, true); + } + return result; + } + + /* LIST: pointer gather with retain */ + if (type == RAY_LIST) { + ray_t* result = ray_alloc(n * sizeof(ray_t*)); + if (!result || RAY_IS_ERR(result)) return result ? 
result : ray_error("oom", NULL); + result->type = type; + result->len = n; + ray_t** src_ptrs = (ray_t**)ray_data(vec); + ray_t** dst_ptrs = (ray_t**)ray_data(result); + for (int64_t i = 0; i < n; i++) { + dst_ptrs[i] = src_ptrs[idx[i]]; + if (dst_ptrs[i]) ray_retain(dst_ptrs[i]); + } + return result; + } + + ray_t* result = ray_vec_new(type, n); + if (RAY_IS_ERR(result)) return result; + result->len = n; + uint8_t esz = ray_type_sizes[type]; + char* src = (char*)ray_data(vec); + char* dst = (char*)ray_data(result); + /* Typed gather — compiler constant esz enables vectorization, alias-safe */ + switch (esz) { + case 8: for (int64_t i = 0; i < n; i++) memcpy(dst + i*8, src + idx[i]*8, 8); break; + case 4: for (int64_t i = 0; i < n; i++) memcpy(dst + i*4, src + idx[i]*4, 4); break; + case 2: for (int64_t i = 0; i < n; i++) memcpy(dst + i*2, src + idx[i]*2, 2); break; + case 1: for (int64_t i = 0; i < n; i++) dst[i] = src[idx[i]]; break; + default: for (int64_t i = 0; i < n; i++) memcpy(dst + i*esz, src + idx[i]*esz, esz); break; + case 16: for (int64_t i = 0; i < n; i++) memcpy(dst + i*16, src + idx[i]*16, 16); break; + } + + /* Propagate null bitmap */ + if (has_nulls) { + for (int64_t i = 0; i < n; i++) + if (ray_vec_is_null(vec, idx[i])) + ray_vec_set_null(result, i, true); + } + + return result; +} + +/* ══════════════════════════════════════════ + * Table construction and access + * ══════════════════════════════════════════ */ + +/* (list v1 v2 ...) 
— package args into a list */ +ray_t* ray_list_fn(ray_t** args, int64_t n) { + ray_t* result = ray_alloc(n * sizeof(ray_t*)); + if (!result) return ray_error("oom", NULL); + result->type = RAY_LIST; + result->len = n; + ray_t** out = (ray_t**)ray_data(result); + for (int64_t i = 0; i < n; i++) { + ray_retain(args[i]); + out[i] = args[i]; + } + return result; +} + +/* (table [col_names] (list col1 col2 ...)) — build a RAY_TABLE */ +ray_t* ray_table_fn(ray_t* names, ray_t* cols) { + ray_t *_bxn = NULL, *_bxc = NULL; + names = unbox_vec_arg(names, &_bxn); + if (RAY_IS_ERR(names)) return names; + cols = unbox_vec_arg(cols, &_bxc); + if (RAY_IS_ERR(cols)) { if (_bxn) ray_release(_bxn); return cols; } + if (!is_list(names) || !is_list(cols)) { if (_bxn) ray_release(_bxn); if (_bxc) ray_release(_bxc); return ray_error("type", NULL); } + int64_t ncols = ray_len(names); + if (ray_len(cols) != ncols) { if (_bxn) ray_release(_bxn); if (_bxc) ray_release(_bxc); return ray_error("domain", NULL); } + + ray_t** name_elems = (ray_t**)ray_data(names); + ray_t** col_elems = (ray_t**)ray_data(cols); + int64_t expected_rows = -1; + + ray_t* tbl = ray_table_new(ncols); + if (RAY_IS_ERR(tbl)) { if (_bxn) ray_release(_bxn); if (_bxc) ray_release(_bxc); return tbl; } + + for (int64_t i = 0; i < ncols; i++) { + if (name_elems[i]->type != -RAY_SYM) + { ray_release(tbl); if (_bxn) ray_release(_bxn); if (_bxc) ray_release(_bxc); return ray_error("type", NULL); } + int64_t name_id = name_elems[i]->i64; + + /* Convert Rayfall list (or typed vec) to typed column vector */ + ray_t* col_src = col_elems[i]; + + /* Single atom → wrap in a 1-element vector */ + ray_t* atom_wrap = NULL; + if (ray_is_atom(col_src) && col_src->type != -RAY_SYM) { + int8_t atype = -col_src->type; + if (atype == RAY_GUID) { + atom_wrap = ray_vec_new(RAY_GUID, 1); + if (!RAY_IS_ERR(atom_wrap) && col_src->obj) + memcpy(ray_data(atom_wrap), ray_data(col_src->obj), 16); + if (!RAY_IS_ERR(atom_wrap)) atom_wrap->len = 1; + } 
else if (atype == RAY_TIMESTAMP || atype == RAY_I64 || atype == RAY_SYM) { + atom_wrap = ray_vec_new(atype, 1); + if (!RAY_IS_ERR(atom_wrap)) { ((int64_t*)ray_data(atom_wrap))[0] = col_src->i64; atom_wrap->len = 1; } + } else if (atype == RAY_F64) { + atom_wrap = ray_vec_new(RAY_F64, 1); + if (!RAY_IS_ERR(atom_wrap)) { ((double*)ray_data(atom_wrap))[0] = col_src->f64; atom_wrap->len = 1; } + } else if (atype == RAY_DATE || atype == RAY_TIME || atype == RAY_I32) { + atom_wrap = ray_vec_new(atype, 1); + if (!RAY_IS_ERR(atom_wrap)) { ((int32_t*)ray_data(atom_wrap))[0] = col_src->i32; atom_wrap->len = 1; } + } else if (atype == RAY_BOOL) { + atom_wrap = ray_vec_new(RAY_BOOL, 1); + if (!RAY_IS_ERR(atom_wrap)) { ((uint8_t*)ray_data(atom_wrap))[0] = col_src->b8; atom_wrap->len = 1; } + } + if (atom_wrap && !RAY_IS_ERR(atom_wrap)) col_src = atom_wrap; + } + + /* If the column is already a typed vector, use it directly */ + if (ray_is_vec(col_src)) { + int64_t nrows = ray_len(col_src); + if (expected_rows < 0) expected_rows = nrows; + else if (nrows != expected_rows) + { ray_release(tbl); if (_bxn) ray_release(_bxn); if (_bxc) ray_release(_bxc); return ray_error("domain", NULL); } + ray_retain(col_src); + tbl = ray_table_add_col(tbl, name_id, col_src); + ray_release(col_src); + if (RAY_IS_ERR(tbl)) { if (_bxn) ray_release(_bxn); if (_bxc) ray_release(_bxc); return tbl; } + continue; + } + + if (!is_list(col_src)) + { ray_release(tbl); if (_bxn) ray_release(_bxn); if (_bxc) ray_release(_bxc); return ray_error("type", NULL); } + int64_t nrows = ray_len(col_src); + + /* Validate all columns have consistent row count */ + if (expected_rows < 0) expected_rows = nrows; + else if (nrows != expected_rows) + { ray_release(tbl); if (_bxn) ray_release(_bxn); if (_bxc) ray_release(_bxc); return ray_error("domain", NULL); } + + ray_t** row_elems = (ray_t**)ray_data(col_src); + + /* If the LIST contains non-atom values (e.g. 
nested vectors for an + * embedding column), store the LIST as the column directly rather + * than trying to build a typed vector from non-atomic elements. */ + if (nrows > 0 && row_elems[0] && !ray_is_atom(row_elems[0])) { + ray_retain(col_src); + tbl = ray_table_add_col(tbl, name_id, col_src); + ray_release(col_src); + if (RAY_IS_ERR(tbl)) { if (_bxn) ray_release(_bxn); if (_bxc) ray_release(_bxc); return tbl; } + continue; + } + + /* Determine column type from elements (scan for mixed I64/F64 → F64) */ + int8_t col_type = RAY_I64; + if (nrows > 0) { + if (row_elems[0]->type == -RAY_F64) col_type = RAY_F64; + else if (row_elems[0]->type == -RAY_BOOL) col_type = RAY_BOOL; + else if (row_elems[0]->type == -RAY_SYM) col_type = RAY_SYM; + else if (row_elems[0]->type == -RAY_STR) col_type = RAY_STR; + else if (row_elems[0]->type == -RAY_GUID) col_type = RAY_GUID; + else if (row_elems[0]->type == -RAY_TIMESTAMP) col_type = RAY_TIMESTAMP; + else if (row_elems[0]->type == -RAY_DATE) col_type = RAY_DATE; + else if (row_elems[0]->type == -RAY_TIME) col_type = RAY_TIME; + /* RAY_CHAR removed — char atoms are now -RAY_STR */ + } + /* Promote I64 → F64 if any element is F64 */ + if (col_type == RAY_I64) { + for (int64_t j = 0; j < nrows; j++) { + if (row_elems[j]->type == -RAY_F64) { col_type = RAY_F64; break; } + } + } + + ray_t* col_vec = ray_vec_new(col_type, nrows); + if (RAY_IS_ERR(col_vec)) + { ray_release(tbl); if (_bxn) ray_release(_bxn); if (_bxc) ray_release(_bxc); return col_vec; } + + for (int64_t j = 0; j < nrows; j++) { + if (col_type == RAY_STR) { + if (row_elems[j]->type != -RAY_STR) { + ray_release(col_vec); ray_release(tbl); + if (_bxn) ray_release(_bxn); + if (_bxc) ray_release(_bxc); + return ray_error("type", NULL); + } + const char *sptr = ray_str_ptr(row_elems[j]); + size_t slen = ray_str_len(row_elems[j]); + col_vec = ray_str_vec_append(col_vec, sptr, slen); + } else if (col_type == RAY_GUID) { + if (row_elems[j]->type != -RAY_GUID || 
!row_elems[j]->obj) { + ray_release(col_vec); ray_release(tbl); + if (_bxn) ray_release(_bxn); + if (_bxc) ray_release(_bxc); + return ray_error("type", NULL); + } + col_vec = ray_vec_append(col_vec, ray_data(row_elems[j]->obj)); + } else { + /* Validate each element matches the column type (allow I64→F64 promotion) */ + int type_ok = (row_elems[j]->type == -col_type); + if (!type_ok && col_type == RAY_F64 && row_elems[j]->type == -RAY_I64) type_ok = 1; + if (!type_ok) { + ray_release(col_vec); ray_release(tbl); + if (_bxn) ray_release(_bxn); + if (_bxc) ray_release(_bxc); + return ray_error("type", NULL); + } + void* val_ptr; + double promoted; + if (col_type == RAY_F64 && row_elems[j]->type == -RAY_I64) { + promoted = (double)row_elems[j]->i64; + val_ptr = &promoted; + } else if (col_type == RAY_I64) val_ptr = &row_elems[j]->i64; + else if (col_type == RAY_F64) val_ptr = &row_elems[j]->f64; + else if (col_type == RAY_BOOL) val_ptr = &row_elems[j]->b8; + else val_ptr = &row_elems[j]->i64; /* SYM/TIMESTAMP/DATE/TIME stored as i64 */ + col_vec = ray_vec_append(col_vec, val_ptr); + } + if (RAY_IS_ERR(col_vec)) + { ray_release(tbl); if (_bxn) ray_release(_bxn); if (_bxc) ray_release(_bxc); return col_vec; } + } + + tbl = ray_table_add_col(tbl, name_id, col_vec); + ray_release(col_vec); + if (RAY_IS_ERR(tbl)) { if (_bxn) ray_release(_bxn); if (_bxc) ray_release(_bxc); return tbl; } + } + + if (_bxn) ray_release(_bxn); + if (_bxc) ray_release(_bxc); + return tbl; +} + +/* (key dict/table) — return keys vector */ +ray_t* ray_key_fn(ray_t* x) { + if (x->type == RAY_DICT) { + ray_t* keys = ray_dict_keys(x); + if (!keys) return ray_error("type", NULL); + ray_retain(keys); + return keys; + } + if (x->type != RAY_TABLE) return ray_error("type", NULL); + int64_t ncols = ray_table_ncols(x); + ray_t* vec = ray_vec_new(RAY_SYM, ncols); + if (RAY_IS_ERR(vec)) return vec; + vec->len = ncols; + int64_t* out = (int64_t*)ray_data(vec); + for (int64_t i = 0; i < ncols; i++) + out[i] = 
ray_table_col_name(x, i); + return vec; +} + +/* (value dict/table) — extract values */ +ray_t* ray_value_fn(ray_t* x) { + /* Table: return list of column vectors */ + if (x->type == RAY_TABLE) { + /* Table cols slot is a RAY_LIST already — return a fresh copy. */ + int64_t ncols = ray_table_ncols(x); + ray_t* result = ray_list_new(ncols); + if (!result || RAY_IS_ERR(result)) return result ? result : ray_error("oom", NULL); + for (int64_t i = 0; i < ncols; i++) { + ray_t* c = ray_table_get_col_idx(x, i); + result = ray_list_append(result, c); + if (RAY_IS_ERR(result)) return result; + } + return result; + } + if (x->type != RAY_DICT) return ray_error("type", NULL); + ray_t* vals = ray_dict_vals(x); + if (!vals) return ray_error("type", NULL); + ray_retain(vals); + return vals; +} + + + +/* ray_lang_print, fmt_interpolate, ray_println_fn, ray_show_fn, ray_format_fn, + * ray_resolve_fn, ray_timeit_fn, ray_exit_fn, resolve_type_name, + * ray_read_csv_fn, ray_write_csv_fn, cast_match, ray_cast_fn, ray_type_fn, + * ray_read_file_fn, ray_load_file_fn, ray_write_file_fn + * moved to ops/builtins.c */ + +/* ══════════════════════════════════════════ + * Special forms: set, let, if, do + * ══════════════════════════════════════════ */ + +/* (set name value) — bind in global env. Receives unevaluated args. */ +ray_t* ray_set_fn(ray_t* name_obj, ray_t* val_expr) { + if (name_obj->type != -RAY_SYM) + return ray_error("type", NULL); + ray_t* val = ray_eval(val_expr); + if (RAY_IS_ERR(val)) return val; + /* Materialize lazy handles before binding */ + if (ray_is_lazy(val)) + val = ray_lazy_materialize(val); + if (RAY_IS_ERR(val)) return val; + ray_err_t err = ray_env_set(name_obj->i64, val); + if (err != RAY_OK) { + ray_release(val); + return ray_error(ray_err_code_str(err), NULL); + } + return val; /* set returns the value */ +} + +/* (let name value) — bind in local scope. Receives unevaluated args. 
*/ +ray_t* ray_let_fn(ray_t* name_obj, ray_t* val_expr) { + if (name_obj->type != -RAY_SYM) + return ray_error("type", NULL); + ray_t* val = ray_eval(val_expr); + if (RAY_IS_ERR(val)) return val; + /* Materialize lazy handles before binding */ + if (ray_is_lazy(val)) + val = ray_lazy_materialize(val); + if (RAY_IS_ERR(val)) return val; + ray_err_t err = ray_env_set_local(name_obj->i64, val); + if (err != RAY_OK) { ray_release(val); return ray_error(ray_err_code_str(err), NULL); } + return val; +} + +/* (if cond then else?) — conditional. Receives unevaluated args. */ +ray_t* ray_cond_fn(ray_t** args, int64_t n) { + if (n < 2) return ray_error("domain", NULL); + ray_t* cond = ray_eval(args[0]); + if (RAY_IS_ERR(cond)) return cond; + /* Materialize lazy handles before testing truthiness */ + if (ray_is_lazy(cond)) + cond = ray_lazy_materialize(cond); + if (RAY_IS_ERR(cond)) return cond; + int truthy = is_truthy(cond); + ray_release(cond); + if (truthy) return ray_eval(args[1]); + if (n >= 3) return ray_eval(args[2]); + /* No else branch: return 0 */ + return make_i64(0); +} + +/* (do expr1 expr2 ...) — evaluate in sequence, return last. Pushes local scope. */ +ray_t* ray_do_fn(ray_t** args, int64_t n) { + if (n == 0) return make_i64(0); + if (ray_env_push_scope() != RAY_OK) return ray_error("oom", NULL); + ray_t* result = NULL; + for (int64_t i = 0; i < n; i++) { + if (result) ray_release(result); + result = ray_eval(args[i]); + if (RAY_IS_ERR(result)) { + ray_env_pop_scope(); + return result; + } + } + ray_env_pop_scope(); + return result; +} + +/* ══════════════════════════════════════════ + * Lambda functions + * ══════════════════════════════════════════ */ + +/* (fn [params...] body...) — create a lambda object. + * Stores params list and body expressions in data area. 
*/ +ray_t* ray_fn(ray_t** args, int64_t n) { + if (n < 2) return ray_error("domain", NULL); + /* args[0] = param vector (list of name symbols), args[1..n-1] = body exprs */ + ray_t* params_list = args[0]; + + /* Reject lambda parameters named under the reserved `.` namespace. + * Even though the bytecode VM resolves them to slot indices rather + * than env entries, a user-defined fn with `.sys.gc` as a parameter + * would silently override the builtin inside the body via the + * compile-time name→slot map — that counts as shadowing and is + * disallowed for the same reason `(let .sys.gc ...)` is. Lambda + * param lists are SYM *vectors* (not RAY_LISTs): `[a b c]` of all + * syms is stored as a flat i64 sym-id array. */ + if (params_list) { + int64_t nparams = ray_len(params_list); + if (params_list->type == RAY_SYM) { + int64_t* ids = (int64_t*)ray_data(params_list); + for (int64_t i = 0; i < nparams; i++) + if (ray_sym_is_reserved(ids[i])) + return ray_error("reserve", + "lambda parameter '%s' is in the reserved namespace", + ray_str_ptr(ray_sym_str(ids[i]))); + } else if (params_list->type == RAY_LIST) { + ray_t** pelems = (ray_t**)ray_data(params_list); + for (int64_t i = 0; i < nparams; i++) { + ray_t* p = pelems[i]; + if (p && p->type == -RAY_SYM && ray_sym_is_reserved(p->i64)) + return ray_error("reserve", + "lambda parameter '%s' is in the reserved namespace", + ray_str_ptr(ray_sym_str(p->i64))); + } + } + } + + /* Create lambda object with space for 7 slots: + * [0] params, [1] body, [2] bytecode, [3] constants, [4] n_locals, + * [5] nfo (source location), [6] dbg (debug metadata) */ + ray_t* lambda = ray_alloc(7 * sizeof(ray_t*)); + if (!lambda) return ray_error("oom", NULL); + lambda->type = RAY_LAMBDA; + lambda->attrs = 0; + lambda->len = 0; + + /* Store params list */ + ray_retain(params_list); + LAMBDA_PARAMS(lambda) = params_list; + + /* Build body list: wrap body expressions in a RAY_LIST */ + int64_t body_count = n - 1; + ray_t* body = 
ray_alloc(body_count * sizeof(ray_t*)); + if (!body) { + ray_release(params_list); + ray_release(lambda); + return ray_error("oom", NULL); + } + body->type = RAY_LIST; + body->len = body_count; + ray_t** body_elems = (ray_t**)ray_data(body); + for (int64_t i = 0; i < body_count; i++) { + ray_retain(args[i + 1]); + body_elems[i] = args[i + 1]; + } + LAMBDA_BODY(lambda) = body; + + /* Clear compiled slots */ + LAMBDA_BC(lambda) = NULL; + LAMBDA_CONSTS(lambda) = NULL; + LAMBDA_NLOCALS(lambda) = 0; + + /* Attach source location info from current eval context */ + if (g_eval_nfo) { + LAMBDA_NFO(lambda) = g_eval_nfo; + ray_retain(g_eval_nfo); + } else { + LAMBDA_NFO(lambda) = NULL; + } + LAMBDA_DBG(lambda) = NULL; + + return lambda; +} + +/* Build a [span_i64, filename, fn_name, source] frame from a resolved span + * and append it to g_error_trace. Shared by the bytecode and eval paths. */ +static void append_error_frame(ray_t* nfo, ray_span_t span) { + if (span.id == 0) return; + + ray_t* frame = ray_alloc(4 * sizeof(ray_t*)); + if (!frame || RAY_IS_ERR(frame)) return; + frame->type = RAY_LIST; + frame->len = 4; + ray_t** fe = (ray_t**)ray_data(frame); + + fe[0] = ray_i64(span.id); + if (nfo && NFO_FILENAME(nfo)) { + fe[1] = NFO_FILENAME(nfo); + ray_retain(fe[1]); + } else { + fe[1] = ray_str("", 9); + } + fe[2] = NULL; + if (nfo && NFO_SOURCE(nfo)) { + fe[3] = NFO_SOURCE(nfo); + ray_retain(fe[3]); + } else { + fe[3] = ray_str("", 0); + } + + if (!g_error_trace) { + g_error_trace = ray_alloc(sizeof(ray_t*)); + if (!g_error_trace) { ray_release(frame); return; } + g_error_trace->type = RAY_LIST; + g_error_trace->len = 1; + ((ray_t**)ray_data(g_error_trace))[0] = frame; + } else { + g_error_trace = ray_list_append(g_error_trace, frame); + ray_release(frame); + } +} + +/* Build a single error trace frame from a lambda's debug/nfo info at the given + * bytecode IP. 
*/ +static void add_error_frame(ray_t* fn, int32_t ip) { + if (!fn || fn->type != RAY_LAMBDA) return; + ray_t* dbg = LAMBDA_DBG(fn); + ray_t* nfo = LAMBDA_NFO(fn); + if (!dbg && !nfo) return; + + ray_span_t span = {0}; + if (dbg) span = ray_bc_dbg_get(dbg, ip); + append_error_frame(nfo, span); +} + +/* Add error frame from eval context (nfo + AST node) for call-site errors. */ +static void add_eval_error_frame(ray_t* nfo, ray_t* node) { + if (!nfo || !node) return; + append_error_frame(nfo, ray_nfo_get(nfo, node)); +} + +/* Execute compiled bytecode for a lambda. */ +static ray_t* vm_exec(ray_t* lambda, ray_t** call_args, int64_t argc); + +/* Call a lambda: compile on first call, then execute bytecode. */ +ray_t* call_lambda(ray_t* lambda, ray_t** call_args, int64_t argc) { + /* Lazy compilation on first call */ + if (!LAMBDA_IS_COMPILED(lambda)) { + ray_compile(lambda); + } + + /* If compilation succeeded, run bytecode; otherwise fall back to tree-walk */ + if (LAMBDA_IS_COMPILED(lambda)) { + return vm_exec(lambda, call_args, argc); + } + + /* Fallback: tree-walking interpreter */ + ray_t* params_list = LAMBDA_PARAMS(lambda); + ray_t* body = LAMBDA_BODY(lambda); + + int64_t param_count = ray_len(params_list); + + if (argc != param_count) + return ray_error("arity", "expected %" PRId64 " args, got %" PRId64, param_count, argc); + + if (ray_env_push_scope() != RAY_OK) return ray_error("oom", NULL); + + /* Bind 'self' to the current lambda for recursion */ + { + static int64_t self_sym_id = -1; + if (self_sym_id < 0) self_sym_id = ray_sym_intern("self", 4); + ray_env_set_local(self_sym_id, lambda); + } + + int64_t* param_ids = (int64_t*)ray_data(params_list); + for (int64_t i = 0; i < param_count && i < argc; i++) { + (void)ray_env_set_local(param_ids[i], call_args[i]); + } + + int64_t body_count = ray_len(body); + ray_t** body_exprs = (ray_t**)ray_data(body); + ray_t* result = NULL; + for (int64_t i = 0; i < body_count; i++) { + if (result) ray_release(result); + 
result = ray_eval(body_exprs[i]); + if (RAY_IS_ERR(result)) { + ray_env_pop_scope(); + return result; + } + } + + ray_env_pop_scope(); + return result; +} + +/* ══════════════════════════════════════════ + * Stack-based VM executor (computed goto, frame-based) + * ══════════════════════════════════════════ */ + +static _Thread_local ray_vm_t *__VM = NULL; + +static ray_t* vm_exec(ray_t* lambda, ray_t** call_args, int64_t argc) { + /* Computed goto dispatch table */ + static void *dispatch[OP__COUNT] = { + [OP_RET] = &&op_ret, + [OP_JMP] = &&op_jmp, + [OP_JMPF] = &&op_jmpf, + [OP_LOADCONST] = &&op_loadconst, + [OP_LOADENV] = &&op_loadenv, + [OP_STOREENV] = &&op_storeenv, + [OP_POP] = &&op_pop, + [OP_RESOLVE] = &&op_resolve, + [OP_CALL1] = &&op_call1, + [OP_CALL2] = &&op_call2, + [OP_CALLN] = &&op_calln, + [OP_CALLF] = &&op_callf, + [OP_CALLS] = &&op_calls, + [OP_CALLD] = &&op_calld, + [OP_DUP] = &&op_dup, + [OP_LOADCONST_W] = &&op_loadconst_w, + [OP_RESOLVE_W] = &&op_resolve_w, + [OP_TRAP] = &&op_trap, + [OP_TRAP_END] = &&op_trap_end, + }; + + /* Arity check before allocating VM state */ + { + int64_t param_count = ray_len(LAMBDA_PARAMS(lambda)); + if (argc != param_count) + return ray_error("arity", "expected %" PRId64 " args, got %" PRId64, param_count, argc); + } + + ray_t *vm_block = ray_alloc(sizeof(ray_vm_t)); + if (!vm_block || RAY_IS_ERR(vm_block)) return ray_error("oom", NULL); + ray_vm_t *vmp = (ray_vm_t *)ray_data(vm_block); + memset(vmp, 0, sizeof(ray_vm_t)); + __VM = vmp; + +#define vm (*vmp) + + /* Set up initial frame */ + vm.fn = lambda; + ray_retain(lambda); + int32_t n_locals = LAMBDA_NLOCALS(lambda); + vm.fp = 0; + vm.sp = n_locals; + + /* Bind parameters into local slots */ + int64_t param_count = ray_len(LAMBDA_PARAMS(lambda)); + for (int64_t i = 0; i < param_count && i < argc; i++) { + ray_retain(call_args[i]); + vm.ps[i] = call_args[i]; + } + + uint8_t *code = (uint8_t *)ray_data(LAMBDA_BC(lambda)); + ray_t **cpool = (ray_t 
**)ray_data(LAMBDA_CONSTS(lambda)); + int32_t ip = 0; + ray_t *vm_err_obj = NULL; + +#define DISPATCH() goto *dispatch[code[ip++]] +#define PUSH(v) do { if (vm.sp >= VM_STACK_SIZE) goto vm_error_limit; vm.ps[vm.sp++] = (v); } while(0) +#define POP() (vm.ps[--vm.sp]) +#define PEEK() (vm.ps[vm.sp - 1]) +#define LOCAL(s) (vm.ps[vm.fp + (s)]) + + DISPATCH(); + +op_loadconst: { + uint8_t idx = code[ip++]; + ray_t *val = cpool[idx]; + ray_retain(val); + PUSH(val); + DISPATCH(); +} + +op_loadconst_w: { + uint16_t idx = (uint16_t)(code[ip] << 8) | code[ip + 1]; + ip += 2; + ray_t *val = cpool[idx]; + ray_retain(val); + PUSH(val); + DISPATCH(); +} + +op_loadenv: { + uint8_t slot = code[ip++]; + ray_t *val = LOCAL(slot); + if (val) ray_retain(val); + else val = make_i64(0); + PUSH(val); + DISPATCH(); +} + +op_storeenv: { + uint8_t slot = code[ip++]; + ray_t *val = POP(); + if (LOCAL(slot)) ray_release(LOCAL(slot)); + LOCAL(slot) = val; + DISPATCH(); +} + +op_pop: { + if (vm.sp > vm.fp + n_locals) { + ray_t *val = POP(); + if (val) ray_release(val); + } + DISPATCH(); +} + +op_dup: { + ray_t *val = PEEK(); + ray_retain(val); + PUSH(val); + DISPATCH(); +} + +op_resolve: { + uint8_t idx = code[ip++]; + ray_t *name_obj = cpool[idx]; + ray_t *val = ray_env_resolve(name_obj->i64); + if (!val) goto vm_error_name; + /* env_resolve returns an owned ref (rc >= 1); no extra retain needed. + * It can also return a real error (e.g. nyi from a parted-target link + * deref inside the dotted walker) — surface that as a VM error rather + * than pushing it onto the stack as if it were a normal value. 
*/ + if (RAY_IS_ERR(val)) { vm_err_obj = val; goto vm_error; } + PUSH(val); + DISPATCH(); +} + +op_resolve_w: { + uint16_t idx = (uint16_t)((code[ip] << 8) | code[ip + 1]); + ip += 2; + ray_t *name_obj = cpool[idx]; + ray_t *val = ray_env_resolve(name_obj->i64); + if (!val) goto vm_error_name; + if (RAY_IS_ERR(val)) { vm_err_obj = val; goto vm_error; } + PUSH(val); + DISPATCH(); +} + +op_jmp: { + int16_t offset = (int16_t)((code[ip] << 8) | code[ip + 1]); + ip += 2; + ip += offset; + if (offset < 0 && g_eval_interrupted) goto vm_error_limit; + DISPATCH(); +} + +op_jmpf: { + int16_t offset = (int16_t)((code[ip] << 8) | code[ip + 1]); + ip += 2; + ray_t *cond = POP(); + int truthy = is_truthy(cond); + ray_release(cond); + if (!truthy) ip += offset; + DISPATCH(); +} + +op_call1: { + ray_t *arg = POP(); + ray_t *fn_obj = POP(); + ray_unary_fn fn = (ray_unary_fn)(uintptr_t)fn_obj->i64; + ray_t *result; + if (RAY_UNLIKELY(RAY_IS_NULL(arg))) { + result = (fn == (ray_unary_fn)ray_nil_fn || fn == (ray_unary_fn)ray_type_fn) + ? fn(arg) : ray_error("type", NULL); + } else if ((fn_obj->attrs & RAY_FN_ATOMIC) && arg->type >= 0) + result = atomic_map_unary(fn, arg); + else + result = fn(arg); + ray_release(arg); + ray_release(fn_obj); + if (RAY_IS_ERR(result)) { vm_err_obj = result; goto vm_error; } + PUSH(result); + DISPATCH(); +} + +op_call2: { + ray_t *right = POP(); + ray_t *left = POP(); + ray_t *fn_obj = POP(); + ray_binary_fn fn = (ray_binary_fn)(uintptr_t)fn_obj->i64; + ray_t *result; + if (RAY_UNLIKELY(RAY_IS_NULL(left) || RAY_IS_NULL(right))) { + result = (fn == (ray_binary_fn)ray_eq_fn || fn == (ray_binary_fn)ray_neq_fn) + ? fn(left, right) : ray_error("type", NULL); + /* Fast path: atoms have negative type — skip collection check entirely. + * Only call is_collection when at least one arg has type >= 0 (vector/list). 
*/ + } else if ((fn_obj->attrs & RAY_FN_ATOMIC) && (left->type >= 0 || right->type >= 0)) + result = atomic_map_binary_op(fn, RAY_FN_OPCODE(fn_obj), left, right); + else + result = fn(left, right); + ray_release(left); + ray_release(right); + ray_release(fn_obj); + if (RAY_IS_ERR(result)) { vm_err_obj = result; goto vm_error; } + PUSH(result); + DISPATCH(); +} + +op_calln: { + uint8_t n = code[ip++]; + if (n > 64) goto vm_error; + ray_t *fn_args[64]; + for (int32_t i = n - 1; i >= 0; i--) + fn_args[i] = POP(); + ray_t *fn_obj = POP(); + ray_vary_fn fn = (ray_vary_fn)(uintptr_t)fn_obj->i64; + ray_t *result = fn(fn_args, n); + for (int32_t i = 0; i < n; i++) + ray_release(fn_args[i]); + ray_release(fn_obj); + if (RAY_IS_ERR(result)) { vm_err_obj = result; goto vm_error; } + PUSH(result); + DISPATCH(); +} + +op_callf: { + uint8_t n = code[ip++]; + if (n > 64) goto vm_error; + ray_t *fn_args[64]; + for (int32_t i = n - 1; i >= 0; i--) + fn_args[i] = POP(); + ray_t *fn_obj = POP(); + + /* Compiled lambda: push frame, switch to callee bytecode */ + if (fn_obj->type == RAY_LAMBDA) { + if (!LAMBDA_IS_COMPILED(fn_obj)) + ray_compile(fn_obj); + + if (LAMBDA_IS_COMPILED(fn_obj)) { + /* All checks before any VM state mutation. + * Stack limits take priority over arity (safety first). 
*/ + int64_t pcnt = ray_len(LAMBDA_PARAMS(fn_obj)); + int32_t callee_locals = LAMBDA_NLOCALS(fn_obj); + if (vm.rp >= VM_STACK_SIZE || + vm.sp + callee_locals >= VM_STACK_SIZE) { + for (int32_t i = 0; i < n; i++) + if (fn_args[i]) ray_release(fn_args[i]); + ray_release(fn_obj); + goto vm_error_limit; + } + if (n != pcnt) { + for (int32_t i = 0; i < n; i++) + if (fn_args[i]) ray_release(fn_args[i]); + ray_release(fn_obj); + vm_err_obj = ray_error("arity", "expected %" PRId64 " args, got %d", pcnt, n); + goto vm_error; + } + + /* Push return frame */ + vm.rs[vm.rp++] = (vm_ctx_t){ .fn = vm.fn, .fp = vm.fp, .ip = ip }; + + /* Set up new frame */ + vm.fn = fn_obj; /* takes ownership of stack ref */ + vm.fp = vm.sp; + vm.sp += callee_locals; + n_locals = callee_locals; + + /* Bind parameters */ + int64_t bind = pcnt < n ? pcnt : n; + for (int64_t i = 0; i < bind; i++) + LOCAL(i) = fn_args[i]; /* transfer ownership from args */ + for (int32_t i = (int32_t)bind; i < callee_locals; i++) + LOCAL(i) = NULL; + for (int64_t i = bind; i < n; i++) + ray_release(fn_args[i]); /* excess args */ + + /* Check for Ctrl-C interrupt on each compiled call */ + if (g_eval_interrupted) goto vm_error_limit; + + /* Switch to callee bytecode */ + code = (uint8_t *)ray_data(LAMBDA_BC(fn_obj)); + cpool = (ray_t **)ray_data(LAMBDA_CONSTS(fn_obj)); + ip = 0; + DISPATCH(); + } + } + + /* Non-lambda or uncompiled: dispatch by type */ + { + ray_t *result; + switch (fn_obj->type) { + case RAY_UNARY: + if (fn_is_restricted(fn_obj)) { for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); result = ray_error("access", "restricted"); break; } + if (n != 1) { for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); result = ray_error("arity", "expected 1 arg, got %d", n); break; } + result = ((ray_unary_fn)(uintptr_t)fn_obj->i64)(fn_args[0]); + ray_release(fn_args[0]); + break; + case RAY_BINARY: + if (fn_is_restricted(fn_obj)) { for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); result = 
ray_error("access", "restricted"); break; } + if (n != 2) { for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); result = ray_error("arity", "expected 2 args, got %d", n); break; } + result = ((ray_binary_fn)(uintptr_t)fn_obj->i64)(fn_args[0], fn_args[1]); + ray_release(fn_args[0]); + ray_release(fn_args[1]); + break; + case RAY_VARY: + if (fn_is_restricted(fn_obj)) { for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); result = ray_error("access", "restricted"); break; } + result = ((ray_vary_fn)(uintptr_t)fn_obj->i64)(fn_args, n); + for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); + break; + case RAY_LAMBDA: + result = call_lambda(fn_obj, fn_args, n); + for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); + break; + default: + for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); + result = ray_error("type", NULL); + break; + } + ray_release(fn_obj); + if (RAY_IS_ERR(result)) { vm_err_obj = result; goto vm_error; } + PUSH(result); + DISPATCH(); + } +} + +op_calls: { + /* Self-recursive call — lean path matching rayforce 1. + * No fn object on stack. Args are already at sp-argc..sp. + * Push return frame, set fp so args become locals, extend for extra locals. */ + uint8_t argc = code[ip++]; + + /* Stack overflow guard */ + if (RAY_UNLIKELY(vm.rp >= VM_STACK_SIZE)) goto vm_error_limit; + if (RAY_UNLIKELY(vm.sp + n_locals >= VM_STACK_SIZE)) goto vm_error_limit; + + /* Push return frame (fn=NULL signals self-call to OP_RET) */ + vm.rs[vm.rp++] = (vm_ctx_t){ .fn = NULL, .fp = vm.fp, .ip = ip }; + + /* Args on stack become the new frame's first locals. + * Compiler guarantees argc == param count, so argc <= n_locals. */ + vm.fp = vm.sp - argc; + + /* Extend stack for extra locals beyond params (let bindings etc.) 
*/ + for (int32_t i = argc; i < n_locals; i++) + vm.ps[vm.sp++] = NULL; + + ip = 0; + DISPATCH(); +} + +op_calld: { + /* Dynamic dispatch: evaluate AST directly via ray_eval */ + uint8_t n = code[ip++]; + if (n == 0) { + /* n=0: the AST itself is on the stack, eval it directly */ + ray_t *ast = POP(); + ray_t *result = ray_eval(ast); + ray_release(ast); + if (RAY_IS_ERR(result)) { vm_err_obj = result; goto vm_error; } + PUSH(result); + DISPATCH(); + } + /* n>0: build call list and eval */ + ray_t *fn_args[64]; + for (int32_t i = n - 1; i >= 0; i--) + fn_args[i] = POP(); + ray_t *fn_obj = POP(); + + ray_t *call_list = ray_alloc((n + 1) * sizeof(ray_t *)); + if (!call_list || RAY_IS_ERR(call_list)) { + for (int32_t i = 0; i < n; i++) ray_release(fn_args[i]); + ray_release(fn_obj); + goto vm_error; + } + call_list->type = RAY_LIST; + call_list->len = n + 1; + ray_t **elems = (ray_t **)ray_data(call_list); + elems[0] = fn_obj; + for (int32_t i = 0; i < n; i++) + elems[i + 1] = fn_args[i]; + + ray_t *result = ray_eval(call_list); + ray_release(call_list); + if (RAY_IS_ERR(result)) { vm_err_obj = result; goto vm_error; } + PUSH(result); + DISPATCH(); +} + +op_ret: { + ray_t *result; + bool from_stack = (vm.sp > vm.fp + n_locals); + if (from_stack) { + result = POP(); + ray_retain(result); /* prevent free during cleanup if aliased in locals */ + } else { + result = RAY_NULL_OBJ; + } + + /* Clean up current frame — release all locals and leftover stack slots */ + while (vm.sp > vm.fp) { + ray_t *v = vm.ps[--vm.sp]; + if (v) ray_release(v); + } + + /* Undo protective retain — POP's reference is the caller's ownership */ + if (from_stack) ray_release(result); + + if (vm.rp == 0) { + /* Top-level return */ + ray_release(vm.fn); + __VM = NULL; +#undef vm + ray_free(vm_block); + return result; /* caller owns the POP'd reference */ +#define vm (*vmp) + } + + /* Pop return frame */ + vm.rp--; + vm.fp = vm.rs[vm.rp].fp; + ip = vm.rs[vm.rp].ip; + if (vm.rs[vm.rp].fn) { + /* Normal 
call: restore caller's function */ + ray_release(vm.fn); + vm.fn = vm.rs[vm.rp].fn; + code = (uint8_t *)ray_data(LAMBDA_BC(vm.fn)); + cpool = (ray_t **)ray_data(LAMBDA_CONSTS(vm.fn)); + n_locals = LAMBDA_NLOCALS(vm.fn); + } + /* Self-call (fn==NULL): vm.fn/code/cpool/n_locals are already correct */ + PUSH(result); + DISPATCH(); +} + +op_trap: { + int16_t offset = (int16_t)((code[ip] << 8) | code[ip + 1]); + ip += 2; + if (vm.tp >= VM_TRAP_SIZE) goto vm_error_limit; + vm.ts[vm.tp++] = (vm_trap_t){ + .rp = vm.rp, .sp = vm.sp, .handler_ip = ip + offset, + .fn = vm.fn, .fp = vm.fp, .n_locals = n_locals + }; + ray_retain(vm.fn); + DISPATCH(); +} + +op_trap_end: { + if (vm.tp > 0) { + vm.tp--; + ray_release(vm.ts[vm.tp].fn); + } + DISPATCH(); +} + + const char *vm_err_str = "domain"; + const char *vm_err_detail = NULL; + goto vm_error_cleanup; + +vm_error_limit: + vm_err_str = "limit"; + vm_err_detail = "stack overflow"; + goto vm_error_cleanup; + +vm_error_name: + vm_err_str = "name"; + vm_err_detail = NULL; + goto vm_error_cleanup; + +vm_error: + vm_err_str = "domain"; + vm_err_detail = NULL; + +vm_error_cleanup: { + /* Check for trap frame */ + if (vm.tp > 0) { + vm.tp--; + vm_trap_t trap = vm.ts[vm.tp]; + + /* Clean up return frames above trap point */ + while (vm.rp > trap.rp) { + vm.rp--; + if (vm.rs[vm.rp].fn) ray_release(vm.rs[vm.rp].fn); + } + + /* Clean up stack above trap point */ + while (vm.sp > trap.sp) { + ray_t *v = vm.ps[--vm.sp]; + if (v) ray_release(v); + } + + /* Get error value — prefer vm_err_obj (VM-detected errors like + * arity mismatch) over __raise_val (user raise expressions) */ + ray_t *err_val = vm_err_obj ? 
vm_err_obj : __raise_val; + vm_err_obj = NULL; + __raise_val = NULL; + if (!err_val) err_val = make_i64(0); + + /* Restore context and push error value */ + ray_release(vm.fn); + vm.fn = trap.fn; /* takes ownership from trap frame */ + vm.fp = trap.fp; + n_locals = trap.n_locals; + code = (uint8_t *)ray_data(LAMBDA_BC(vm.fn)); + cpool = (ray_t **)ray_data(LAMBDA_CONSTS(vm.fn)); + ip = trap.handler_ip; + PUSH(err_val); + DISPATCH(); + } + + /* No trap frame — regular error cleanup */ + + /* Build error trace: current frame + callers from return stack */ + add_error_frame(vm.fn, ip > 0 ? ip - 1 : 0); + for (int32_t i = vm.rp - 1; i >= 0; i--) { + if (vm.rs[i].fn) + add_error_frame(vm.rs[i].fn, vm.rs[i].ip > 0 ? vm.rs[i].ip - 1 : 0); + } + + for (int32_t i = 0; i < vm.sp; i++) + if (vm.ps[i]) ray_release(vm.ps[i]); + ray_release(vm.fn); + for (int32_t i = 0; i < vm.rp; i++) + if (vm.rs[i].fn) ray_release(vm.rs[i].fn); + for (int32_t i = 0; i < vm.tp; i++) + ray_release(vm.ts[i].fn); + __VM = NULL; +#undef vm + ray_free(vm_block); + if (vm_err_obj) + return vm_err_obj; + if (vm_err_detail) + return ray_error(vm_err_str, "%s", vm_err_detail); + return ray_error(vm_err_str, NULL); +} + +#undef DISPATCH +#undef PUSH +#undef POP +#undef PEEK +#undef LOCAL +#undef vm +} + + +/* ray_enlist_fn, ray_dict_fn, ray_nil_fn, ray_where_fn, ray_group_fn, + * ray_concat_fn, ray_raze_fn, ray_within_fn, ray_fdiv_fn + * moved to ops/builtins.c */ + +/* ══════════════════════════════════════════ + * Builtin registration + * ══════════════════════════════════════════ */ + +/* Bind `obj` under `name` in the global env. For reserved-namespace + * names like `.sys.gc`: + * + * - `.sys` itself is a RAY_DICT in the env (keys SYM vec + vals + * LIST). Typing `.sys` at the REPL returns the whole dict for + * introspection. + * - `.sys.gc` is ALSO bound flat in the env, pointing at the same + * function object. 
This keeps direct lookup O(1), surfaces the + * full name to `ray_env_lookup_prefix` (so tab completion and + * REPL highlighting continue to see every reserved builtin), + * and lets error messages cite the fully-qualified name. + * + * The two bindings are created at startup and kept in sync — writes + * to any `.`-prefixed name are refused by ray_env_set, so user code + * can't drift them apart. Only 2-level namespaces are in use; the + * assert below guards against silent breakage if that changes. */ +/* Get-or-create a child dict by key on `parent`. ray_dict_get + * returns an owned ref (or NULL if missing), so we either reuse it + * (after type-checking) or build a fresh subdict and upsert it. + * Returns the (possibly-COWd) parent; the child is handed back via + * `*out_child`, owned by the caller. */ +static ray_t* dict_get_or_create_subdict(ray_t* parent, ray_t* key, + ray_t** out_child) { + ray_t* existing = ray_dict_get(parent, key); + if (existing && !RAY_IS_ERR(existing) && existing->type == RAY_DICT) { + *out_child = existing; + return parent; + } + if (existing) ray_release(existing); + ray_t* keys = ray_sym_vec_new(RAY_SYM_W64, 4); + ray_t* vals = ray_list_new(4); + assert(keys && !RAY_IS_ERR(keys) && vals && !RAY_IS_ERR(vals)); + ray_t* child = ray_dict_new(keys, vals); + assert(child && !RAY_IS_ERR(child)); + ray_retain(child); /* caller retains; dict_upsert below also retains */ + parent = ray_dict_upsert(parent, key, child); + *out_child = child; + return parent; +} + +static void reg_bind(const char* name, ray_t* obj) { + int64_t sym = ray_sym_intern(name, strlen(name)); + if (name[0] == '.' && ray_sym_is_dotted(sym)) { + const int64_t* segs; + int nsegs = ray_sym_segs(sym, &segs); + assert(nsegs >= 2 && "reg_bind: dotted reserved name must have ≥ 2 segments"); + + int64_t root_sym = segs[0]; /* e.g. `.sys` or `.db` */ + int64_t leaf_sym = segs[nsegs-1];/* leaf action sym */ + + /* 1. Get-or-create the root dict bound at `.`. 
*/ + ray_t* root = ray_env_get(root_sym); + if (root) { + ray_retain(root); + } else { + ray_t* keys = ray_sym_vec_new(RAY_SYM_W64, 4); + ray_t* vals = ray_list_new(4); + assert(keys && !RAY_IS_ERR(keys) && vals && !RAY_IS_ERR(vals)); + root = ray_dict_new(keys, vals); + assert(root && !RAY_IS_ERR(root)); + } + + /* 2. For each intermediate segment, descend into (or create) + * a sub-dict. Two-level names skip this loop entirely + * and fall through to the leaf upsert below. After the + * walk, `cur` points at the dict that should hold the + * leaf; `chain[]` records the parents we still need to + * write back so a COW upsert at the deepest level + * propagates upward through every parent. */ + enum { MAX_DEPTH = 4 }; + ray_t* chain[MAX_DEPTH] = { root }; + int64_t chain_keys[MAX_DEPTH] = { 0 }; + int chain_len = 1; + ray_t* cur = root; + for (int i = 1; i < nsegs - 1; i++) { + assert(chain_len < MAX_DEPTH); + ray_t* mid_key = ray_sym(segs[i]); + ray_t* child = NULL; + cur = dict_get_or_create_subdict(cur, mid_key, &child); + ray_release(mid_key); + chain[chain_len - 1] = cur; + chain_keys[chain_len - 1] = segs[i]; + chain[chain_len++] = child; + cur = child; + } + + /* 3. Upsert the leaf into the deepest dict, then walk back up + * re-upserting any COWd parents into their parents. */ + ray_t* leaf_key = ray_sym(leaf_sym); + ray_t* deepest = ray_dict_upsert(cur, leaf_key, obj); + ray_release(leaf_key); + chain[chain_len - 1] = deepest; + for (int i = chain_len - 1; i > 0; i--) { + ray_t* parent_key = ray_sym(chain_keys[i - 1]); + chain[i - 1] = ray_dict_upsert(chain[i - 1], parent_key, chain[i]); + ray_release(parent_key); + ray_release(chain[i]); /* dict_upsert retained */ + } + + /* 4. Bind the (possibly-COWd) root and the flat fully-qualified + * name so ray_env_lookup_prefix (REPL completion / syntax + * highlighting) enumerates every reserved builtin by name. 
+ * ray_env_bind_flat skips the dotted-walk so this doesn't + * re-upsert into the same dict we just built. */ + assert(ray_env_bind(root_sym, chain[0]) == RAY_OK); + ray_release(chain[0]); + assert(ray_env_bind_flat(sym, obj) == RAY_OK); + return; + } + assert(ray_env_bind(sym, obj) == RAY_OK); +} + +static void register_binary(const char* name, uint8_t attrs, ray_binary_fn fn) { + ray_t* obj = ray_fn_binary(name, attrs, fn); + reg_bind(name, obj); + ray_release(obj); +} + +/* Register binary with a DAG opcode for vectorized execution */ +static void register_binary_op(const char* name, uint8_t attrs, ray_binary_fn fn, uint16_t opcode) { + ray_t* obj = ray_fn_binary(name, attrs, fn); + RAY_FN_SET_OPCODE(obj, opcode); + reg_bind(name, obj); + ray_release(obj); +} + +static void register_unary(const char* name, uint8_t attrs, ray_unary_fn fn) { + ray_t* obj = ray_fn_unary(name, attrs, fn); + reg_bind(name, obj); + ray_release(obj); +} + +static void register_unary_op(const char* name, uint8_t attrs, ray_unary_fn fn, uint16_t opcode) { + ray_t* obj = ray_fn_unary(name, attrs, fn); + RAY_FN_SET_OPCODE(obj, opcode); + reg_bind(name, obj); + ray_release(obj); +} + +static void register_vary(const char* name, uint8_t attrs, ray_vary_fn fn) { + ray_t* obj = ray_fn_vary(name, attrs, fn); + reg_bind(name, obj); + ray_release(obj); +} + +static void ray_register_builtins(void) { + register_binary_op("+", RAY_FN_ATOMIC, ray_add_fn, OP_ADD); + register_binary_op("-", RAY_FN_ATOMIC, ray_sub_fn, OP_SUB); + register_binary_op("*", RAY_FN_ATOMIC, ray_mul_fn, OP_MUL); + register_binary_op("/", RAY_FN_ATOMIC, ray_div_fn, OP_DIV); + register_binary_op("%", RAY_FN_ATOMIC, ray_mod_fn, OP_MOD); + register_binary_op(">", RAY_FN_ATOMIC, ray_gt_fn, OP_GT); + register_binary_op("<", RAY_FN_ATOMIC, ray_lt_fn, OP_LT); + register_binary_op(">=", RAY_FN_ATOMIC, ray_gte_fn, OP_GE); + register_binary_op("<=", RAY_FN_ATOMIC, ray_lte_fn, OP_LE); + register_binary_op("==", RAY_FN_ATOMIC, ray_eq_fn, 
OP_EQ); + register_binary_op("!=", RAY_FN_ATOMIC, ray_neq_fn, OP_NE); + /* Special-form so args are passed unevaluated and the kernel can + * short-circuit on the first determining scalar (matches v1 and the + * Lisp/Clojure convention). */ + register_vary("and", RAY_FN_SPECIAL_FORM, ray_and_vary_fn); + register_vary("or", RAY_FN_SPECIAL_FORM, ray_or_vary_fn); + register_unary_op("not", RAY_FN_NONE, ray_not_fn, OP_NOT); + register_unary_op("neg", RAY_FN_ATOMIC, ray_neg_fn, OP_NEG); + register_unary("round", RAY_FN_ATOMIC, ray_round_fn); + register_unary_op("floor", RAY_FN_ATOMIC, ray_floor_fn, OP_FLOOR); + register_unary_op("ceil", RAY_FN_ATOMIC, ray_ceil_fn, OP_CEIL); + register_unary_op("abs", RAY_FN_ATOMIC, ray_abs_fn, OP_ABS); + register_unary_op("sqrt", RAY_FN_ATOMIC, ray_sqrt_fn, OP_SQRT); + register_unary_op("log", RAY_FN_ATOMIC, ray_log_fn, OP_LOG); + register_unary_op("exp", RAY_FN_ATOMIC, ray_exp_fn, OP_EXP); + + /* Special forms */ + register_binary("set", RAY_FN_SPECIAL_FORM | RAY_FN_RESTRICTED, ray_set_fn); + register_binary("let", RAY_FN_SPECIAL_FORM, ray_let_fn); + register_vary("if", RAY_FN_SPECIAL_FORM, ray_cond_fn); + register_vary("do", RAY_FN_SPECIAL_FORM, ray_do_fn); + register_vary("fn", RAY_FN_SPECIAL_FORM, ray_fn); + + /* Aggregation builtins */ + register_unary("sum", RAY_FN_AGGR, ray_sum_fn); + register_unary("count", RAY_FN_AGGR, ray_count_fn); + register_unary("avg", RAY_FN_AGGR, ray_avg_fn); + register_unary("min", RAY_FN_AGGR, ray_min_fn); + register_unary("max", RAY_FN_AGGR, ray_max_fn); + register_unary("first", RAY_FN_NONE, ray_first_fn); + register_unary("last", RAY_FN_NONE, ray_last_fn); + register_unary("med", RAY_FN_AGGR, ray_med_fn); + register_unary("dev", RAY_FN_AGGR, ray_dev_fn); + register_unary("stddev", RAY_FN_AGGR, ray_stddev_fn); + register_unary("stddev_pop", RAY_FN_AGGR, ray_stddev_pop_fn); + register_unary("dev_pop", RAY_FN_AGGR, ray_stddev_pop_fn); + register_unary("var", RAY_FN_AGGR, ray_var_fn); + 
register_unary("var_pop", RAY_FN_AGGR, ray_var_pop_fn); + + /* Error handling */ + register_unary("raise", RAY_FN_NONE, ray_raise_fn); + register_binary("try", RAY_FN_SPECIAL_FORM, ray_try_fn); + + /* Higher-order functions */ + register_vary("map", RAY_FN_NONE, ray_map_fn); + register_vary("pmap", RAY_FN_NONE, ray_pmap_fn); + register_vary("fold", RAY_FN_NONE, ray_fold_fn); + register_vary("scan", RAY_FN_NONE, ray_scan_fn); + register_binary("filter", RAY_FN_NONE, ray_filter_fn); + register_vary("apply", RAY_FN_NONE, ray_apply_fn); + + /* Collection operations */ + register_unary("distinct", RAY_FN_NONE, ray_distinct_fn); + register_binary("in", RAY_FN_NONE, ray_in_fn); + register_binary("except", RAY_FN_NONE, ray_except_fn); + register_binary("union", RAY_FN_NONE, ray_union_fn); + register_binary("sect", RAY_FN_NONE, ray_sect_fn); + register_binary("take", RAY_FN_NONE, ray_take_fn); + register_binary("at", RAY_FN_NONE, ray_at_fn); + register_binary("find", RAY_FN_NONE, ray_find_fn); + register_unary("reverse", RAY_FN_NONE, ray_reverse_fn); + register_unary("til", RAY_FN_NONE, ray_til_fn); + + /* Sorting operations */ + register_unary("asc", RAY_FN_NONE, ray_asc_fn); + register_unary("desc", RAY_FN_NONE, ray_desc_fn); + register_unary("iasc", RAY_FN_NONE, ray_iasc_fn); + register_unary("idesc", RAY_FN_NONE, ray_idesc_fn); + register_unary("rank", RAY_FN_NONE, ray_rank_fn); + register_binary("xasc", RAY_FN_NONE, ray_xasc_fn); + register_binary("xdesc", RAY_FN_NONE, ray_xdesc_fn); + + /* Table operations */ + register_vary("list", RAY_FN_NONE, ray_list_fn); + register_binary("table", RAY_FN_NONE, ray_table_fn); + register_unary("key", RAY_FN_NONE, ray_key_fn); + register_unary("value", RAY_FN_NONE, ray_value_fn); + register_binary("union-all", RAY_FN_NONE, ray_union_all_fn); + /* table-distinct removed — distinct dispatches on type */ + + /* Query operations */ + register_vary("select", RAY_FN_SPECIAL_FORM, ray_select_fn); + register_vary("update", 
RAY_FN_SPECIAL_FORM | RAY_FN_RESTRICTED, ray_update_fn); + register_vary("insert", RAY_FN_SPECIAL_FORM | RAY_FN_RESTRICTED, ray_insert_fn); + register_vary("upsert", RAY_FN_SPECIAL_FORM | RAY_FN_RESTRICTED, ray_upsert_fn); + register_binary("xbar", RAY_FN_ATOMIC, ray_xbar_fn); + + /* Join operations */ + register_vary("left-join", RAY_FN_NONE, ray_left_join_fn); + register_vary("inner-join", RAY_FN_NONE, ray_inner_join_fn); + register_vary("anti-join", RAY_FN_NONE, ray_anti_join_fn); + register_vary("window-join", RAY_FN_SPECIAL_FORM, ray_window_join_fn); + register_vary("window-join1", RAY_FN_SPECIAL_FORM, ray_window_join_fn); + register_vary("asof-join", RAY_FN_NONE, ray_asof_join_fn); + + /* I/O builtins */ + register_vary("println", RAY_FN_NONE, ray_println_fn); + register_vary("show", RAY_FN_NONE, ray_show_fn); + register_vary("format", RAY_FN_NONE, ray_format_fn); + register_vary(".csv.read", RAY_FN_RESTRICTED, ray_read_csv_fn); + register_vary(".csv.write", RAY_FN_RESTRICTED, ray_write_csv_fn); + register_binary("as", RAY_FN_NONE, ray_cast_fn); + register_unary("type", RAY_FN_NONE, ray_type_fn); + register_unary("read", RAY_FN_RESTRICTED, ray_read_file_fn); + register_binary("write", RAY_FN_RESTRICTED, ray_write_file_fn); + register_unary("load", RAY_FN_RESTRICTED, ray_load_file_fn); + register_unary("exit", RAY_FN_RESTRICTED, ray_exit_fn); + register_vary("resolve", RAY_FN_SPECIAL_FORM, ray_resolve_fn); + register_vary("timeit", RAY_FN_SPECIAL_FORM, ray_timeit_fn); + + /* Additional builtins (ported from rayforce) */ + register_vary("enlist", RAY_FN_NONE, ray_enlist_fn); + register_binary("dict", RAY_FN_NONE, ray_dict_fn); + register_unary("nil?", RAY_FN_NONE, ray_nil_fn); + register_unary("where", RAY_FN_NONE, ray_where_fn); + register_unary("group", RAY_FN_NONE, ray_group_fn); + register_binary("concat", RAY_FN_NONE, ray_concat_fn); + register_unary("raze", RAY_FN_NONE, ray_raze_fn); + register_binary("within", RAY_FN_NONE, ray_within_fn); + 
register_binary("div", RAY_FN_ATOMIC, ray_fdiv_fn); + register_binary("rand", RAY_FN_NONE, ray_rand_fn); + register_binary("bin", RAY_FN_NONE, ray_bin_fn); + register_binary("binr", RAY_FN_NONE, ray_binr_fn); + register_vary("map-left", RAY_FN_NONE, ray_map_left_fn); + register_vary("map-right", RAY_FN_NONE, ray_map_right_fn); + + /* String operations */ + register_binary("split", RAY_FN_NONE, ray_split_fn); + + /* Serialization */ + register_unary("ser", RAY_FN_NONE, ray_ser_fn); + register_unary("de", RAY_FN_NONE, ray_de_fn); + + /* Splayed / partitioned table I/O */ + /* Database storage — splayed and parted table I/O. Kept under a + * dedicated `.db.*` namespace so format-specific siblings stay + * grouped (set/get/mount per format) and there's room to grow + * without polluting the top-level builtin namespace. */ + register_vary(".db.splayed.set", RAY_FN_RESTRICTED, ray_set_splayed_fn); + register_vary(".db.splayed.get", RAY_FN_NONE, ray_get_splayed_fn); + register_vary(".db.splayed.mount", RAY_FN_NONE, ray_db_splayed_mount_fn); + register_vary(".db.parted.get", RAY_FN_NONE, ray_get_parted_fn); + register_vary(".db.parted.mount", RAY_FN_NONE, ray_db_parted_mount_fn); + + /* GUID generation */ + register_unary("guid", RAY_FN_NONE, ray_guid_fn); + + /* In-place mutation */ + register_vary("alter", RAY_FN_SPECIAL_FORM, ray_alter_fn); + + /* Pattern matching */ + register_binary("like", RAY_FN_NONE, ray_like_fn); + + /* Temporal clocks */ + register_unary("date", RAY_FN_NONE, ray_date_clock_fn); + register_unary("time", RAY_FN_NONE, ray_time_clock_fn); + register_unary("timestamp", RAY_FN_NONE, ray_timestamp_clock_fn); + + /* Temporal field accessors: unary builtins that map 1:1 onto + * ray_temporal_extract. Registered here so `(ss ts)` / `(dd d)` + * participate in the normal call machinery and `ts.ss` / `d.dd` + * resolve through env_resolve's "is segment a callable" lookup + * instead of a bespoke sym→field table. 
*/ + register_unary("ss", RAY_FN_NONE, ray_extract_ss_fn); + register_unary("hh", RAY_FN_NONE, ray_extract_hh_fn); + register_unary("minute", RAY_FN_NONE, ray_extract_minute_fn); + register_unary("yyyy", RAY_FN_NONE, ray_extract_yyyy_fn); + register_unary("mm", RAY_FN_NONE, ray_extract_mm_fn); + register_unary("dd", RAY_FN_NONE, ray_extract_dd_fn); + register_unary("dow", RAY_FN_NONE, ray_extract_dow_fn); + register_unary("doy", RAY_FN_NONE, ray_extract_doy_fn); + + /* Eval, parse, print, meta */ + register_unary("eval", RAY_FN_NONE, ray_eval_builtin_fn); + register_unary("parse", RAY_FN_NONE, ray_parse_builtin_fn); + register_vary("print", RAY_FN_NONE, ray_print_fn); + register_unary("meta", RAY_FN_NONE, ray_meta_fn); + + /* System builtins — bound under the reserved `.sys.*` namespace so + * user code can't shadow them and a glance at the name identifies + * the category. */ + register_vary (".sys.gc", RAY_FN_NONE, ray_gc_fn); + register_unary(".sys.exec", RAY_FN_RESTRICTED, ray_system_fn); + /* Registry-dispatched system commands. `.sys.cmd "name args"` is + * the kdb-style entry point; the per-command direct builtins below + * skip the string parse for callers that already have a typed arg + * in hand. All share the table in lang/syscmd.c. */ + register_unary(".sys.cmd", RAY_FN_RESTRICTED, ray_syscmd_string_dispatch_fn); + register_vary (".sys.timeit", RAY_FN_NONE, ray_sys_timeit_fn); + register_unary(".sys.listen", RAY_FN_RESTRICTED, ray_sys_listen_fn); + register_vary (".sys.env", RAY_FN_NONE, ray_sys_env_fn); + + /* OS env / process interaction under `.os.*` */ + register_unary( ".os.getenv", RAY_FN_RESTRICTED, ray_getenv_fn); + register_binary(".os.setenv", RAY_FN_RESTRICTED, ray_setenv_fn); + /* Filesystem metadata (issue #36): size + listing. Predicates + * (exists / is-file / is-dir) are reachable via `try` on these + * or via shell fallback through `.sys.cmd`. 
*/ + register_unary( ".os.size", RAY_FN_NONE, ray_os_size_fn); + register_unary( ".os.list", RAY_FN_NONE, ray_os_list_fn); + + /* IPC client primitives under `.ipc.*` */ + register_unary( ".ipc.open", RAY_FN_RESTRICTED, ray_hopen_fn); + register_unary( ".ipc.close", RAY_FN_RESTRICTED, ray_hclose_fn); + register_binary(".ipc.send", RAY_FN_RESTRICTED, ray_hsend_fn); + + /* Transaction-log journaling under `.log.*` — q/kdb's -l/-L feature. + * The CLI flags -l / -L call ray_journal_open() at + * startup; these builtins expose the same machinery to Rayfall code + * for manual control (open from a script, snapshot on demand, etc). */ + register_vary(".log.open", RAY_FN_RESTRICTED, ray_log_open_fn); + register_unary(".log.write", RAY_FN_NONE, ray_log_write_fn); + register_unary(".log.replay", RAY_FN_RESTRICTED, ray_log_replay_fn); + register_unary(".log.validate",RAY_FN_NONE, ray_log_validate_fn); + register_vary(".log.roll", RAY_FN_RESTRICTED, ray_log_roll_fn); + register_vary(".log.snapshot", RAY_FN_RESTRICTED, ray_log_snapshot_fn); + register_vary(".log.sync", RAY_FN_NONE, ray_log_sync_fn); + register_vary(".log.close", RAY_FN_RESTRICTED, ray_log_close_fn); + + /* quote — special form (unevaluated argument) */ + register_vary("quote", RAY_FN_SPECIAL_FORM, ray_quote_fn); + + /* return — early return (identity) */ + register_unary("return", RAY_FN_NONE, ray_return_fn); + + /* args — command line arguments */ + register_unary("args", RAY_FN_NONE, ray_args_fn); + + /* rc — reference count */ + register_unary("rc", RAY_FN_NONE, ray_rc_fn); + + /* diverse — check if all elements unique */ + register_unary("diverse", RAY_FN_NONE, ray_diverse_fn); + + /* get — dictionary/table lookup (alias for at) */ + register_binary("get", RAY_FN_NONE, ray_get_fn); + + /* remove — remove key from dict */ + register_binary("remove", RAY_FN_NONE, ray_remove_fn); + + /* row — single row from table */ + register_binary("row", RAY_FN_NONE, ray_row_fn); + + /* timer — high-res monotonic 
nanosecond timestamp */ + register_unary("timer", RAY_FN_NONE, ray_timer_fn); + + /* env — list all global environment bindings */ + register_unary("env", RAY_FN_NONE, ray_env_fn); + + /* Directional fold/scan variants */ + register_vary("fold-left", RAY_FN_NONE, ray_fold_left_fn); + register_vary("fold-right", RAY_FN_NONE, ray_fold_right_fn); + register_vary("scan-left", RAY_FN_NONE, ray_scan_left_fn); + register_vary("scan-right", RAY_FN_NONE, ray_scan_right_fn); + + /* del, modify, pivot remain top-level language primitives. + * Runtime/heap introspection moves under `.sys.*`. */ + register_vary("del", RAY_FN_SPECIAL_FORM | RAY_FN_RESTRICTED, ray_del_fn); + register_vary(".sys.build", RAY_FN_NONE, ray_internals_fn); + register_vary(".sys.mem", RAY_FN_NONE, ray_memstat_fn); + register_vary("modify", RAY_FN_RESTRICTED, ray_modify_fn); + register_vary("pivot", RAY_FN_NONE, ray_pivot_fn); + register_vary(".sys.info", RAY_FN_NONE, ray_sysinfo_fn); + register_unary("sym-name", RAY_FN_NONE, ray_sym_name_fn); + register_binary("unify", RAY_FN_NONE, ray_unify_fn); + register_binary("xrank", RAY_FN_NONE, ray_xrank_fn); + + /* EAV triple storage */ + register_vary("datoms", RAY_FN_NONE, ray_datoms_fn); + register_vary("assert-fact", RAY_FN_NONE, ray_assert_fact_fn); + register_vary("retract-fact", RAY_FN_NONE, ray_retract_fact_fn); + register_vary("scan-eav", RAY_FN_NONE, ray_scan_eav_fn); + register_vary("pull", RAY_FN_NONE, ray_pull_fn); + + /* Datalog */ + register_vary("rule", RAY_FN_SPECIAL_FORM, ray_rule_fn); + register_vary("query", RAY_FN_SPECIAL_FORM, ray_query_fn); + + /* Programmatic Datalog API */ + register_vary("dl-program", RAY_FN_NONE, ray_dl_program_fn); + register_vary("dl-add-edb", RAY_FN_NONE, ray_dl_add_edb_fn); + register_unary("dl-stratify", RAY_FN_NONE, ray_dl_stratify_fn); + register_unary("dl-eval", RAY_FN_NONE, ray_dl_eval_fn); + register_binary("dl-query", RAY_FN_NONE, ray_dl_query_fn); + register_binary("dl-provenance", RAY_FN_NONE, 
ray_dl_provenance_fn); + + /* Vector similarity / embeddings / HNSW */ + register_binary("cos-dist", RAY_FN_NONE, ray_cos_dist_fn); + register_binary("inner-prod", RAY_FN_NONE, ray_inner_prod_fn); + register_binary("l2-dist", RAY_FN_NONE, ray_l2_dist_fn); + register_unary ("norm", RAY_FN_NONE, ray_norm_fn); + register_vary ("knn", RAY_FN_NONE, ray_knn_fn); + register_vary ("hnsw-build", RAY_FN_NONE, ray_hnsw_build_fn); + register_vary ("ann", RAY_FN_NONE, ray_ann_fn); + register_unary ("hnsw-free", RAY_FN_NONE, ray_hnsw_free_fn); + register_binary("hnsw-save", RAY_FN_RESTRICTED, ray_hnsw_save_fn); + register_unary ("hnsw-load", RAY_FN_RESTRICTED, ray_hnsw_load_fn); + register_unary ("hnsw-info", RAY_FN_NONE, ray_hnsw_info_fn); + + /* Per-vector accelerator indices (see src/ops/idxop.h) */ + register_unary (".idx.zone", RAY_FN_NONE, ray_idx_zone_fn); + register_unary (".idx.hash", RAY_FN_NONE, ray_idx_hash_fn); + register_unary (".idx.sort", RAY_FN_NONE, ray_idx_sort_fn); + register_unary (".idx.bloom", RAY_FN_NONE, ray_idx_bloom_fn); + register_unary (".idx.drop", RAY_FN_NONE, ray_idx_drop_fn); + register_unary (".idx.has?", RAY_FN_NONE, ray_idx_has_fn); + register_unary (".idx.info", RAY_FN_NONE, ray_idx_info_fn); + + /* Linked columns (see src/ops/linkop.h) */ + register_binary(".col.link", RAY_FN_NONE, ray_col_link_fn); + register_unary (".col.unlink", RAY_FN_NONE, ray_col_unlink_fn); + register_unary (".col.link?", RAY_FN_NONE, ray_col_link_p_fn); + register_unary (".col.target", RAY_FN_NONE, ray_col_target_fn); +} + +/* ══════════════════════════════════════════ + * Runtime lifecycle + * ══════════════════════════════════════════ */ + +ray_err_t ray_lang_init(void) { + ray_err_t err = ray_env_init(); + if (err != RAY_OK) return err; + ray_register_builtins(); + return RAY_OK; +} + +void ray_lang_destroy(void) { + if (__raise_val) { ray_release(__raise_val); __raise_val = NULL; } + /* Reset global Datalog rule storage */ + ray_dl_reset_rules(); + 
ray_env_destroy(); + ray_compile_reset(); +} + +/* ══════════════════════════════════════════ + * Tree-walking evaluator + * ══════════════════════════════════════════ */ + +ray_t* ray_eval(ray_t* obj) { + if (!obj || RAY_IS_ERR(obj)) return obj; + + /* Check for external interrupt (e.g. Ctrl-C from REPL) */ + if (g_eval_interrupted) return ray_error("limit", "interrupted"); + + if (++eval_depth > RAY_EVAL_MAX_DEPTH) { + eval_depth--; + return ray_error("limit", "eval depth exceeded"); + } + + ray_t* ret; + + /* Atoms: return themselves (retain) */ + if (ray_is_atom(obj)) { + /* Name reference: resolve from env */ + if (obj->type == -RAY_SYM && (obj->attrs & RAY_ATTR_NAME)) { + /* Check for null keyword — compare by string, not cached sym_id, + * because sym table may be reinitialized between test runs */ + { + ray_t* name_str = ray_sym_str(obj->i64); + if (name_str && ray_str_len(name_str) == 4 && + memcmp(ray_str_ptr(name_str), "null", 4) == 0) { + ray_release(name_str); + ret = NULL; goto out; + } + if (name_str) ray_release(name_str); + } + + ray_t* val = ray_env_resolve(obj->i64); + if (!val) { + ray_t* ns = ray_sym_str(obj->i64); + if (ns) { + ret = ray_error("name", "'%.*s' undefined", + (int)ray_str_len(ns), ray_str_ptr(ns)); + ray_release(ns); + } else { + ret = ray_error("name", NULL); + } + goto out; + } + /* env_resolve may also return a real error (e.g. nyi from a + * parted-target link deref inside the dotted walker) — surface + * it directly rather than treating it as a found value. */ + if (RAY_IS_ERR(val)) { ret = val; goto out; } + /* env_resolve hands back an owned ref; no extra retain. */ + ret = val; goto out; + } + ray_retain(obj); + ret = obj; goto out; + } + + /* Non-list vectors (incl. RAY_DICT/RAY_TABLE): return themselves — + * dict literals are self-evaluating; values stay unevaluated. Use + * the (dict ...) builtin for evaluated construction. 
*/ + if (obj->type != RAY_LIST) { ray_retain(obj); ret = obj; goto out; } + + /* Empty list */ + if (ray_len(obj) == 0) { ray_retain(obj); ret = obj; goto out; } + + /* List: evaluate first element, dispatch by type */ + ray_t** elems = (ray_t**)ray_data(obj); + ray_t* head = ray_eval(elems[0]); + if (RAY_IS_ERR(head)) { ret = head; goto out; } + + int64_t n = ray_len(obj); + + switch (head->type) { + case RAY_UNARY: { + if (n != 2) { ray_release(head); ret = ray_error("arity", "expected 1 arg, got %d", (int)(n-1)); goto out; } + if (fn_is_restricted(head)) { ray_release(head); ret = ray_error("access", "restricted"); goto out; } + ray_unary_fn fn = (ray_unary_fn)(uintptr_t)head->i64; + uint8_t fn_attrs = head->attrs; + ray_t* arg = ray_eval(elems[1]); + ray_release(head); + if (arg && RAY_IS_ERR(arg)) { ret = arg; goto out; } + ray_t* result; + if (!arg || RAY_IS_NULL(arg)) { + /* Only nil?/type/ser safely handle null */ + result = (fn == (ray_unary_fn)ray_nil_fn || fn == (ray_unary_fn)ray_type_fn || + fn == (ray_unary_fn)ray_ser_fn) ? 
fn(arg) : ray_error("type", NULL); + } else if ((fn_attrs & RAY_FN_ATOMIC) && is_collection(arg)) + result = atomic_map_unary(fn, arg); + else + result = fn(arg); + if (arg) ray_release(arg); + ret = result; goto out; + } + case RAY_BINARY: { + if (n != 3) { ray_release(head); ret = ray_error("arity", "expected 2 args, got %d", (int)(n-1)); goto out; } + if (fn_is_restricted(head)) { ray_release(head); ret = ray_error("access", "restricted"); goto out; } + ray_binary_fn fn = (ray_binary_fn)(uintptr_t)head->i64; + uint8_t fn_attrs = head->attrs; + if (fn_attrs & RAY_FN_SPECIAL_FORM) { + ray_release(head); + ret = fn(elems[1], elems[2]); goto out; + } + ray_t* left = ray_eval(elems[1]); + if (left && RAY_IS_ERR(left)) { + ray_release(head); + ret = left; goto out; + } + ray_t* right = ray_eval(elems[2]); + if (right && RAY_IS_ERR(right)) { + ray_release(head); if (left) ray_release(left); + ret = right; goto out; + } + /* If either arg is NULL/void, only == and != can handle it */ + if (!left || !right || RAY_IS_NULL(left) || RAY_IS_NULL(right)) { + if (fn == (ray_binary_fn)ray_eq_fn || fn == (ray_binary_fn)ray_neq_fn) { + ray_release(head); + ray_t* result = fn(left, right); + ray_release(left); + ray_release(right); + ret = result; goto out; + } + ray_release(head); + ray_release(left); + ray_release(right); + ret = ray_error("type", NULL); goto out; + } + uint16_t fn_opcode = RAY_FN_OPCODE(head); + ray_release(head); + ray_t* result; + if ((fn_attrs & RAY_FN_ATOMIC) && (is_collection(left) || is_collection(right))) + result = atomic_map_binary_op(fn, fn_opcode, left, right); + else + result = fn(left, right); + ray_release(left); + ray_release(right); + ret = result; goto out; + } + case RAY_VARY: { + if (fn_is_restricted(head)) { ray_release(head); ret = ray_error("access", "restricted"); goto out; } + ray_vary_fn fn = (ray_vary_fn)(uintptr_t)head->i64; + if (head->attrs & RAY_FN_SPECIAL_FORM) { + ray_release(head); + ret = fn(elems + 1, n - 1); goto out; + } + 
int64_t argc = n - 1; + if (argc > 64) { ray_release(head); ret = ray_error("domain", NULL); goto out; } + ray_t* args[64]; + for (int64_t i = 0; i < argc; i++) { + args[i] = ray_eval(elems[i + 1]); + if (!args[i] || RAY_IS_ERR(args[i])) { + ray_t* err = (!args[i]) ? ray_error("type", NULL) : args[i]; + for (int64_t j = 0; j < i; j++) ray_release(args[j]); + ray_release(head); + ret = err; goto out; + } + } + ray_release(head); + ray_t* result = fn(args, argc); + for (int64_t i = 0; i < argc; i++) ray_release(args[i]); + ret = result; goto out; + } + case RAY_LAMBDA: { + int64_t argc = n - 1; + if (argc > 64) { ray_release(head); ret = ray_error("domain", NULL); goto out; } + ray_t* args[64]; + for (int64_t i = 0; i < argc; i++) { + args[i] = ray_eval(elems[i + 1]); + if (!args[i] || RAY_IS_ERR(args[i])) { + ray_t* err = (!args[i]) ? ray_error("type", NULL) : args[i]; + for (int64_t j = 0; j < i; j++) ray_release(args[j]); + ray_release(head); + ret = err; goto out; + } + } + ray_t* result = call_lambda(head, args, argc); + for (int64_t i = 0; i < argc; i++) ray_release(args[i]); + ray_release(head); + if (RAY_IS_ERR(result)) + add_eval_error_frame(g_eval_nfo, obj); + ret = result; goto out; + } + default: + ray_release(head); + ret = ray_error("type", NULL); goto out; + } + +out: + eval_depth--; + /* End-of-top-level-expression cleanup hook. Every path that + * entered ray_eval — REPL, IPC, ray_eval_str, file mode — exits + * through here; firing ray_progress_end exactly when the depth + * returns to 0 guarantees the progress bar is cleared no matter + * which builtin drove the update (including ray_group_fn etc. + * that bypass ray_execute). 
*/ + if (eval_depth == 0) ray_progress_end(); + return ret; +} + +ray_t* ray_eval_str(const char* source) { + ray_clear_error_trace(); + ray_t* nfo = ray_nfo_create("repl", 4, source, strlen(source)); + ray_t* parsed = ray_parse_with_nfo(source, nfo); + if (RAY_IS_ERR(parsed)) { ray_release(nfo); return parsed; } + + ray_t* prev_nfo = g_eval_nfo; + g_eval_nfo = nfo; + ray_t* result = ray_eval(parsed); + g_eval_nfo = prev_nfo; + + ray_release(parsed); + ray_release(nfo); + return result; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/lang/eval.h b/crates/rayforce-sys/vendor/rayforce/src/lang/eval.h new file mode 100644 index 0000000..df86e73 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/lang/eval.h @@ -0,0 +1,298 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAY_EVAL_H +#define RAY_EVAL_H + +#include +#include +#include "lang/nfo.h" + +/* ===== Function Attribute Flags (stored in attrs byte) ===== */ + +#define RAY_FN_NONE 0x00 +#define RAY_FN_LEFT_ATOMIC 0x01 /* auto-map left arg over vectors */ +#define RAY_FN_RIGHT_ATOMIC 0x02 /* auto-map right arg over vectors */ +#define RAY_FN_ATOMIC 0x04 /* auto-map all args over vectors */ +#define RAY_FN_AGGR 0x08 /* aggregation function */ +#define RAY_FN_SPECIAL_FORM 0x10 /* receives unevaluated args */ +#define RAY_FN_RESTRICTED 0x20 /* forbidden during -U restricted IPC evals */ + +/* AST name flag (distinguishes symbol literal from variable reference) */ +#define RAY_ATTR_NAME 0x20 /* ray_t SYM atom with this flag = name reference */ + +/* Function type signatures */ +typedef ray_t* (*ray_unary_fn)(ray_t*); +typedef ray_t* (*ray_binary_fn)(ray_t*, ray_t*); +typedef ray_t* (*ray_vary_fn)(ray_t**, int64_t); + +/* DAG opcode stored in nullmap[0..1] for binary builtins with DAG-exec paths. + * Eliminates dispatch table lookups in atomic_map_binary — just read the opcode. 
*/ +#define RAY_FN_OPCODE(fn) (*(uint16_t*)(fn)->nullmap) +#define RAY_FN_SET_OPCODE(fn,op) (*(uint16_t*)(fn)->nullmap = (uint16_t)(op)) + +/* ===== VM Bytecode Opcodes ===== */ + +enum { + OP_RET = 0, /* return top of stack */ + OP_JMP, /* unconditional jump (2-byte signed offset) */ + OP_JMPF, /* jump if false (2-byte signed offset) */ + OP_LOADCONST, /* push constant pool[operand] (1-byte index) */ + OP_LOADENV, /* push local variable (1-byte slot index) */ + OP_STOREENV, /* pop and store into local (1-byte slot index) */ + OP_POP, /* discard top of stack */ + OP_RESOLVE, /* resolve global name: constant pool[operand] is sym_id */ + OP_CALL1, /* call unary: pop fn + 1 arg, push result */ + OP_CALL2, /* call binary: pop fn + 2 args, push result */ + OP_CALLN, /* call variadic: operand = argc, pop fn + N args */ + OP_CALLF, /* call compiled lambda: push frame, jump to callee */ + OP_CALLS, /* tail call: reuse frame */ + OP_CALLD, /* dynamic dispatch: fallback to ray_eval() */ + OP_DUP, /* duplicate top of stack */ + OP_LOADCONST_W, /* push constant pool[operand] (2-byte index) */ + OP_RESOLVE_W, /* resolve global name: 2-byte constant pool index */ + OP_TRAP, /* push trap frame, 2-byte handler offset */ + OP_TRAP_END, /* pop trap frame (success path) */ + OP__COUNT +}; + +/* ===== Compiled Lambda Layout ===== + * + * A RAY_LAMBDA object with attrs & RAY_FN_COMPILED stores compiled + * bytecode in its data area: + * + * data[0] = ray_t* params_list (same as interpreted) + * data[1] = ray_t* body (parsed body, same as interpreted) + * data[2] = ray_t* bytecode (RAY_U8 vector of opcodes) + * data[3] = ray_t* constants (RAY_LIST of constant pool entries) + * data[4] = int32_t n_locals (number of local slots needed) + * data[5] = ray_t* nfo (source location info, NULL if absent) + * data[6] = ray_t* dbg (debug metadata, NULL if absent) + */ + +#define RAY_FN_COMPILED 0x40 /* lambda has been compiled to bytecode */ + +#define LAMBDA_PARAMS(lam) 
(((ray_t**)ray_data(lam))[0]) +#define LAMBDA_BODY(lam) (((ray_t**)ray_data(lam))[1]) +#define LAMBDA_BC(lam) (((ray_t**)ray_data(lam))[2]) +#define LAMBDA_CONSTS(lam) (((ray_t**)ray_data(lam))[3]) +#define LAMBDA_NLOCALS(lam) (*((int32_t*)&((ray_t**)ray_data(lam))[4])) +#define LAMBDA_NFO(lam) (((ray_t**)ray_data(lam))[5]) +#define LAMBDA_DBG(lam) (((ray_t**)ray_data(lam))[6]) + +#define LAMBDA_IS_COMPILED(lam) ((lam)->attrs & RAY_FN_COMPILED) + +/* ===== VM Types ===== */ + +#define VM_STACK_SIZE 1024 + +typedef struct { + ray_t *fn; /* lambda being executed */ + int32_t fp; /* frame pointer */ + int32_t ip; /* instruction pointer */ +} vm_ctx_t; + +typedef struct { + int32_t rp; /* return stack depth at trap point */ + int32_t sp; /* stack depth at trap point */ + int32_t handler_ip;/* IP of handler code */ + ray_t *fn; /* function containing handler code */ + int32_t fp; /* frame pointer at trap point */ + int32_t n_locals; /* n_locals at trap point */ +} vm_trap_t; + +#define VM_TRAP_SIZE 16 + +typedef struct { + int32_t sp; /* stack pointer */ + int32_t fp; /* frame pointer */ + int32_t rp; /* return stack pointer */ + int32_t id; /* VM identifier */ + ray_t *fn; /* current lambda */ + void *heap; /* heap pointer (future use) */ + int32_t tp; /* trap stack pointer */ + ray_t *ps[VM_STACK_SIZE]; /* program stack */ + vm_ctx_t rs[VM_STACK_SIZE]; /* return stack */ + vm_trap_t ts[VM_TRAP_SIZE]; /* trap frames */ +} ray_vm_t; + +/* ===== Public API ===== */ + +/* Initialize the Rayfall runtime: symbols, environment, builtins. */ +ray_err_t ray_lang_init(void); +void ray_lang_destroy(void); + +/* Evaluate a parsed ray_t object tree. */ +ray_t* ray_eval(ray_t* obj); + +/* Parse + eval convenience. */ +ray_t* ray_eval_str(const char* source); + +/* Compile a lambda's body to bytecode. Called lazily on first invocation. */ +void ray_compile(ray_t* lambda); + +/* Reset compiler cached state (call from ray_lang_destroy). 
*/ +void ray_compile_reset(void); + +/* Look up the source span for a bytecode IP from a lambda's debug vector. + * Returns a span with id==0 if not found. */ +ray_span_t ray_bc_dbg_get(ray_t* dbg, int32_t ip); + +/* Print a ray_t value to a FILE stream. */ +void ray_lang_print(FILE* fp, ray_t* val); + +/* Interrupt support: allow external code (REPL signal handler) to request + * that the evaluator abort early. ray_eval() and the bytecode VM check + * this flag at function-call and loop boundaries. */ +void ray_eval_request_interrupt(void); +void ray_eval_clear_interrupt(void); +int ray_eval_is_interrupted(void); + +/* Return the current eval context's nfo (source location) object, or NULL. */ +ray_t* ray_eval_get_nfo(void); +void ray_eval_set_nfo(ray_t* nfo); + +/* Error trace: list of [span_i64, filename, fn_name, source] frames built when + * a VM error propagates without a trap. Cleared at the start of ray_eval_str. */ +ray_t* ray_get_error_trace(void); +void ray_clear_error_trace(void); + +/* Restricted mode: when true, builtins with RAY_FN_RESTRICTED are blocked. 
*/ +void ray_eval_set_restricted(bool on); +bool ray_eval_get_restricted(void); + +/* ===== Rayfall Builtin Functions ===== */ + +/* Arithmetic */ +ray_t* ray_add_fn(ray_t* a, ray_t* b); +ray_t* ray_sub_fn(ray_t* a, ray_t* b); +ray_t* ray_mul_fn(ray_t* a, ray_t* b); +ray_t* ray_div_fn(ray_t* a, ray_t* b); +ray_t* ray_mod_fn(ray_t* a, ray_t* b); + +/* Comparison */ +ray_t* ray_gt_fn(ray_t* a, ray_t* b); +ray_t* ray_lt_fn(ray_t* a, ray_t* b); +ray_t* ray_gte_fn(ray_t* a, ray_t* b); +ray_t* ray_lte_fn(ray_t* a, ray_t* b); +ray_t* ray_eq_fn(ray_t* a, ray_t* b); +ray_t* ray_neq_fn(ray_t* a, ray_t* b); + +/* Logic */ +ray_t* ray_and_fn(ray_t* a, ray_t* b); +ray_t* ray_or_fn(ray_t* a, ray_t* b); +ray_t* ray_and_vary_fn(ray_t** args, int64_t n); +ray_t* ray_or_vary_fn(ray_t** args, int64_t n); +ray_t* ray_not_fn(ray_t* x); +ray_t* ray_neg_fn(ray_t* x); + +/* Aggregation */ +ray_t* ray_sum_fn(ray_t* x); +ray_t* ray_count_fn(ray_t* x); +ray_t* ray_avg_fn(ray_t* x); +ray_t* ray_min_fn(ray_t* x); +ray_t* ray_max_fn(ray_t* x); +ray_t* ray_first_fn(ray_t* x); +ray_t* ray_last_fn(ray_t* x); +ray_t* ray_med_fn(ray_t* x); +ray_t* ray_dev_fn(ray_t* x); +ray_t* ray_stddev_fn(ray_t* x); +ray_t* ray_stddev_pop_fn(ray_t* x); +ray_t* ray_var_fn(ray_t* x); +ray_t* ray_var_pop_fn(ray_t* x); + +/* Higher-order */ +ray_t* ray_map_fn(ray_t** args, int64_t n); +ray_t* ray_pmap_fn(ray_t** args, int64_t n); +ray_t* ray_fold_fn(ray_t** args, int64_t n); +ray_t* ray_scan_fn(ray_t** args, int64_t n); +ray_t* ray_filter_fn(ray_t* vec, ray_t* mask); +ray_t* ray_apply_fn(ray_t** args, int64_t n); + +/* Collection */ +ray_t* ray_distinct_fn(ray_t* x); +ray_t* ray_in_fn(ray_t* val, ray_t* vec); +ray_t* ray_except_fn(ray_t* vec1, ray_t* vec2); +ray_t* ray_union_fn(ray_t* vec1, ray_t* vec2); +ray_t* ray_sect_fn(ray_t* vec1, ray_t* vec2); +ray_t* ray_take_fn(ray_t* vec, ray_t* n_obj); +ray_t* ray_at_fn(ray_t* vec, ray_t* idx); +ray_t* ray_find_fn(ray_t* vec, ray_t* val); +ray_t* ray_til_fn(ray_t* x); 
+ray_t* ray_reverse_fn(ray_t* x); + +/* Table construction */ +ray_t* ray_list_fn(ray_t** args, int64_t n); +ray_t* ray_table_fn(ray_t* names, ray_t* cols); +ray_t* ray_key_fn(ray_t* x); +ray_t* ray_value_fn(ray_t* x); + +/* Query */ +ray_t* ray_select_fn(ray_t** args, int64_t n); +ray_t* ray_update_fn(ray_t** args, int64_t n); +ray_t* ray_insert_fn(ray_t** args, int64_t n); +ray_t* ray_upsert_fn(ray_t** args, int64_t n); +ray_t* ray_xbar_fn(ray_t* col, ray_t* bucket); + +/* Joins */ +ray_t* ray_left_join_fn(ray_t** args, int64_t n); +ray_t* ray_inner_join_fn(ray_t** args, int64_t n); +ray_t* ray_window_join_fn(ray_t** args, int64_t n); + +/* I/O */ +ray_t* ray_println_fn(ray_t** args, int64_t n); +ray_t* ray_read_csv_fn(ray_t** args, int64_t n); +ray_t* ray_write_csv_fn(ray_t** args, int64_t n); +ray_t* ray_read_file_fn(ray_t* path_obj); +ray_t* ray_write_file_fn(ray_t* path_obj, ray_t* content); + +/* Vector similarity / embeddings / HNSW. + * cos-dist and l2-dist return distance (lower = closer); inner-prod is + * the raw mathematical dot product. 
*/ +ray_t* ray_cos_dist_fn(ray_t* a, ray_t* b); +ray_t* ray_inner_prod_fn(ray_t* a, ray_t* b); +ray_t* ray_l2_dist_fn(ray_t* a, ray_t* b); +ray_t* ray_norm_fn(ray_t* x); +ray_t* ray_knn_fn(ray_t** args, int64_t n); +ray_t* ray_hnsw_build_fn(ray_t** args, int64_t n); +ray_t* ray_ann_fn(ray_t** args, int64_t n); +ray_t* ray_hnsw_free_fn(ray_t* h); +ray_t* ray_hnsw_save_fn(ray_t* h, ray_t* path); +ray_t* ray_hnsw_load_fn(ray_t* path); +ray_t* ray_hnsw_info_fn(ray_t* h); + +/* Cast and type */ +ray_t* ray_cast_fn(ray_t* type_sym, ray_t* val); +ray_t* ray_type_fn(ray_t* val); + +/* Special forms */ +ray_t* ray_set_fn(ray_t* name_obj, ray_t* val_expr); +ray_t* ray_let_fn(ray_t* name_obj, ray_t* val_expr); +ray_t* ray_cond_fn(ray_t** args, int64_t n); +ray_t* ray_do_fn(ray_t** args, int64_t n); +ray_t* ray_fn(ray_t** args, int64_t n); +ray_t* ray_raise_fn(ray_t* val); +ray_t* ray_try_fn(ray_t* expr, ray_t* handler_expr); + + +#endif /* RAY_EVAL_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/lang/format.c b/crates/rayforce-sys/vendor/rayforce/src/lang/format.c new file mode 100644 index 0000000..dc88fe8 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/lang/format.c @@ -0,0 +1,1074 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "lang/format.h" +#include "lang/env.h" +#include "table/sym.h" +#include "lang/eval.h" +#include "ops/ops.h" /* RAY_LAZY, ray_lazy_materialize */ +#include "mem/heap.h" +#include +#include +#include +#include +#include +#include + +/* ===== Internal growable buffer ===== */ + +typedef struct { + char* buf; + int32_t len; + int32_t cap; + ray_t* block; /* ray_alloc'd backing block */ +} fmt_buf_t; + +static void fmt_init(fmt_buf_t* b) { + b->block = ray_alloc(256); + b->buf = (char*)ray_data(b->block); + b->len = 0; + b->cap = 256; +} + +static void fmt_destroy(fmt_buf_t* b) { + if (b->block) { + ray_free(b->block); + b->block = NULL; + b->buf = NULL; + b->len = 0; + b->cap = 0; + } +} + +static void fmt_ensure(fmt_buf_t* b, int32_t extra) { + if (b->len + extra <= b->cap) return; + int32_t new_cap = b->cap; + while (new_cap < b->len + extra) + new_cap *= 2; + ray_t* new_block = ray_alloc((size_t)new_cap); + char* new_buf = (char*)ray_data(new_block); + memcpy(new_buf, b->buf, (size_t)b->len); + ray_free(b->block); + b->block = new_block; + b->buf = new_buf; + b->cap = new_cap; +} + +static void fmt_putc(fmt_buf_t* b, char c) { + fmt_ensure(b, 1); + b->buf[b->len++] = c; +} + +static void fmt_puts(fmt_buf_t* b, const char* s) { + int32_t slen = (int32_t)strlen(s); + fmt_ensure(b, slen); + memcpy(b->buf + b->len, s, (size_t)slen); + b->len += slen; +} + +static void fmt_printf(fmt_buf_t* b, const char* fmt, ...) 
{ + va_list ap; + + /* Try to fit in remaining space first */ + va_start(ap, fmt); + int32_t avail = b->cap - b->len; + int n = vsnprintf(b->buf + b->len, (size_t)avail, fmt, ap); + va_end(ap); + + if (n < 0) return; /* encoding error */ + + if (n < avail) { + b->len += n; + return; + } + + /* Need more space — grow and retry */ + fmt_ensure(b, n + 1); + va_start(ap, fmt); + vsnprintf(b->buf + b->len, (size_t)(b->cap - b->len), fmt, ap); + va_end(ap); + b->len += n; +} + +static void fmt_putn(fmt_buf_t* b, const char* s, int32_t n) { + fmt_ensure(b, n); + memcpy(b->buf + b->len, s, (size_t)n); + b->len += n; +} + +static ray_t* fmt_to_str(fmt_buf_t* b) { + ray_t* result = ray_str(b->buf, (size_t)b->len); + fmt_destroy(b); + return result; +} + +/* ===== Static globals ===== */ + +static int g_precision = FMT_DEFAULT_PRECISION; +static int g_row_width = FMT_DEFAULT_ROW_WIDTH; + +/* ===== Public API ===== */ + +void ray_fmt_set_precision(int digits) { + if (digits >= 0 && digits <= 20) + g_precision = digits; +} + +void ray_fmt_set_width(int cols) { + if (cols > 0) + g_row_width = cols; +} + +/* Single type-name function. Negative type (atom) → lowercase, + * positive type (vector/collection) → uppercase. */ +const char* ray_type_name(int8_t type) { + switch (type < 0 ? -type : type) { + case RAY_BOOL: return type < 0 ? "b8" : "B8"; + case RAY_U8: return type < 0 ? "u8" : "U8"; + case RAY_I16: return type < 0 ? "i16" : "I16"; + case RAY_I32: return type < 0 ? "i32" : "I32"; + case RAY_I64: return type < 0 ? "i64" : "I64"; + case RAY_F32: return type < 0 ? "f32" : "F32"; + case RAY_F64: return type < 0 ? "f64" : "F64"; + case RAY_DATE: return type < 0 ? "date" : "DATE"; + case RAY_TIME: return type < 0 ? "time" : "TIME"; + case RAY_TIMESTAMP: return type < 0 ? "timestamp" : "TIMESTAMP"; + case RAY_SYM: return type < 0 ? "sym" : "SYM"; + case RAY_STR: return type < 0 ? "str" : "STR"; + case RAY_GUID: return type < 0 ?
 "guid" : "GUID"; + case RAY_TABLE: return "TABLE"; + case RAY_DICT: return "DICT"; + case RAY_LIST: return "LIST"; + case RAY_INDEX: return "INDEX"; + default: return "?"; + } +} + +/* ===== Atom formatters ===== */ + +static void fmt_bool(fmt_buf_t* b, uint8_t val) { + fmt_puts(b, val ? "true" : "false"); +} + +static void fmt_u8(fmt_buf_t* b, uint8_t val) { + fmt_printf(b, "0x%02x", val); +} + + +static void fmt_i16(fmt_buf_t* b, int16_t val) { + fmt_printf(b, "%d", (int)val); +} + +static void fmt_i32(fmt_buf_t* b, int32_t val) { + fmt_printf(b, "%d", (int)val); +} + +static void fmt_i64(fmt_buf_t* b, int64_t val) { + fmt_printf(b, "%" PRId64, val); +} + +/* Append a double to the buffer using g_precision fraction digits. + * Negative zero is normalized to zero. Values whose log10 magnitude is + * above 6 or below -1 are written in exponential notation ("%e"); all + * others use fixed notation ("%f") with trailing zeros stripped down to + * one digit after the decimal point. Emits "?" if the formatted text + * does not fit the local scratch buffer. */ +static void fmt_f64(fmt_buf_t* b, double val) { + if (val == -0.0 && signbit(val)) val = 0.0; /* normalize -0.0 */ + if (val == 0.0) { + /* Zero: format as "0.0" (after trailing-zero strip). The scratch + * buffer must hold "0." plus up to 20 fraction digits (the maximum + * ray_fmt_set_precision accepts): snprintf returns the untruncated + * length, so an undersized buffer would make the strip loop and + * fmt_putn read past its end. */ + char tmp[64]; + int n = snprintf(tmp, sizeof(tmp), "%.*f", g_precision, 0.0); + if (n <= 0 || n >= (int)sizeof(tmp)) { + fmt_puts(b, "?"); + return; + } + char* dot = strchr(tmp, '.'); + if (dot) { char* end = tmp + n - 1; while (end > dot + 1 && *end == '0') end--; n = (int)(end - tmp + 1); } + fmt_putn(b, tmp, (int32_t)n); + return; + } + double absval = val < 0 ? -val : val; + double order = log10(absval); + + /* Format with requested precision */ + char tmp[64]; + int n; + if (val != 0.0 && (order > 6 || order < -1)) + n = snprintf(tmp, sizeof(tmp), "%.*e", g_precision, val); + else + n = snprintf(tmp, sizeof(tmp), "%.*f", g_precision, val); + + if (n <= 0 || n >= (int)sizeof(tmp)) { + fmt_puts(b, "?"); + return; + } + + /* Strip trailing zeros after decimal point, keeping at least one + * digit after '.'. Do NOT touch exponential notation.
 */ + char* dot = strchr(tmp, '.'); + char* e = strchr(tmp, 'e'); + if (dot && !e) { + char* end = tmp + n - 1; + while (end > dot + 1 && *end == '0') + end--; + n = (int)(end - tmp + 1); + } + + fmt_putn(b, tmp, (int32_t)n); +} + +static void fmt_f32(fmt_buf_t* b, float val) { + fmt_f64(b, (double)val); +} + +static void fmt_guid(fmt_buf_t* b, const uint8_t* bytes) { + static const char hex[] = "0123456789abcdef"; + /* Format: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx */ + static const int groups[] = {4, 2, 2, 2, 6}; + int pos = 0; + for (int g = 0; g < 5; g++) { + if (g > 0) fmt_putc(b, '-'); + for (int j = 0; j < groups[g]; j++) { + fmt_putc(b, hex[bytes[pos] >> 4]); + fmt_putc(b, hex[bytes[pos] & 0x0F]); + pos++; + } + } +} + +static void fmt_sym(fmt_buf_t* b, int64_t sym_id) { + ray_t* s = ray_sym_str(sym_id); + if (s && !RAY_IS_ERR(s)) { + const char* p = ray_str_ptr(s); + size_t n = ray_str_len(s); + fmt_putn(b, p, (int32_t)n); + ray_release(s); + } else { + fmt_puts(b, "0Ns"); + } +} + +/* ===== Date/time/timestamp helpers ===== */ + +#include "lang/cal.h" + +static void time_to_hms(int32_t ms, int* h, int* min, int* s, int* ms_out) { + int32_t mask = ms >> 31; + int32_t val = (mask ^ ms) - mask; /* absolute value */ + + int32_t secs = val / 1000; + *ms_out = (int)(val % 1000); + *h = (int)(secs / 3600); + int32_t rem = secs % 3600; + *min = (int)(rem / 60); + *s = (int)(rem % 60); +} + +#define NSECS_IN_DAY ((int64_t)24 * 60 * 60 * 1000000000LL) + +static void ts_to_parts(int64_t ns, int* y, int* mo, int* d, + int* h, int* mi, int* s, int* nanos) { + int64_t days = ns / NSECS_IN_DAY; + int64_t span = ns % NSECS_IN_DAY; + + if (span < 0) { + days -= 1; + span += NSECS_IN_DAY; + } + + date_to_ymd((int32_t)days, y, mo, d); + + /* timespan_from_nanos */ + int64_t secs = span / 1000000000LL; + *nanos = (int)(span % 1000000000LL); + *h = (int)(secs / 3600); + int64_t rem = secs % 3600; + *mi = (int)(rem / 60); + *s = (int)(rem % 60); +} + +static void
fmt_date(fmt_buf_t* b, int32_t val) { + int y, m, d; + date_to_ymd(val, &y, &m, &d); + fmt_printf(b, "%04d.%02d.%02d", y, m, d); +} + +static void fmt_time(fmt_buf_t* b, int32_t val) { + int h, m, s, ms; + time_to_hms(val, &h, &m, &s, &ms); + if (val < 0) fmt_putc(b, '-'); + fmt_printf(b, "%02d:%02d:%02d.%03d", h, m, s, ms); +} + +static void fmt_timestamp(fmt_buf_t* b, int64_t val) { + int y, mo, d, h, mi, s, ns; + ts_to_parts(val, &y, &mo, &d, &h, &mi, &s, &ns); + fmt_printf(b, "%04d.%02d.%02dD%02d:%02d:%02d.%09d", y, mo, d, h, mi, s, ns); +} + +static void fmt_str_atom(fmt_buf_t* b, ray_t* obj, int full) { + (void)full; + const char* p = ray_str_ptr(obj); + size_t n = ray_str_len(obj); + fmt_putc(b, '"'); + fmt_putn(b, p, (int32_t)n); + fmt_putc(b, '"'); +} + +/* ===== Forward declarations ===== */ + +static void fmt_obj(fmt_buf_t* b, ray_t* obj, int mode); + +/* ===== Null literal display (type → "0Nx" string) ===== */ + +static const char* null_literal(int8_t type) { + switch (type) { + case RAY_BOOL: return "0Nb"; + case RAY_U8: return "0Nu"; + case RAY_I16: return "0Nh"; + case RAY_I32: return "0Ni"; + case RAY_I64: return "0Nl"; + case RAY_F64: return "0Nf"; + case RAY_F32: return "0Ne"; + case RAY_DATE: return "0Nd"; + case RAY_TIME: return "0Nt"; + case RAY_TIMESTAMP: return "0Np"; + case RAY_SYM: return "0Ns"; + case RAY_STR: return "0Nc"; + case RAY_GUID: return "0Ng"; + default: return "null"; + } +} + +/* ===== Vector element formatter ===== */ + +static void fmt_raw_elem(fmt_buf_t* b, ray_t* vec, int64_t idx) { + /* Check for null */ + if (ray_vec_is_null(vec, idx)) { + fmt_puts(b, null_literal(vec->type)); + return; + } + + switch (vec->type) { + case RAY_BOOL: fmt_bool(b, ((bool*)ray_data(vec))[idx]); break; + case RAY_U8: fmt_u8(b, ((uint8_t*)ray_data(vec))[idx]); break; + + case RAY_I16: fmt_i16(b, ((int16_t*)ray_data(vec))[idx]); break; + case RAY_I32: fmt_i32(b, ((int32_t*)ray_data(vec))[idx]); break; + case RAY_I64: fmt_i64(b, 
((int64_t*)ray_data(vec))[idx]); break; + case RAY_F32: fmt_f32(b, ((float*)ray_data(vec))[idx]); break; + case RAY_F64: fmt_f64(b, ((double*)ray_data(vec))[idx]); break; + case RAY_DATE: fmt_date(b, ((int32_t*)ray_data(vec))[idx]); break; + case RAY_TIME: fmt_time(b, ((int32_t*)ray_data(vec))[idx]); break; + case RAY_TIMESTAMP: fmt_timestamp(b, ((int64_t*)ray_data(vec))[idx]); break; + case RAY_SYM: { + int64_t sym_id = ray_read_sym(ray_data(vec), idx, vec->type, vec->attrs); + fmt_sym(b, sym_id); + break; + } + case RAY_STR: { + size_t slen = 0; + const char* p = ray_str_vec_get(vec, idx, &slen); + if (p) { + fmt_putc(b, '"'); + fmt_putn(b, p, (int32_t)slen); + fmt_putc(b, '"'); + } + break; + } + case RAY_GUID: + fmt_guid(b, ((uint8_t*)ray_data(vec)) + idx * 16); + break; + case RAY_LIST: { + ray_t* child = ((ray_t**)ray_data(vec))[idx]; + if (child) { + ray_t* s = ray_fmt(child, 1); + if (s && !RAY_IS_ERR(s)) { + fmt_putn(b, ray_str_ptr(s), (int32_t)ray_str_len(s)); + ray_release(s); + } else { + fmt_puts(b, "?"); + } + } else { + fmt_puts(b, "null"); + } + break; + } + default: + fmt_puts(b, "?"); + break; + } +} + +/* ===== Vector formatter ===== */ + +static void fmt_vector(fmt_buf_t* b, ray_t* vec, int limit) { + int64_t len = ray_len(vec); + if (len == 0) { fmt_puts(b, "[]"); return; } + + fmt_puts(b, "["); + int32_t start_len = b->len; + + for (int64_t i = 0; i < len; i++) { + if (i > 0) fmt_putc(b, ' '); + + int32_t before = b->len; + fmt_raw_elem(b, vec, i); + + /* Width limiting: check if we exceeded the limit */ + if (limit > 0 && (b->len - start_len) > limit) { + /* Rewind to before this element and truncate */ + b->len = before; + fmt_puts(b, "..]"); + return; + } + } + + fmt_puts(b, "]"); +} + +/* ===== List formatter ===== */ + +static void fmt_list(fmt_buf_t* b, ray_t* list, int mode) { + int64_t len = ray_len(list); + if (len == 0) { fmt_puts(b, "()"); return; } + + /* Homogeneous atom list → format as vector [...] 
*/ + ray_t** items = (ray_t**)ray_data(list); + if (items && len > 0 && items[0] && !RAY_IS_ERR(items[0]) && ray_is_atom(items[0])) { + int8_t first_type = items[0]->type; + int homogeneous = 1; + for (int64_t i = 1; i < len; i++) { + if (!items[i] || RAY_IS_ERR(items[i]) || items[i]->type != first_type) { + homogeneous = 0; break; + } + } + if (homogeneous) { + fmt_puts(b, "["); + for (int64_t i = 0; i < len; i++) { + if (i > 0) fmt_putc(b, ' '); + fmt_obj(b, items[i], mode); + } + fmt_puts(b, "]"); + return; + } + } + + /* mode 0 = compact/round-trippable: "(list ...)" prefix required + * mode 1 = REPL display: "(...)" matching rayforce 1 output */ + if (mode == 0) + fmt_puts(b, "(list "); + else + fmt_puts(b, "("); + + int64_t max_elems = (mode == 1) ? FMT_LIST_MAX_HEIGHT : len; + int64_t show = len < max_elems ? len : max_elems; + + for (int64_t i = 0; i < show; i++) { + if (i > 0) fmt_putc(b, ' '); + ray_t* elem = ray_list_get(list, i); + fmt_obj(b, elem, mode); + } + + if (len > show) fmt_puts(b, " .."); + fmt_puts(b, ")"); +} + +/* ===== Dict formatter ===== */ + +static void fmt_dict(fmt_buf_t* b, ray_t* dict, int mode) { + ray_t* keys = ray_dict_keys(dict); + ray_t* vals = ray_dict_vals(dict); + int64_t npairs = keys ? keys->len : 0; + if (npairs == 0) { fmt_puts(b, "{}"); return; } + + int64_t max_pairs = (mode == 1) ? FMT_LIST_MAX_HEIGHT : npairs; + int64_t show = npairs < max_pairs ? npairs : max_pairs; + + fmt_puts(b, "{"); + for (int64_t i = 0; i < show; i++) { + if (i > 0) fmt_putc(b, ' '); + /* Render key: synthesize an atom view from the keys vector. When + * the source slot is flagged null in the keys' bitmap, set the + * synthesized atom's nullmap bit 0 so fmt_obj renders the proper + * null literal. Without this, nullable GUID/STR/sym keys render + * as their underlying bytes (e.g. the 16-zero-byte GUID), losing + * null semantics. 
*/ + bool k_is_null = (keys->type != RAY_LIST) && ray_vec_is_null(keys, i); + ray_t k_atom_storage; + ray_t* k_atom = NULL; + memset(&k_atom_storage, 0, sizeof(k_atom_storage)); + bool k_owned = false; /* true if k_atom is a fresh allocation */ + if (keys->type == RAY_SYM) { + k_atom_storage.type = -RAY_SYM; + k_atom_storage.i64 = ray_read_sym(ray_data(keys), i, RAY_SYM, keys->attrs); + k_atom = &k_atom_storage; + } else if (keys->type == RAY_STR) { + size_t slen = 0; + const char* sp = ray_str_vec_get(keys, i, &slen); + k_atom = ray_str(sp ? sp : "", sp ? slen : 0); + k_owned = true; + } else if (keys->type == RAY_I64 || keys->type == RAY_TIMESTAMP) { + k_atom_storage.type = (int8_t)-keys->type; + k_atom_storage.i64 = ((int64_t*)ray_data(keys))[i]; + k_atom = &k_atom_storage; + } else if (keys->type == RAY_I32 || keys->type == RAY_DATE || keys->type == RAY_TIME) { + k_atom_storage.type = (int8_t)-keys->type; + k_atom_storage.i32 = ((int32_t*)ray_data(keys))[i]; + k_atom = &k_atom_storage; + } else if (keys->type == RAY_I16) { + k_atom_storage.type = -RAY_I16; + k_atom_storage.i16 = ((int16_t*)ray_data(keys))[i]; + k_atom = &k_atom_storage; + } else if (keys->type == RAY_BOOL || keys->type == RAY_U8) { + k_atom_storage.type = (int8_t)-keys->type; + k_atom_storage.u8 = ((uint8_t*)ray_data(keys))[i]; + k_atom = &k_atom_storage; + } else if (keys->type == RAY_F64) { + k_atom_storage.type = -RAY_F64; + k_atom_storage.f64 = ((double*)ray_data(keys))[i]; + k_atom = &k_atom_storage; + } else if (keys->type == RAY_F32) { + k_atom_storage.type = -RAY_F32; + k_atom_storage.f64 = (double)((float*)ray_data(keys))[i]; + k_atom = &k_atom_storage; + } else if (keys->type == RAY_GUID) { + /* GUID atoms keep their 16-byte payload in a heap-allocated + * child block; the stack-local view trick from the other + * branches doesn't carry the bytes (fmt_obj would deref a + * bogus inline data[] pointer). Build a real atom. 
*/ + k_atom = ray_guid(((const uint8_t*)ray_data(keys)) + i * 16); + k_owned = (k_atom && !RAY_IS_ERR(k_atom)); + } else if (keys->type == RAY_LIST) { + /* Borrowed — do NOT release. */ + k_atom = ((ray_t**)ray_data(keys))[i]; + } + if (k_is_null && k_atom) k_atom->nullmap[0] |= 1; + if (k_atom) fmt_obj(b, k_atom, mode); + fmt_putc(b, ':'); + + /* Render value: borrow from vals (LIST) or synthesize a typed atom + * directly from index i (do NOT route through k_atom — for STR keys + * k_atom is a fresh allocation we'll release just below). */ + if (vals && vals->type == RAY_LIST) { + ray_t* v = ray_list_get(vals, i); + fmt_obj(b, v, mode); + } else if (vals && i < vals->len) { + bool v_is_null = ray_vec_is_null(vals, i); + ray_t v_storage; memset(&v_storage, 0, sizeof(v_storage)); + ray_t* v_atom = NULL; + bool v_owned = false; + switch (vals->type) { + case RAY_BOOL: + case RAY_U8: v_storage.type = (int8_t)-vals->type; + v_storage.u8 = ((uint8_t*)ray_data(vals))[i]; + v_atom = &v_storage; break; + case RAY_I16: v_storage.type = -RAY_I16; + v_storage.i16 = ((int16_t*)ray_data(vals))[i]; + v_atom = &v_storage; break; + case RAY_I32: + case RAY_DATE: + case RAY_TIME: v_storage.type = (int8_t)-vals->type; + v_storage.i32 = ((int32_t*)ray_data(vals))[i]; + v_atom = &v_storage; break; + case RAY_I64: + case RAY_TIMESTAMP: v_storage.type = (int8_t)-vals->type; + v_storage.i64 = ((int64_t*)ray_data(vals))[i]; + v_atom = &v_storage; break; + case RAY_F32: v_storage.type = -RAY_F32; + v_storage.f64 = (double)((float*)ray_data(vals))[i]; + v_atom = &v_storage; break; + case RAY_F64: v_storage.type = -RAY_F64; + v_storage.f64 = ((double*)ray_data(vals))[i]; + v_atom = &v_storage; break; + case RAY_SYM: v_storage.type = -RAY_SYM; + v_storage.i64 = ray_read_sym(ray_data(vals), i, RAY_SYM, vals->attrs); + v_atom = &v_storage; break; + case RAY_STR: { + size_t vl = 0; + const char* vp = ray_str_vec_get(vals, i, &vl); + v_atom = ray_str(vp ? vp : "", vp ? 
vl : 0); + v_owned = true; + break; + } + case RAY_GUID: + v_atom = ray_guid(((const uint8_t*)ray_data(vals)) + i * 16); + v_owned = (v_atom && !RAY_IS_ERR(v_atom)); + break; + default: break; + } + if (v_is_null && v_atom) v_atom->nullmap[0] |= 1; + if (v_atom) fmt_obj(b, v_atom, mode); + if (v_owned && v_atom) ray_release(v_atom); + } + + if (k_owned && k_atom) ray_release(k_atom); + } + if (npairs > show) fmt_puts(b, " .."); + fmt_puts(b, "}"); +} + +/* ===== Box-drawing glyphs (UTF-8) ===== */ + +#define G_TL "\xe2\x94\x8c" /* ┌ */ +#define G_TR "\xe2\x94\x90" /* ┐ */ +#define G_BL "\xe2\x94\x94" /* └ */ +#define G_BR "\xe2\x94\x98" /* ┘ */ +#define G_H "\xe2\x94\x80" /* ─ */ +#define G_V "\xe2\x94\x82" /* │ */ +#define G_TT "\xe2\x94\xac" /* ┬ */ +#define G_BT "\xe2\x94\xb4" /* ┴ */ +#define G_LT "\xe2\x94\x9c" /* ├ */ +#define G_RT "\xe2\x94\xa4" /* ┤ */ +#define G_X "\xe2\x94\xbc" /* ┼ */ +#define G_HDOTS "\xe2\x80\xa6" /* … */ +#define G_VDOTS "\xe2\x94\x86" /* ┆ */ + +/* ===== Table formatter helpers ===== */ + +static void fmt_centered(fmt_buf_t* b, const char* s, int32_t slen, int32_t width) { + int32_t left = (width - slen) / 2; + int32_t right = width - slen - left; + for (int32_t i = 0; i < left; i++) fmt_putc(b, ' '); + fmt_putn(b, s, slen); + for (int32_t i = 0; i < right; i++) fmt_putc(b, ' '); +} + +/* Maximum pre-formatted cells: FMT_TABLE_MAX_WIDTH * FMT_TABLE_MAX_HEIGHT = 200 */ +#define FMT_CELL_BUF_SIZE 64 + +typedef struct { + char str[FMT_CELL_BUF_SIZE]; + int32_t len; +} fmt_cell_t; + +static void fmt_table(fmt_buf_t* b, ray_t* tbl, int mode) { + int64_t ncols = ray_table_ncols(tbl); + int64_t nrows = ray_table_nrows(tbl); + + /* Compact mode: round-trippable (table [names] (list col1 col2 ...)) */ + if (mode == 0) { + fmt_puts(b, "(table ["); + for (int64_t i = 0; i < ncols; i++) { + if (i > 0) fmt_putc(b, ' '); + int64_t name_id = ray_table_col_name(tbl, i); + ray_t* name_str = ray_sym_str(name_id); + if (name_str && 
!RAY_IS_ERR(name_str)) { + fmt_putn(b, ray_str_ptr(name_str), (int32_t)ray_str_len(name_str)); + ray_release(name_str); + } + } + fmt_puts(b, "] (list "); + for (int64_t i = 0; i < ncols; i++) { + if (i > 0) fmt_putc(b, ' '); + ray_t* col = ray_table_get_col_idx(tbl, i); + if (col) { + fmt_obj(b, col, mode); + } + } + fmt_puts(b, "))"); + return; + } + + /* Full mode (1) and show mode (2) */ + int64_t table_width = ncols; + int64_t table_height = nrows; + + if (mode == 1) { + if (table_width > FMT_TABLE_MAX_WIDTH) + table_width = FMT_TABLE_MAX_WIDTH; + if (table_height > FMT_TABLE_MAX_HEIGHT) + table_height = FMT_TABLE_MAX_HEIGHT; + } + + if (table_width == 0) { + fmt_puts(b, ""); + return; + } + + bool has_hidden_cols = (table_width < ncols); + bool has_hidden_rows = (table_height < nrows); + + /* Allocate metadata arrays. For mode 1 they fit on the stack + * (max 10 cols x 20 rows). For mode 2 we heap-allocate. */ + bool heap_alloc = (table_width > FMT_TABLE_MAX_WIDTH || + table_height > FMT_TABLE_MAX_HEIGHT); + + int32_t col_widths_stack[FMT_TABLE_MAX_WIDTH]; + const char* col_names_stack[FMT_TABLE_MAX_WIDTH]; + int32_t col_name_lens_stack[FMT_TABLE_MAX_WIDTH]; + const char* col_types_stack[FMT_TABLE_MAX_WIDTH]; + int32_t col_type_lens_stack[FMT_TABLE_MAX_WIDTH]; + ray_t* name_refs_stack[FMT_TABLE_MAX_WIDTH]; + fmt_cell_t cells_stack[FMT_TABLE_MAX_WIDTH * FMT_TABLE_MAX_HEIGHT]; + + /* Heap-backed pointers (NULL when using stack) */ + ray_t* heap_widths_blk = NULL; + ray_t* heap_names_blk = NULL; + ray_t* heap_nlen_blk = NULL; + ray_t* heap_types_blk = NULL; + ray_t* heap_tlen_blk = NULL; + ray_t* heap_refs_blk = NULL; + ray_t* heap_cells_blk = NULL; + + int32_t* col_widths; + const char** col_names; + int32_t* col_name_lens; + const char** col_types; + int32_t* col_type_lens; + ray_t** name_refs; + fmt_cell_t* cells; + + if (!heap_alloc) { + col_widths = col_widths_stack; + col_names = col_names_stack; + col_name_lens = col_name_lens_stack; + col_types = 
col_types_stack; + col_type_lens = col_type_lens_stack; + name_refs = name_refs_stack; + cells = cells_stack; + } else { + heap_widths_blk = ray_alloc((size_t)(table_width * (int64_t)sizeof(int32_t))); + heap_names_blk = ray_alloc((size_t)(table_width * (int64_t)sizeof(const char*))); + heap_nlen_blk = ray_alloc((size_t)(table_width * (int64_t)sizeof(int32_t))); + heap_types_blk = ray_alloc((size_t)(table_width * (int64_t)sizeof(const char*))); + heap_tlen_blk = ray_alloc((size_t)(table_width * (int64_t)sizeof(int32_t))); + heap_refs_blk = ray_alloc((size_t)(table_width * (int64_t)sizeof(ray_t*))); + heap_cells_blk = ray_alloc((size_t)(table_width * table_height * (int64_t)sizeof(fmt_cell_t))); + + col_widths = (int32_t*)ray_data(heap_widths_blk); + col_names = (const char**)ray_data(heap_names_blk); + col_name_lens = (int32_t*)ray_data(heap_nlen_blk); + col_types = (const char**)ray_data(heap_types_blk); + col_type_lens = (int32_t*)ray_data(heap_tlen_blk); + name_refs = (ray_t**)ray_data(heap_refs_blk); + cells = (fmt_cell_t*)ray_data(heap_cells_blk); + } + + /* Pre-format cells and calculate column widths */ + for (int64_t ci = 0; ci < table_width; ci++) { + /* Column name */ + int64_t name_id = ray_table_col_name(tbl, ci); + ray_t* name_str = ray_sym_str(name_id); + name_refs[ci] = name_str; + if (name_str && !RAY_IS_ERR(name_str)) { + col_names[ci] = ray_str_ptr(name_str); + col_name_lens[ci] = (int32_t)ray_str_len(name_str); + } else { + col_names[ci] = "?"; + col_name_lens[ci] = 1; + name_refs[ci] = NULL; + } + + /* Column type */ + ray_t* col_vec = ray_table_get_col_idx(tbl, ci); + const char* tname = ray_type_name(col_vec ? col_vec->type : 0); + col_types[ci] = tname; + col_type_lens[ci] = (int32_t)strlen(tname); + + /* Start with max of name and type lengths */ + int32_t max_w = col_name_lens[ci]; + if (col_type_lens[ci] > max_w) max_w = col_type_lens[ci]; + + int64_t col_len = col_vec ? 
ray_len(col_vec) : 0; + + /* Format first half (head rows) */ + int64_t half = table_height / 2; + for (int64_t ri = 0; ri < half; ri++) { + fmt_cell_t* cell = &cells[ci * table_height + ri]; + if (ri < col_len) { + fmt_buf_t tmp; + fmt_init(&tmp); + fmt_raw_elem(&tmp, col_vec, ri); + int32_t clen = tmp.len < FMT_CELL_BUF_SIZE - 1 ? tmp.len : FMT_CELL_BUF_SIZE - 1; + memcpy(cell->str, tmp.buf, (size_t)clen); + cell->str[clen] = '\0'; + cell->len = clen; + fmt_destroy(&tmp); + } else { + memcpy(cell->str, "NA", 3); + cell->len = 2; + } + if (cell->len > max_w) max_w = cell->len; + } + + /* Format second half (tail rows) */ + for (int64_t ri = half; ri < table_height; ri++) { + fmt_cell_t* cell = &cells[ci * table_height + ri]; + int64_t src_idx; + if (table_height == col_len || !has_hidden_rows) { + src_idx = ri; + } else { + src_idx = col_len - table_height + ri; + } + if (src_idx >= 0 && src_idx < col_len) { + fmt_buf_t tmp; + fmt_init(&tmp); + fmt_raw_elem(&tmp, col_vec, src_idx); + int32_t clen = tmp.len < FMT_CELL_BUF_SIZE - 1 ? 
tmp.len : FMT_CELL_BUF_SIZE - 1; + memcpy(cell->str, tmp.buf, (size_t)clen); + cell->str[clen] = '\0'; + cell->len = clen; + fmt_destroy(&tmp); + } else { + memcpy(cell->str, "NA", 3); + cell->len = 2; + } + if (cell->len > max_w) max_w = cell->len; + } + + col_widths[ci] = max_w + 2; /* +2 for padding (1 space each side) */ + } + + /* Calculate total width (sum of col widths + separators between columns) */ + int32_t total_width = 0; + for (int64_t ci = 0; ci < table_width; ci++) + total_width += col_widths[ci]; + total_width += (int32_t)(table_width - 1); /* separators between columns */ + + /* Format footer to check if we need to widen the last column */ + char footer[128]; + int footer_len = snprintf(footer, sizeof(footer), + " %" PRId64 " rows (%" PRId64 " shown) %" PRId64 " columns (%" PRId64 " shown)", + nrows, table_height, ncols, table_width); + + if (total_width < footer_len) { + col_widths[table_width - 1] += footer_len - total_width; + total_width = footer_len; + } + + /* Extra width for hidden columns indicator */ + if (has_hidden_cols) + total_width += 4; /* "───┐" or " … │" */ + + /* === Render === */ + + /* 1. Top border: ┌──┬──┐ */ + fmt_puts(b, G_TL); + for (int64_t ci = 0; ci < table_width; ci++) { + for (int32_t j = 0; j < col_widths[ci]; j++) + fmt_puts(b, G_H); + if (ci < table_width - 1 || has_hidden_cols) + fmt_puts(b, G_TT); + else + fmt_puts(b, G_TR); + } + if (has_hidden_cols) { + fmt_puts(b, G_H G_H G_H G_TR); + } + + /* 2. Header row: │ name │ (centered) */ + fmt_putc(b, '\n'); + fmt_puts(b, G_V); + for (int64_t ci = 0; ci < table_width; ci++) { + fmt_centered(b, col_names[ci], col_name_lens[ci], col_widths[ci]); + fmt_puts(b, G_V); + } + if (has_hidden_cols) { + fmt_puts(b, " " G_HDOTS " " G_V); + } + + /* 3. 
Type row: │ type │ (centered) */ + fmt_putc(b, '\n'); + fmt_puts(b, G_V); + for (int64_t ci = 0; ci < table_width; ci++) { + fmt_centered(b, col_types[ci], col_type_lens[ci], col_widths[ci]); + fmt_puts(b, G_V); + } + if (has_hidden_cols) { + fmt_puts(b, " " G_HDOTS " " G_V); + } + + /* 4. Separator: ├──┼──┤ */ + fmt_putc(b, '\n'); + fmt_puts(b, G_LT); + for (int64_t ci = 0; ci < table_width; ci++) { + for (int32_t j = 0; j < col_widths[ci]; j++) + fmt_puts(b, G_H); + if (ci < table_width - 1 || has_hidden_cols) + fmt_puts(b, G_X); + else + fmt_puts(b, G_RT); + } + if (has_hidden_cols) { + fmt_puts(b, G_H G_H G_H G_RT); + } + + /* 5. Data rows */ + int64_t half = table_height / 2; + for (int64_t ri = 0; ri < table_height; ri++) { + fmt_putc(b, '\n'); + + /* 6. Truncation indicator row between head and tail */ + if (has_hidden_rows && ri == half) { + fmt_puts(b, G_VDOTS); + for (int64_t ci = 0; ci < table_width; ci++) { + /* Center the ellipsis (3 bytes, 1 display char) */ + int32_t left = (col_widths[ci] - 1) / 2; + int32_t right = col_widths[ci] - 1 - left; + for (int32_t p = 0; p < left; p++) fmt_putc(b, ' '); + fmt_puts(b, G_HDOTS); + for (int32_t p = 0; p < right; p++) fmt_putc(b, ' '); + fmt_puts(b, G_VDOTS); + } + if (has_hidden_cols) { + fmt_puts(b, " " G_HDOTS " " G_VDOTS); + } + fmt_putc(b, '\n'); + } + + /* Data row: │ val │ (left-aligned with 1-space padding) */ + fmt_puts(b, G_V); + for (int64_t ci = 0; ci < table_width; ci++) { + fmt_cell_t* cell = &cells[ci * table_height + ri]; + fmt_putc(b, ' '); + fmt_putn(b, cell->str, cell->len); + int32_t pad = col_widths[ci] - cell->len - 1; + for (int32_t p = 0; p < pad; p++) + fmt_putc(b, ' '); + fmt_puts(b, G_V); + } + if (has_hidden_cols) { + fmt_puts(b, " " G_HDOTS " " G_V); + } + } + + /* 7. 
Bottom border (separator before footer): ├──┴──┤ */ + fmt_putc(b, '\n'); + fmt_puts(b, G_LT); + for (int64_t ci = 0; ci < table_width; ci++) { + for (int32_t j = 0; j < col_widths[ci]; j++) + fmt_puts(b, G_H); + if (ci < table_width - 1 || has_hidden_cols) + fmt_puts(b, G_BT); + else + fmt_puts(b, G_RT); + } + if (has_hidden_cols) { + fmt_puts(b, G_H G_H G_H G_RT); + } + + /* 8. Footer row: │ N rows (M shown) C columns (K shown) │ */ + fmt_putc(b, '\n'); + fmt_puts(b, G_V); + fmt_putn(b, footer, footer_len); + for (int32_t i = footer_len; i < total_width; i++) + fmt_putc(b, ' '); + fmt_puts(b, G_V); + + /* Final bottom border: └───┘ */ + fmt_putc(b, '\n'); + fmt_puts(b, G_BL); + for (int32_t i = 0; i < total_width; i++) + fmt_puts(b, G_H); + fmt_puts(b, G_BR); + + /* Release name string refs */ + for (int64_t ci = 0; ci < table_width; ci++) { + if (name_refs[ci]) ray_release(name_refs[ci]); + } + + /* Free heap allocations if used */ + if (heap_alloc) { + ray_free(heap_widths_blk); + ray_free(heap_names_blk); + ray_free(heap_nlen_blk); + ray_free(heap_types_blk); + ray_free(heap_tlen_blk); + ray_free(heap_refs_blk); + ray_free(heap_cells_blk); + } +} + +/* ===== Core dispatch ===== */ + +static void fmt_obj(fmt_buf_t* b, ray_t* obj, int mode) { + if (!obj || RAY_IS_NULL(obj)) { fmt_puts(b, "null"); return; } + if (RAY_IS_ERR(obj)) { + char code[8] = {0}; + memcpy(code, obj->sdata, obj->slen < 7 ? 
obj->slen : 7); + fmt_puts(b, "error: "); + fmt_puts(b, code); + return; + } + + int8_t type = obj->type; + if (type < 0) { + /* Typed null atom: null bit set → display as 0Nx */ + if (RAY_ATOM_IS_NULL(obj)) { + fmt_puts(b, null_literal(-type)); + return; + } + /* Atom: type is negated */ + switch (-type) { + case RAY_BOOL: fmt_bool(b, obj->b8); break; + case RAY_U8: fmt_u8(b, obj->u8); break; + + case RAY_I16: fmt_i16(b, obj->i16); break; + case RAY_I32: fmt_i32(b, obj->i32); break; + case RAY_I64: fmt_i64(b, obj->i64); break; + case RAY_F32: fmt_f32(b, (float)obj->f64); break; + case RAY_F64: fmt_f64(b, obj->f64); break; + case RAY_DATE: fmt_date(b, obj->i32); break; + case RAY_TIME: fmt_time(b, obj->i32); break; + case RAY_TIMESTAMP: fmt_timestamp(b, obj->i64); break; + case RAY_SYM: fmt_sym(b, obj->i64); break; + case RAY_STR: fmt_str_atom(b, obj, mode > 0); break; + case RAY_GUID: fmt_guid(b, obj->obj ? (const uint8_t*)ray_data(obj->obj) : (const uint8_t*)ray_data(obj)); break; + default: fmt_puts(b, "?"); break; + } + } else if (ray_is_vec(obj)) { + int limit = (mode == 1) ? g_row_width : -1; + fmt_vector(b, obj, limit); + } else if (type == RAY_LIST) { + fmt_list(b, obj, mode); + } else if (type == RAY_TABLE) { + fmt_table(b, obj, mode); + } else if (type == RAY_DICT) { + fmt_dict(b, obj, mode); + } else if (type == RAY_LAMBDA) { + fmt_puts(b, "lambda"); + } else if (type == RAY_UNARY || type == RAY_BINARY || type == RAY_VARY) { + /* Render function objects with angle brackets so a fn is + * visually distinct from a sym or string. Without them, + * `.os` printed as `{getenv:.os.getenv …}` — looked like + * a dict of sym self-references. Now it reads + * `{getenv:<.os.getenv> …}`. */ + const char* name = ray_fn_name(obj); + if (name[0]) { fmt_puts(b, "<"); fmt_puts(b, name); fmt_puts(b, ">"); } + else fmt_puts(b, type == RAY_UNARY ? "" : + type == RAY_BINARY ? 
"" : ""); + } else if (type == RAY_LAZY) { + ray_t* concrete = ray_lazy_materialize(obj); + fmt_obj(b, concrete, mode); + return; + } else { + fmt_printf(b, "<%s>", ray_type_name(type)); + } +} + +ray_t* ray_fmt(ray_t* obj, int mode) { + fmt_buf_t b; + fmt_init(&b); + fmt_obj(&b, obj, mode); + return fmt_to_str(&b); +} + +void ray_fmt_print(FILE* fp, ray_t* obj, int mode) { + ray_t* s = ray_fmt(obj, mode); + if (s) { + fwrite(ray_str_ptr(s), 1, ray_str_len(s), fp); + ray_release(s); + } +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/lang/format.h b/crates/rayforce-sys/vendor/rayforce/src/lang/format.h new file mode 100644 index 0000000..894aa5b --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/lang/format.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAY_LANG_FORMAT_H +#define RAY_LANG_FORMAT_H + +#include +#include + +#define FMT_TABLE_MAX_WIDTH 10 +#define FMT_TABLE_MAX_HEIGHT 20 +#define FMT_LIST_MAX_HEIGHT 50 +#define FMT_DEFAULT_ROW_WIDTH 80 +#define FMT_DEFAULT_PRECISION 2 + +/* Format a ray_t value into a new ray_t string (RAY_STR atom). + * mode: 0 = compact, 1 = full (REPL), 2 = show (no limits) */ +ray_t* ray_fmt(ray_t* obj, int mode); + +/* Format and write to FILE* */ +void ray_fmt_print(FILE* fp, ray_t* obj, int mode); + +/* Display settings */ +void ray_fmt_set_precision(int digits); +void ray_fmt_set_width(int cols); + +/* Type name string (e.g. RAY_I64 -> "i64") */ +const char* ray_type_name(int8_t type); + +#endif /* RAY_LANG_FORMAT_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/lang/internal.h b/crates/rayforce-sys/vendor/rayforce/src/lang/internal.h new file mode 100644 index 0000000..3b47b09 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/lang/internal.h @@ -0,0 +1,514 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** Shared helpers for eval.c split — included by arith.c, cmp.c, agg.c, etc. + * Small hot-path helpers are static inline; larger functions that remain in + * eval.c are declared extern. + */ + +#ifndef RAY_LANG_INTERNAL_H +#define RAY_LANG_INTERNAL_H + +#include "lang/eval.h" +#include "lang/format.h" +#include "core/types.h" +#include "mem/heap.h" +#include "table/sym.h" +#include +#include +#include + +/* ══════════════════════════════════════════ + * Atom constructors + * ══════════════════════════════════════════ */ + +static inline ray_t* make_i64(int64_t v) { + ray_t* obj = ray_alloc(0); + if (!obj) return ray_error("oom", NULL); + obj->type = -RAY_I64; + obj->i64 = v; + return obj; +} + +static inline ray_t* make_f64(double v) { + ray_t* obj = ray_alloc(0); + if (!obj) return ray_error("oom", NULL); + obj->type = -RAY_F64; + obj->f64 = v; + return obj; +} + +static inline ray_t* make_i16(int16_t v) { + return ray_i16(v); +} + +static inline ray_t* make_i32(int32_t v) { + return ray_i32(v); +} + +static inline ray_t* make_u8(uint8_t v) { + return ray_u8(v); +} + +static inline ray_t* make_bool(uint8_t v) { + ray_t* obj = ray_alloc(0); + if (!obj) return ray_error("oom", NULL); + obj->type = -RAY_BOOL; + obj->b8 = v; + return obj; +} + +/* ══════════════════════════════════════════ + * Type checks and numeric extraction + * ══════════════════════════════════════════ */ + +/* Helpers to extract numeric value as double */ +static inline int is_numeric(ray_t* x) { + return x->type == -RAY_I64 || x->type == -RAY_F64 || + x->type == -RAY_I16 || x->type == -RAY_I32 || + x->type == -RAY_U8 || x->type == -RAY_BOOL; +} + +/* Check if an atom is a temporal type */ +static inline int 
is_temporal(ray_t* x) {
+    return x->type == -RAY_DATE || x->type == -RAY_TIME || x->type == -RAY_TIMESTAMP;
+}
+
+/* Convert temporal atom to nanoseconds for cross-temporal comparison.
+ * DATE = days since epoch -> ns, TIME = ms since midnight -> ns, TIMESTAMP = ns.
+ * Any non-temporal atom yields 0. */
+static inline int64_t temporal_as_ns(ray_t* x) {
+    if (x->type == -RAY_TIMESTAMP) return x->i64;
+    if (x->type == -RAY_DATE) return (int64_t)x->i32 * 86400000000000LL;
+    if (x->type == -RAY_TIME) return (int64_t)x->i32 * 1000000LL;
+    return 0;
+}
+
+/* Extract integer value from any integer atom as int64_t.
+ * Atoms of any other type are read through their i64 field unchanged. */
+static inline int64_t as_i64(ray_t* x) {
+    if (x->type == -RAY_I64) return x->i64;
+    if (x->type == -RAY_I32) return (int64_t)x->i32;
+    if (x->type == -RAY_I16) return (int64_t)x->i16;
+    if (x->type == -RAY_U8) return (int64_t)x->u8;
+    return x->i64; /* fallback */
+}
+
+/* Extract the value of an atom as a double.
+ * Numeric atoms are widened from their own field; a string atom of length 1
+ * yields its single byte as an unsigned value; bool yields b8; DATE/TIME are
+ * read from i32 and TIMESTAMP from i64. Anything else falls back to the i64
+ * field converted to double. */
+static inline double as_f64(ray_t* x) {
+    if (x->type == -RAY_F64) return x->f64;
+    if (x->type == -RAY_I64) return (double)x->i64;
+    if (x->type == -RAY_I32) return (double)x->i32;
+    if (x->type == -RAY_I16) return (double)x->i16;
+    if (x->type == -RAY_U8) return (double)x->u8;
+    if (x->type == -RAY_STR && ray_str_len(x) == 1) return (double)(unsigned char)x->sdata[0];
+    if (x->type == -RAY_BOOL) return (double)x->b8;
+    if (x->type == -RAY_DATE || x->type == -RAY_TIME) return (double)x->i32;
+    if (x->type == -RAY_TIMESTAMP) return (double)x->i64;
+    return (double)x->i64;
+}
+
+/* True when either operand is an f64 atom, i.e. the operation must be
+ * carried out in floating point. */
+static inline int is_float_op(ray_t* a, ray_t* b) {
+    return a->type == -RAY_F64 || b->type == -RAY_F64;
+}
+
+/* ══════════════════════════════════════════
+ * Null/type helpers
+ * ══════════════════════════════════════════ */
+
+/* RAY_ATOM_IS_NULL and ray_typed_null are in rayforce.h */
+
+/* Return a typed null for the promoted result type of two operands.
+ * Precedence is f64 > i64 > i32 > i16; everything else yields an i64 null.
+ * NOTE(review): a u8/u8 pair gets an i64 null here although
+ * promote_int_type() reports -RAY_U8 for the same pair — confirm intended. */
+static inline ray_t* null_for_promoted(ray_t* a, ray_t* b) {
+    if (a->type == -RAY_F64 || b->type == -RAY_F64)
+        return ray_typed_null(-RAY_F64);
+    if (a->type == -RAY_I64 || b->type == -RAY_I64)
+        return ray_typed_null(-RAY_I64);
+    if (a->type == -RAY_I32 || b->type == -RAY_I32)
+        return ray_typed_null(-RAY_I32);
+    if (a->type == -RAY_I16 || b->type == -RAY_I16)
+        return ray_typed_null(-RAY_I16);
+    return ray_typed_null(-RAY_I64);
+}
+
+/* ══════════════════════════════════════════
+ * Type promotion
+ * ══════════════════════════════════════════ */
+
+/* Determine the promoted integer result type for two numeric operands.
+ * Returns atom type code (negative).
+ * i64 wins over i32, which wins over the narrower types; u8 only survives
+ * when both operands are u8. */
+static inline int8_t promote_int_type(ray_t* a, ray_t* b) {
+    if (a->type == -RAY_I64 || b->type == -RAY_I64) return -RAY_I64;
+    if (a->type == -RAY_I32 || b->type == -RAY_I32) return -RAY_I32;
+    if (a->type == -RAY_U8 || b->type == -RAY_U8) {
+        /* u8 op u8 -> u8, but u8 op i16 -> i16 etc */
+        if (a->type == -RAY_U8 && b->type == -RAY_U8) return -RAY_U8;
+        /* u8 mixed with anything that is not i16 (e.g. bool) widens to i64 */
+        return (a->type == -RAY_I16 || b->type == -RAY_I16) ? -RAY_I16 : -RAY_I64;
+    }
+    if (a->type == -RAY_I16 || b->type == -RAY_I16) return -RAY_I16;
+    return -RAY_I64;
+}
+
+/* Promote integer type following right-operand's type (K/q semantics for sub).
+ * If the right operand is not an integer atom the left operand's integer
+ * type is used; if neither is, the result is -RAY_I64. */
+static inline int8_t promote_int_type_right(ray_t* a, ray_t* b) {
+    /* NOTE(review): this cast is redundant — `a` is read in the fallback below. */
+    (void)a;
+    int8_t bt = b->type;
+    if (bt == -RAY_I32 || bt == -RAY_I16 || bt == -RAY_U8 || bt == -RAY_I64)
+        return bt;
+    int8_t at = a->type;
+    if (at == -RAY_I32 || at == -RAY_I16 || at == -RAY_U8 || at == -RAY_I64)
+        return at;
+    return -RAY_I64;
+}
+
+/* Create a result atom of the given type from an int64_t value.
+ * The value is narrowed with a plain C cast (no range check); any type
+ * code other than i16/i32/u8 produces an i64 atom. */
+static inline ray_t* make_typed_int(int8_t atom_type, int64_t val) {
+    switch (atom_type) {
+    case -RAY_I16: return make_i16((int16_t)val);
+    case -RAY_I32: return make_i32((int32_t)val);
+    case -RAY_U8: return make_u8((uint8_t)val);
+    default: return make_i64(val);
+    }
+}
+
+/* ══════════════════════════════════════════
+ * Type name helper
+ * ══════════════════════════════════════════ */
+
+/* Removed: type_sym_name() — use ray_type_name() directly.
+ * Lowercase for atoms (negative type), uppercase for vectors (positive). */
+
+/* ══════════════════════════════════════════
+ * Truthiness
+ * ══════════════════════════════════════════ */
+
+/* Logical -- coerce to truthiness (0/nil/false = falsy, else truthy).
+ * Null forms (RAY_NULL singleton and typed null atoms) are falsy.
+ * Only bool, i64 and f64 atoms are tested against zero; every other
+ * non-null value — including i32/i16/u8 atoms holding 0 — is truthy.
+ * NOTE(review): confirm that zero-valued narrow integers are meant to be truthy. */
+static inline int is_truthy(ray_t* x) {
+    if (RAY_IS_NULL(x) || RAY_ATOM_IS_NULL(x)) return 0;
+    if (x->type == -RAY_BOOL) return x->b8;
+    if (x->type == -RAY_I64) return x->i64 != 0;
+    if (x->type == -RAY_F64) return x->f64 != 0.0;
+    return 1; /* non-null objects are truthy */
+}
+
+/* ══════════════════════════════════════════
+ * Collection helpers
+ * ══════════════════════════════════════════ */
+
+/* True when x is a non-NULL, non-error boxed list (RAY_LIST). */
+static inline int is_list(ray_t* x) {
+    return x && !RAY_IS_ERR(x) && x->type == RAY_LIST;
+}
+
+/* Check if x is a collection: boxed list OR typed vector */
+static inline int is_collection(ray_t* x) {
+    return x && !RAY_IS_ERR(x) && (x->type == RAY_LIST || ray_is_vec(x));
+}
+
+/* Extract the i-th element of a collection as a ray_t* atom.
+ * For boxed lists, returns the stored pointer (no alloc).
+ * For typed vectors, allocates a new atom. Caller must release
+ * atoms obtained from typed vectors (allocated == 1).
*/
+static inline ray_t* collection_elem(ray_t* coll, int64_t i, int *allocated) {
+    /* Boxed list: hand back the stored pointer as-is; i is not bounds-checked. */
+    if (coll->type == RAY_LIST) {
+        *allocated = 0;
+        return ((ray_t**)ray_data(coll))[i];
+    }
+    *allocated = 1;
+    /* Null slot: materialise a typed null of the matching atom type. */
+    if (ray_vec_is_null(coll, i))
+        return ray_typed_null(-coll->type);
+    void* d = ray_data(coll);
+    switch (coll->type) {
+    case RAY_I64: return ray_i64(((int64_t*)d)[i]);
+    case RAY_F64: return ray_f64(((double*)d)[i]);
+    case RAY_I32: return ray_i32(((int32_t*)d)[i]);
+    case RAY_I16: return ray_i16(((int16_t*)d)[i]);
+    case RAY_BOOL: return ray_bool(((bool*)d)[i]);
+    case RAY_SYM: return ray_sym(ray_read_sym(d, i, coll->type, coll->attrs));
+    case RAY_U8: return ray_u8(((uint8_t*)d)[i]);
+    /* DATE and TIME vectors hold 32-bit values, widened for the constructor. */
+    case RAY_DATE: return ray_date((int64_t)((int32_t*)d)[i]);
+    case RAY_TIME: return ray_time((int64_t)((int32_t*)d)[i]);
+    case RAY_TIMESTAMP: return ray_timestamp(((int64_t*)d)[i]);
+    case RAY_GUID: {
+        /* GUID slots are 16 bytes wide. */
+        const uint8_t* gd = ((uint8_t*)d) + i * 16;
+        return ray_guid(gd);
+    }
+    /* RAY_CHAR removed -- char vectors no longer exist */
+    case RAY_STR: {
+        size_t slen = 0;
+        const char* sp = ray_str_vec_get(coll, i, &slen);
+        /* A NULL string pointer is returned as an empty string atom. */
+        return ray_str(sp ? sp : "", sp ? slen : 0);
+    }
+    /* Unsupported vector type: a "type" error is returned and *allocated is
+     * reset to 0, so callers following the release rule will not release it. */
+    default: *allocated = 0; return ray_error("type", NULL);
+    }
+}
+
+/* Extract a value from an atom for storage, handling cross-type casting.
+ * Returns the value as int64_t (for integer/temporal types).
+ * f64 atoms are converted with a C cast (truncation toward zero); any
+ * unlisted type is read through the i64 field.
+ * NOTE(review): DATE and TIME atoms are read through ->i64 here, while
+ * temporal_as_ns() and as_f64() in this header read them through ->i32 —
+ * confirm that both views of the atom agree for negative values. */
+static inline int64_t elem_as_i64(ray_t* elem) {
+    if (elem->type == -RAY_I64 || elem->type == -RAY_TIMESTAMP ||
+        elem->type == -RAY_DATE || elem->type == -RAY_TIME ||
+        elem->type == -RAY_SYM) return elem->i64;
+    if (elem->type == -RAY_I32) return (int64_t)elem->i32;
+    if (elem->type == -RAY_I16) return (int64_t)elem->i16;
+    if (elem->type == -RAY_U8) return (int64_t)elem->u8;
+    if (elem->type == -RAY_F64) return (int64_t)elem->f64;
+    return elem->i64;
+}
+
+/* Store a scalar result into a typed vector at position i.
+ * Returns 0 on success, -1 when the vector's type is not handled below.
+ * The element's own type is NOT checked: integer-like slots receive
+ * elem_as_i64(elem) narrowed with a C cast, and bool slots copy elem->b8.
+ * A null atom zeroes the slot and marks it null.
+ */
+static inline int store_typed_elem(ray_t* vec, int64_t i, ray_t* elem) {
+    if (RAY_ATOM_IS_NULL(elem)) {
+        /* Clear the payload bytes before flagging the slot as null. */
+        int esz = ray_elem_size(vec->type);
+        memset((char*)ray_data(vec) + i * esz, 0, esz);
+        ray_vec_set_null(vec, i, true);
+        return 0;
+    }
+    switch (vec->type) {
+    case RAY_I64: ((int64_t*)ray_data(vec))[i] = elem_as_i64(elem); return 0;
+    case RAY_F64: ((double*)ray_data(vec))[i] = (elem->type == -RAY_F64) ? elem->f64 : (double)elem_as_i64(elem); return 0;
+    case RAY_I32: ((int32_t*)ray_data(vec))[i] = (int32_t)elem_as_i64(elem); return 0;
+    case RAY_I16: ((int16_t*)ray_data(vec))[i] = (int16_t)elem_as_i64(elem); return 0;
+    case RAY_BOOL: ((bool*)ray_data(vec))[i] = elem->b8; return 0;
+    case RAY_U8: ((uint8_t*)ray_data(vec))[i] = (uint8_t)elem_as_i64(elem); return 0;
+    case RAY_DATE: ((int32_t*)ray_data(vec))[i] = (int32_t)elem_as_i64(elem); return 0;
+    case RAY_TIME: ((int32_t*)ray_data(vec))[i] = (int32_t)elem_as_i64(elem); return 0;
+    case RAY_TIMESTAMP: ((int64_t*)ray_data(vec))[i] = elem_as_i64(elem); return 0;
+    case RAY_SYM: ray_write_sym(ray_data(vec), i, (uint64_t)elem->i64, vec->type, vec->attrs); return 0;
+    /* GUID: when elem->obj is NULL nothing is written, yet 0 is still returned. */
+    case RAY_GUID: if (elem->obj) memcpy(((uint8_t*)ray_data(vec)) + i * 16, ray_data(elem->obj), 16); return 0;
+    default: return -1;
+    }
+}
+
+/* ══════════════════════════════════════════
+ * Extern forward declarations — larger functions that stay in eval.c
+ * ══════════════════════════════════════════ */
+
+ray_t* atomic_map_binary_op(ray_binary_fn fn, uint16_t dag_opcode, ray_t* left, ray_t* right);
+ray_t* atomic_map_unary(ray_unary_fn fn, ray_t* arg);
+ray_t* to_boxed_list(ray_t* x);
+ray_t* unbox_vec_arg(ray_t* x, ray_t** _bx);
+ray_t* call_lambda(ray_t* lambda, ray_t** call_args, int64_t argc);
+ray_t* call_fn1(ray_t* fn, ray_t* arg);
+ray_t* call_fn2(ray_t* fn, ray_t* a, ray_t* b);
+ray_t* gather_by_idx(ray_t* vec, int64_t* idx, int64_t n);
+ray_t* ray_sort(ray_t** cols, uint8_t* descs, uint8_t* nulls_first,
+                uint8_t n_cols, int64_t
nrows); +int char_str_cmp(ray_t* a, ray_t* b, int *out); +int is_comparable(ray_t* x); + +/* Arithmetic builtins (formerly static in eval.c, now in arith.c) */ +ray_t* ray_round_fn(ray_t* x); +ray_t* ray_floor_fn(ray_t* x); +ray_t* ray_ceil_fn(ray_t* x); +ray_t* ray_abs_fn(ray_t* x); +ray_t* ray_sqrt_fn(ray_t* x); +ray_t* ray_log_fn(ray_t* x); +ray_t* ray_exp_fn(ray_t* x); + +/* Collection helpers (formerly static in eval.c, now in collection.c) */ +int atom_eq(ray_t* a, ray_t* b); +ray_t* list_to_typed_vec(ray_t* list, int8_t orig_vec_type); + +/* Collection builtins (formerly static in eval.c, now in collection.c) */ +ray_t* ray_map_fn(ray_t** args, int64_t n); +ray_t* ray_pmap_fn(ray_t** args, int64_t n); +ray_t* ray_fold_fn(ray_t** args, int64_t n); +ray_t* ray_scan_fn(ray_t** args, int64_t n); +ray_t* ray_filter_fn(ray_t* vec, ray_t* mask); +ray_t* ray_apply_fn(ray_t** args, int64_t n); +ray_t* ray_distinct_fn(ray_t* x); +ray_t* ray_in_fn(ray_t* val, ray_t* vec); +ray_t* ray_except_fn(ray_t* vec1, ray_t* vec2); +ray_t* ray_union_fn(ray_t* vec1, ray_t* vec2); +ray_t* ray_sect_fn(ray_t* vec1, ray_t* vec2); +ray_t* ray_take_fn(ray_t* vec, ray_t* n_obj); +ray_t* ray_at_fn(ray_t* vec, ray_t* idx); +ray_t* ray_find_fn(ray_t* vec, ray_t* val); +ray_t* ray_til_fn(ray_t* x); +ray_t* ray_reverse_fn(ray_t* x); +ray_t* ray_rand_fn(ray_t* a, ray_t* b); +ray_t* ray_bin_fn(ray_t* sorted, ray_t* val); +ray_t* ray_binr_fn(ray_t* sorted, ray_t* val); +ray_t* ray_map_left_fn(ray_t** args, int64_t n); +ray_t* ray_map_right_fn(ray_t** args, int64_t n); +ray_t* ray_fold_left_fn(ray_t** args, int64_t n); +ray_t* ray_fold_right_fn(ray_t** args, int64_t n); +ray_t* ray_scan_left_fn(ray_t** args, int64_t n); +ray_t* ray_scan_right_fn(ray_t** args, int64_t n); +ray_t* ray_enlist_fn(ray_t** args, int64_t n); + +/* String builtins (formerly static in eval.c, now in str_builtin.c) */ +ray_t* ray_split_fn(ray_t* str, ray_t* delim); +ray_t* ray_like_fn(ray_t* x, ray_t* pattern); +ray_t* 
ray_sym_name_fn(ray_t* x); + +/* Table builtins (formerly static in eval.c, now in table_builtin.c) */ +uint16_t pivot_fn_to_agg_op(ray_t* fn); +ray_t* ray_pivot_fn(ray_t** args, int64_t n); +ray_t* ray_modify_fn(ray_t** args, int64_t n); +ray_t* ray_alter_fn(ray_t** args, int64_t n); +ray_t* ray_del_fn(ray_t** args, int64_t n); +ray_t* ray_row_fn(ray_t* tbl, ray_t* idx); +ray_t* ray_union_all_fn(ray_t* t1, ray_t* t2); +ray_t* ray_table_distinct_fn(ray_t* tbl); +ray_t* ray_unify_fn(ray_t* a, ray_t* b); + +/* Concat (formerly static in eval.c, now extern for table_builtin.c) */ +ray_t* ray_concat_fn(ray_t* a, ray_t* b); + +/* Temporal builtins (formerly static in eval.c, now in temporal.c) */ +ray_t* ray_date_clock_fn(ray_t* arg); +ray_t* ray_time_clock_fn(ray_t* arg); +ray_t* ray_timestamp_clock_fn(ray_t* arg); + +/* Sort builtins (formerly static in eval.c, now in sort.c) */ +ray_t* ray_asc_fn(ray_t* x); +ray_t* ray_desc_fn(ray_t* x); +ray_t* ray_iasc_fn(ray_t* x); +ray_t* ray_idesc_fn(ray_t* x); +ray_t* ray_rank_fn(ray_t* x); +ray_t* sort_table_by_keys(ray_t* tbl, ray_t* keys, uint8_t descending); +ray_t* ray_xasc_fn(ray_t* tbl, ray_t* keys); +ray_t* ray_xdesc_fn(ray_t* tbl, ray_t* keys); +ray_t* ray_xrank_fn(ray_t* n_obj, ray_t* vec); + +/* Datalog builtins (formerly static in eval.c, now in datalog_builtin.c) */ +ray_t* ray_datoms_fn(ray_t** args, int64_t n); +ray_t* ray_assert_fact_fn(ray_t** args, int64_t n); +ray_t* ray_retract_fact_fn(ray_t** args, int64_t n); +ray_t* ray_scan_eav_fn(ray_t** args, int64_t n); +ray_t* ray_pull_fn(ray_t** args, int64_t n); +ray_t* ray_rule_fn(ray_t** args, int64_t n); +ray_t* ray_query_fn(ray_t** args, int64_t n); +ray_t* ray_dl_program_fn(ray_t** args, int64_t n); +ray_t* ray_dl_add_edb_fn(ray_t** args, int64_t n); +ray_t* ray_dl_stratify_fn(ray_t* x); +ray_t* ray_dl_eval_fn(ray_t* x); +ray_t* ray_dl_query_fn(ray_t* prog_obj, ray_t* pred_obj); +ray_t* ray_dl_provenance_fn(ray_t* prog_obj, ray_t* pred_obj); +void 
ray_dl_reset_rules(void); + +/* System builtins (formerly static in eval.c, now in system.c) */ +ray_t* ray_eval_builtin_fn(ray_t* x); +ray_t* ray_parse_builtin_fn(ray_t* x); +ray_t* ray_print_fn(ray_t** args, int64_t n); +ray_t* ray_meta_fn(ray_t* x); +ray_t* ray_gc_fn(ray_t** args, int64_t n); +ray_t* ray_system_fn(ray_t* x); +/* `.sys.cmd "name args"` — registry-dispatched system commands with + * shell fallback (see lang/syscmd.h). */ +ray_t* ray_syscmd_string_dispatch_fn(ray_t* x); +/* Direct typed entry points sharing the syscmd registry. timeit and + * env are variadic so they accept the zero-arg toggle/list shape. */ +ray_t* ray_sys_listen_fn(ray_t* x); +ray_t* ray_sys_timeit_fn(ray_t** args, int64_t n); +ray_t* ray_sys_env_fn(ray_t** args, int64_t n); +ray_t* ray_getenv_fn(ray_t* x); +/* Filesystem metadata under .os.* (issue #36). Lean two: size + + * directory-list. Existence/is-file/is-dir reachable via try on + * either of these, or via the shell fallback in .sys.cmd. */ +ray_t* ray_os_size_fn(ray_t* x); +ray_t* ray_os_list_fn(ray_t* x); +ray_t* ray_setenv_fn(ray_t* name, ray_t* val); +ray_t* ray_quote_fn(ray_t** args, int64_t n); +ray_t* ray_return_fn(ray_t* x); +ray_t* ray_args_fn(ray_t* x); +ray_t* ray_rc_fn(ray_t* x); +ray_t* ray_diverse_fn(ray_t* x); +ray_t* ray_get_fn(ray_t* dict, ray_t* key); +ray_t* ray_remove_fn(ray_t* dict, ray_t* key); +ray_t* ray_timer_fn(ray_t* x); +ray_t* ray_env_fn(ray_t* x); +ray_t* ray_internals_fn(ray_t** args, int64_t n); +ray_t* ray_memstat_fn(ray_t** args, int64_t n); +ray_t* ray_sysinfo_fn(ray_t** args, int64_t n); +ray_t* ray_ser_fn(ray_t* val); +ray_t* ray_de_fn(ray_t* val); +ray_t* ray_hopen_fn(ray_t* x); +ray_t* ray_hclose_fn(ray_t* x); +ray_t* ray_hsend_fn(ray_t* handle, ray_t* msg); +ray_t* ray_set_splayed_fn(ray_t** args, int64_t n); +ray_t* ray_get_splayed_fn(ray_t** args, int64_t n); +ray_t* ray_get_parted_fn(ray_t** args, int64_t n); +/* Bulk-load entry points: walk a root directory, find every splayed + 
* (resp. parted) child, bind it as a Rayfall global, return the + * resulting {name → table} dict. */ +ray_t* ray_db_splayed_mount_fn(ray_t** args, int64_t n); +ray_t* ray_db_parted_mount_fn(ray_t** args, int64_t n); +ray_t* ray_guid_fn(ray_t* n_arg); + +/* Transaction-log journaling (.log.*) — q's -l/-L feature. + * Implementations live in src/ops/journal.c; the on-disk machinery + * is src/store/journal.c. */ +ray_t* ray_log_open_fn(ray_t** args, int64_t n); +ray_t* ray_log_write_fn(ray_t* expr); +ray_t* ray_log_replay_fn(ray_t* path); +ray_t* ray_log_validate_fn(ray_t* path); +ray_t* ray_log_roll_fn(ray_t** args, int64_t n); +ray_t* ray_log_snapshot_fn(ray_t** args, int64_t n); +ray_t* ray_log_sync_fn(ray_t** args, int64_t n); +ray_t* ray_log_close_fn(ray_t** args, int64_t n); + +/* Group (formerly static in eval.c, now extern for query.c) */ +ray_t* ray_group_fn(ray_t* x); + +/* I/O and formatting builtins (formerly in eval.c, now in ops/builtins.c) */ +ray_t* ray_println_fn(ray_t** args, int64_t n); +ray_t* ray_show_fn(ray_t** args, int64_t n); +ray_t* ray_format_fn(ray_t** args, int64_t n); +ray_t* ray_resolve_fn(ray_t** args, int64_t n); +ray_t* ray_timeit_fn(ray_t** args, int64_t n); +ray_t* ray_exit_fn(ray_t* arg); +ray_t* ray_read_csv_fn(ray_t** args, int64_t n); +ray_t* ray_write_csv_fn(ray_t** args, int64_t n); +ray_t* ray_cast_fn(ray_t* type_sym, ray_t* val); +ray_t* ray_type_fn(ray_t* val); +ray_t* ray_read_file_fn(ray_t* path_obj); +ray_t* ray_load_file_fn(ray_t* path_obj); +ray_t* ray_write_file_fn(ray_t* path_obj, ray_t* content); + +/* Misc builtins (formerly in eval.c, now in ops/builtins.c) */ +ray_t* ray_dict_fn(ray_t* keys, ray_t* vals); +ray_t* ray_nil_fn(ray_t* x); +ray_t* ray_where_fn(ray_t* x); +ray_t* ray_raze_fn(ray_t* x); +ray_t* ray_within_fn(ray_t* vals, ray_t* range); +ray_t* ray_fdiv_fn(ray_t* a, ray_t* b); + +/* Query bridge builtins (formerly in eval.c, now in ops/query.c) */ +ray_t* ray_select_fn(ray_t** args, int64_t n); +ray_t* 
ray_update_fn(ray_t** args, int64_t n); +ray_t* ray_insert_fn(ray_t** args, int64_t n); +ray_t* ray_upsert_fn(ray_t** args, int64_t n); +ray_t* ray_xbar_fn(ray_t* col, ray_t* bucket); +ray_t* ray_left_join_fn(ray_t** args, int64_t n); +ray_t* ray_inner_join_fn(ray_t** args, int64_t n); +ray_t* ray_anti_join_fn(ray_t** args, int64_t n); +ray_t* ray_window_join_fn(ray_t** args, int64_t n); +ray_t* ray_asof_join_fn(ray_t** args, int64_t n); + +/* Convenience wrapper: atomic_map_binary with no DAG opcode */ +static inline ray_t* atomic_map_binary(ray_binary_fn fn, ray_t* left, ray_t* right) { + return atomic_map_binary_op(fn, 0, left, right); +} + +#endif /* RAY_LANG_INTERNAL_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/lang/nfo.c b/crates/rayforce-sys/vendor/rayforce/src/lang/nfo.c new file mode 100644 index 0000000..8169889 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/lang/nfo.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nfo.h" +#include + +ray_t* ray_nfo_create(const char* filename, size_t fname_len, + const char* source, size_t src_len) { + ray_t* fname = ray_str(filename, fname_len); + if (RAY_IS_ERR(fname)) return fname; + + ray_t* src = ray_str(source, src_len); + if (RAY_IS_ERR(src)) { ray_release(fname); return src; } + + ray_t* keys = ray_vec_new(RAY_I64, 0); + if (RAY_IS_ERR(keys)) { ray_release(fname); ray_release(src); return keys; } + + ray_t* vals = ray_vec_new(RAY_I64, 0); + if (RAY_IS_ERR(vals)) { ray_release(fname); ray_release(src); ray_release(keys); return vals; } + + /* Build the nfo list: alloc 4-slot list, set elements directly. */ + ray_t* nfo = ray_alloc(4 * sizeof(ray_t*)); + if (!nfo || RAY_IS_ERR(nfo)) { + ray_release(fname); ray_release(src); ray_release(keys); ray_release(vals); + return ray_error("oom", NULL); + } + nfo->type = RAY_LIST; + nfo->len = 4; + ray_t** elems = (ray_t**)ray_data(nfo); + elems[0] = fname; /* ownership transfers — no extra retain needed */ + elems[1] = src; + elems[2] = keys; + elems[3] = vals; + + return nfo; +} + +void ray_nfo_insert(ray_t* nfo, ray_t* node, ray_span_t span) { + int64_t key = (intptr_t)node; + int64_t val = span.id; + + ray_t* keys = NFO_KEYS(nfo); + ray_t* vals = NFO_VALS(nfo); + if (!keys || !vals) return; + + ray_t* new_keys = ray_vec_append(keys, &key); + ray_t* new_vals = ray_vec_append(vals, &val); + + /* If vec_append reallocated, update the nfo list slots directly. + * ray_vec_append already transferred ownership (the old pointer is + * invalid), so we just swap the slot pointer; no retain/release needed + * since the list already owns one ref from the original append. 
*/ + ray_t** slots = (ray_t**)ray_data(nfo); + if (new_keys != keys) slots[2] = new_keys; + if (new_vals != vals) slots[3] = new_vals; +} + +ray_span_t ray_nfo_get(ray_t* nfo, ray_t* node) { + ray_span_t none = { .id = 0 }; + if (!nfo) return none; + + ray_t* keys = NFO_KEYS(nfo); + ray_t* vals = NFO_VALS(nfo); + if (!keys || !vals) return none; + + int64_t needle = (intptr_t)node; + int64_t n = ray_len(keys); + int64_t* kdata = (int64_t*)ray_data(keys); + int64_t* vdata = (int64_t*)ray_data(vals); + + for (int64_t i = 0; i < n; i++) { + if (kdata[i] == needle) { + ray_span_t span; + span.id = vdata[i]; + return span; + } + } + + return none; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/lang/nfo.h b/crates/rayforce-sys/vendor/rayforce/src/lang/nfo.h new file mode 100644 index 0000000..fe42050 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/lang/nfo.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_NFO_H +#define RAY_NFO_H + +#include + +/* ===== Source Span ===== */ + +/* 8-byte source location: packs start/end line+col into a single int64. + * id == 0 means "no span information available". */ +typedef union ray_span_t { + int64_t id; + struct { + uint16_t start_line; + uint16_t end_line; + uint16_t start_col; + uint16_t end_col; + }; +} ray_span_t; + +/* ===== Nfo Object ===== */ + +/* An nfo is a RAY_LIST with 4 elements: + * [0] filename (RAY_STR atom) + * [1] source (RAY_STR atom) + * [2] keys (RAY_I64 vector — intptr_t node pointers) + * [3] vals (RAY_I64 vector — span ids) + */ + +#define NFO_FILENAME(nfo) ray_list_get((nfo), 0) +#define NFO_SOURCE(nfo) ray_list_get((nfo), 1) +#define NFO_KEYS(nfo) ray_list_get((nfo), 2) +#define NFO_VALS(nfo) ray_list_get((nfo), 3) + +/* Create a new nfo object for the given source file. + * Returns a RAY_LIST or ray_error() on failure. */ +ray_t* ray_nfo_create(const char* filename, size_t fname_len, + const char* source, size_t src_len); + +/* Record the source span for an AST node. */ +void ray_nfo_insert(ray_t* nfo, ray_t* node, ray_span_t span); + +/* Look up the source span for an AST node. + * Returns a span with id==0 if not found. */ +ray_span_t ray_nfo_get(ray_t* nfo, ray_t* node); + +#endif /* RAY_NFO_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/lang/parse.c b/crates/rayforce-sys/vendor/rayforce/src/lang/parse.c new file mode 100644 index 0000000..213a685 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/lang/parse.c @@ -0,0 +1,881 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "lang/parse.h" +#include "lang/nfo.h" +#include "lang/env.h" +#include "core/numparse.h" +#include "table/sym.h" /* RAY_SYM_W64 */ +#include +#include +#include +#include + +/* ══════════════════════════════════════════ + * ASCII dispatch table (128 bytes) + * Single indexed read: PA(c) — zero branches. 
+ * ══════════════════════════════════════════ */
+
+/* Character classes stored in _PA. PA_ERR (0) doubles as the class of every
+ * byte that has no entry of its own, including all bytes >= 128. */
+#define PA_ERR 0
+#define PA_DIGIT 1
+#define PA_ALPHA 2
+#define PA_STRING 3
+#define PA_QUOTE 4 /* ' symbol prefix */
+#define PA_LPAREN 5
+#define PA_RPAREN 6
+#define PA_LBRACK 7
+#define PA_RBRACK 8
+#define PA_LBRACE 9
+#define PA_RBRACE 10
+#define PA_COLON 11
+#define PA_WS 12
+#define PA_END 13
+#define PA_MINUS 14
+#define PA_SEMI 15 /* ; comment */
+
+/* One class byte per ASCII code, 16 codes per row. The eight 16-byte
+ * literals fill the array exactly, so the implicit terminating NUL of the
+ * concatenated literal is not stored. */
+static const char _PA[128] =
+/* NUL \t \n */
+    "\x0d\x00\x00\x00\x00\x00\x00\x00\x00\x0c\x0c\x00\x00\x0c\x00\x00"
+/* */
+    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+/* SP ! " # $ % & ' ( ) * + , - . / */
+    "\x0c\x02\x03\x02\x02\x02\x02\x04\x05\x06\x02\x02\x02\x0e\x02\x02"
+/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
+    "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x0b\x0f\x02\x02\x02\x02"
+/* @ A B C D E F G H I J K L M N O */
+    "\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02"
+/* P Q R S T U V W X Y Z [ \ ] ^ _ */
+    "\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x07\x00\x08\x02\x02"
+/* ` a b c d e f g h i j k l m n o */
+    "\x00\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02"
+/* p q r s t u v w x y z { | } ~ DEL */
+    "\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x02\x09\x02\x0a\x02\x00";
+
+/* Class lookup for any char value; bytes outside 0..127 map to PA_ERR. */
+#define PA(c) ((unsigned char)(c) < 128 ? (int)(unsigned char)_PA[(unsigned char)(c)] : PA_ERR)
+
+/* ══════════════════════════════════════════
+ * Parser state
+ * ══════════════════════════════════════════ */
+
+/* Cursor over the source text plus the line/column bookkeeping that feeds
+ * the span table. */
+typedef struct {
+    const char *src;  /* source text being parsed */
+    const char *pos;  /* current read position */
+    int32_t line;     /* incremented on every '\n' consumed */
+    int32_t col;      /* reset to 0 after '\n', else incremented per char */
+    ray_t *nfo;       /* span table; nfo_record() is a no-op when NULL */
+} ray_parser_t;
+
+/* Consume n characters, updating line/col for each one.
+ * The caller must ensure that n characters are actually available. */
+static void advance(ray_parser_t *p, int32_t n) {
+    for (int32_t i = 0; i < n; i++) {
+        if (p->pos[i] == '\n') { p->line++; p->col = 0; }
+        else { p->col++; }
+    }
+    p->pos += n;
+}
+
+/* Fixup line/col after raw p->pos advancement (scan consumed region).
+ * old_pos is where p->pos stood before the scan; [old_pos, p->pos) is
+ * replayed through the same newline rule as advance(). */
+static void fixup_pos(ray_parser_t *p, const char *old_pos) {
+    for (const char *c = old_pos; c < p->pos; c++) {
+        if (*c == '\n') { p->line++; p->col = 0; }
+        else { p->col++; }
+    }
+}
+
+/* Record a span for node in the nfo object.
+ * (sl, sc) is the start position; the end is the parser's current line and
+ * the column just before the cursor (clamped at 0). All four values are
+ * truncated to 16 bits. Nothing is recorded without an nfo object or for
+ * error nodes. */
+static void nfo_record(ray_parser_t *p, ray_t *node,
+                       int32_t sl, int32_t sc) {
+    if (!p->nfo || RAY_IS_ERR(node)) return;
+    ray_span_t span;
+    span.start_line = (uint16_t)sl;
+    span.start_col = (uint16_t)sc;
+    span.end_line = (uint16_t)p->line;
+    span.end_col = (uint16_t)(p->col > 0 ? p->col - 1 : 0);
+    ray_nfo_insert(p->nfo, node, span);
+}
+
+/* Skip any run of blanks (space, tab, CR, LF) and ';' comments, which
+ * extend to the end of the line. Stops at the first other character or at
+ * the terminating NUL. */
+static void skip_ws_and_comments(ray_parser_t *p) {
+    for (;;) {
+        while (*p->pos == ' ' || *p->pos == '\t' || *p->pos == '\n' || *p->pos == '\r') {
+            if (*p->pos == '\n') { p->line++; p->col = 0; }
+            else { p->col++; }
+            p->pos++;
+        }
+        if (*p->pos == ';') {
+            /* The newline itself is left for the whitespace loop above. */
+            while (*p->pos && *p->pos != '\n') { p->col++; p->pos++; }
+            continue;
+        }
+        break;
+    }
+}
+
+/* Forward declarations */
+static ray_t* parse_expr(ray_parser_t *p);
+
+/* ── Date/time/timestamp helpers ── */
+
+#include "lang/cal.h"
+
+#define PARSE_NSECS_IN_DAY ((int64_t)24 * 60 * 60 * 1000000000LL)
+
+/* Try to parse a time literal starting from 'start'.
+ * Returns the char past the end on success, NULL on failure.
+ * Writes the millisecond value into *ms_out, including sign.
*/
+static const char* try_parse_time(const char* start, int32_t *ms_out) {
+    const char* c = start;
+    int sign = 1;
+    if (*c == '-') { sign = -1; c++; }
+
+    /* HH */
+    if (!(c[0] >= '0' && c[0] <= '9' && c[1] >= '0' && c[1] <= '9')) return NULL;
+    int hh = (c[0] - '0') * 10 + (c[1] - '0'); c += 2;
+    if (*c != ':') return NULL;
+    c++;
+
+    /* MM */
+    if (!(c[0] >= '0' && c[0] <= '9' && c[1] >= '0' && c[1] <= '9')) return NULL;
+    int mm = (c[0] - '0') * 10 + (c[1] - '0'); c += 2;
+    if (*c != ':') return NULL;
+    c++;
+
+    /* SS */
+    if (!(c[0] >= '0' && c[0] <= '9' && c[1] >= '0' && c[1] <= '9')) return NULL;
+    int ss = (c[0] - '0') * 10 + (c[1] - '0'); c += 2;
+
+    /* .mmm (milliseconds): one to three fractional digits, right-padded so
+     * that ".5" means 500 ms, in line with how the timestamp branch of
+     * parse_number pads its nanosecond fraction. */
+    int ms = 0;
+    if (*c == '.') {
+        c++;
+        if (!(*c >= '0' && *c <= '9')) return NULL;
+        int scale = 100;
+        for (int k = 0; k < 3 && *c >= '0' && *c <= '9'; k++, c++) {
+            ms += (*c - '0') * scale;
+            scale /= 10;
+        }
+    }
+
+    /* Field ranges (e.g. minutes < 60) are not validated here. */
+    *ms_out = sign * (int32_t)((hh * 3600 + mm * 60 + ss) * 1000 + ms);
+    return c;
+}
+
+/* True when c points at two decimal digits. The second character is only
+ * inspected when the first is a digit, so the check never reads past a
+ * terminating NUL. */
+static int two_digits_at(const char *c) {
+    return c[0] >= '0' && c[0] <= '9' && c[1] >= '0' && c[1] <= '9';
+}
+
+/* ── Number parsing (with hex, nulls, typed suffixes, date/time/timestamp) ──
+ * Recognised forms, tried in this order:
+ *   0x..                       hex literal, returned as a u8 atom
+ *   0N / 0N{h,i,d,t,p,l,f,s}   typed null
+ *   YYYY.MM.DD                 date
+ *   YYYY.MM.DDDhh:mm:ss.f      timestamp (f: up to 9 fractional digits)
+ *   [-]HH:MM:SS[.mmm]          time
+ *   plain integer / float, with optional h (i16) or i (i32) suffix
+ * Returns the atom, or ray_error("parse") / ray_error("domain"). */
+static ray_t* parse_number(ray_parser_t *p) {
+    const char *start = p->pos;
+    int is_neg = 0;
+    if (*p->pos == '-') { is_neg = 1; p->pos++; }
+
+    /* Hex literal: 0x..
+     * NOTE(review): the value is truncated to its low 8 bits and a leading
+     * '-' is ignored — confirm both are intended. */
+    if (p->pos[0] == '0' && p->pos[1] == 'x') {
+        p->pos += 2;
+        uint64_t v;
+        size_t n = ray_parse_u64_hex(p->pos, SIZE_MAX, &v);
+        if (n == 0) return ray_error("parse", NULL);
+        p->pos += n;
+        return ray_u8((uint8_t)v);
+    }
+
+    /* Null literal: 0N{h,i,d,t,p,l,f,s} or bare 0N (defaults to i64 null). */
+    if (!is_neg && p->pos[0] == '0' && p->pos[1] == 'N') {
+        switch (p->pos[2]) {
+        case 'h': p->pos += 3; return ray_typed_null(-RAY_I16);
+        case 'i': p->pos += 3; return ray_typed_null(-RAY_I32);
+        case 'd': p->pos += 3; return ray_typed_null(-RAY_DATE);
+        case 't': p->pos += 3; return ray_typed_null(-RAY_TIME);
+        case 'p': p->pos += 3; return ray_typed_null(-RAY_TIMESTAMP);
+        case 'l': p->pos += 3; return ray_typed_null(-RAY_I64);
+        case 'f': p->pos += 3; return ray_typed_null(-RAY_F64);
+        case 's': p->pos += 3; return ray_typed_null(-RAY_SYM);
+        }
+        /* Bare 0N: only if the next char is not an identifier continuation
+         * (letter/digit/underscore), else fall through to plain number. */
+        char c2 = p->pos[2];
+        if (!((c2 >= 'a' && c2 <= 'z') || (c2 >= 'A' && c2 <= 'Z') ||
+              (c2 >= '0' && c2 <= '9') || c2 == '_')) {
+            p->pos += 2;
+            return ray_typed_null(-RAY_I64);
+        }
+    }
+
+    /* Scan digits */
+    const char *dstart = p->pos;
+    while (*p->pos >= '0' && *p->pos <= '9') p->pos++;
+    int ndigits = (int)(p->pos - dstart);
+
+    /* Date/Timestamp: YYYY.MM.DD or YYYY.MM.DDDhh:mm:ss.nnnnnnnnn */
+    if (ndigits == 4 && !is_neg && *p->pos == '.' &&
+        p->pos[1] >= '0' && p->pos[1] <= '9' &&
+        p->pos[2] >= '0' && p->pos[2] <= '9' &&
+        p->pos[3] == '.') {
+        int year = (int)ray_parse_4_digits(dstart);
+        p->pos++; /* skip first '.' */
+        int month = (p->pos[0] - '0') * 10 + (p->pos[1] - '0');
+        p->pos += 2;
+        /* Not a date after all: rewind to just past the leading digit run,
+         * which is the position the plain-number code below expects. */
+        if (*p->pos != '.') { p->pos = dstart + ndigits; goto plain_number; }
+        p->pos++; /* skip second '.' */
+        if (!two_digits_at(p->pos)) {
+            p->pos = dstart + ndigits; goto plain_number;
+        }
+        int day = (p->pos[0] - '0') * 10 + (p->pos[1] - '0');
+        p->pos += 2;
+
+        int32_t days = ymd_to_date(year, month, day);
+
+        /* Check for timestamp separator 'D' */
+        if (*p->pos == 'D') {
+            p->pos++; /* skip D */
+            /* Parse HH:MM:SS.nnnnnnnnn — every two-digit field is checked
+             * before it is read so a truncated literal yields a parse
+             * error instead of walking past the end of the input. */
+            if (!two_digits_at(p->pos))
+                return ray_error("parse", NULL);
+            int hh = (p->pos[0] - '0') * 10 + (p->pos[1] - '0'); p->pos += 2;
+            if (*p->pos != ':') return ray_error("parse", NULL);
+            p->pos++;
+            if (!two_digits_at(p->pos))
+                return ray_error("parse", NULL);
+            int mi = (p->pos[0] - '0') * 10 + (p->pos[1] - '0'); p->pos += 2;
+            if (*p->pos != ':') return ray_error("parse", NULL);
+            p->pos++;
+            if (!two_digits_at(p->pos))
+                return ray_error("parse", NULL);
+            int ss = (p->pos[0] - '0') * 10 + (p->pos[1] - '0'); p->pos += 2;
+            if (*p->pos != '.') return ray_error("parse", NULL);
+            p->pos++;
+            /* Parse fractional seconds (up to 9 digits for nanoseconds);
+             * further digits are consumed but ignored. */
+            const char* fstart = p->pos;
+            while (*p->pos >= '0' && *p->pos <= '9') p->pos++;
+            int flen = (int)(p->pos - fstart);
+            uint64_t nanos = 0;
+            for (int i = 0; i < flen && i < 9; i++)
+                nanos = nanos * 10 + (uint64_t)(fstart[i] - '0');
+            /* Pad to 9 digits */
+            for (int i = flen; i < 9; i++) nanos *= 10;
+
+            int64_t day_ns = (int64_t)days * PARSE_NSECS_IN_DAY;
+            int64_t time_ns = ((int64_t)hh * 3600 + mi * 60 + ss) * 1000000000LL + (int64_t)nanos;
+            return ray_timestamp(day_ns + time_ns);
+        }
+
+        return ray_date(days);
+    }
+
+    /* Time literal: HH:MM:SS.mmm (detected by colon after 2 digits from digit-start) */
+    if (ndigits == 2 && *p->pos == ':') {
+        p->pos = start; /* reset — let try_parse_time handle sign */
+        int32_t ms;
+        const char* end = try_parse_time(start, &ms);
+        if (end) { p->pos = end; return ray_time(ms); }
+        /* Not a valid time — fall through to regular number parsing */
+        p->pos = start;
+        if (is_neg) p->pos++;
+        while (*p->pos >= '0' && *p->pos <= '9') p->pos++;
+    }
+
+plain_number:;
+    /* At this point p->pos is past the digits. Check for float */
+    int is_float = 0;
+    if (*p->pos == '.' && p->pos[1] >= '0' && p->pos[1] <= '9') {
+        is_float = 1;
+        p->pos++;
+        while (*p->pos >= '0' && *p->pos <= '9') p->pos++;
+    }
+    if (*p->pos == 'e' || *p->pos == 'E') {
+        is_float = 1;
+        p->pos++;
+        if (*p->pos == '+' || *p->pos == '-') p->pos++;
+        while (*p->pos >= '0' && *p->pos <= '9') p->pos++;
+    }
+
+    /* Length of the literal, sign included. */
+    size_t span = (size_t)(p->pos - start);
+
+    if (is_float) {
+        double v = 0.0;
+        if (ray_parse_f64(start, span, &v) == 0)
+            return ray_error("parse", NULL);
+        return ray_f64(v);
+    }
+
+    /* Integer parse — overflow signalled by `n == 0` (digits present but
+     * value didn't fit int64). Promote to f64 in that case, matching the
+     * historical strtoll/ERANGE → strtod behavior. */
+    int64_t v = 0;
+    size_t n = ray_parse_i64(start, span, &v);
+    if (n == 0) {
+        double fv = 0.0;
+        if (ray_parse_f64(start, span, &fv) == 0)
+            return ray_error("parse", NULL);
+        return ray_f64(fv);
+    }
+
+    /* Type suffix: h (i16), i (i32). The accepted ranges are symmetric,
+     * so the most negative value of each width is rejected. */
+    if (*p->pos == 'h') {
+        p->pos++;
+        if (v < -32767 || v > 32767) return ray_error("domain", NULL);
+        return ray_i16((int16_t)v);
+    }
+    if (*p->pos == 'i') {
+        p->pos++;
+        if (v < -2147483647LL || v > 2147483647LL) return ray_error("domain", NULL);
+        return ray_i32((int32_t)v);
+    }
+
+    return ray_i64(v);
+}
+
+/* ── String parsing with escape sequence decoding ── */
+static ray_t* parse_string(ray_parser_t *p) {
+    p->pos++; /* skip opening " */
+    const char *start = p->pos;
+
+    /* First pass: scan for closing " and check for escapes */
+    bool has_escape = false;
+    const char *scan = p->pos;
+    while (*scan && *scan != '"') {
+        if (*scan == '\\' && scan[1]) { has_escape = true; scan++; }
+        scan++;
+    }
+    size_t raw_len = (size_t)(scan - start);
+    if (*scan != '"') return ray_error("parse", NULL); /* unterminated string */
+    scan++;
+    p->pos = scan;
+
+    if (!has_escape) return ray_str(start, raw_len);
+
+    /* Decode escape sequences into a temporary buffer */
+ char buf[4096]; + size_t out = 0; + const char *r = start; + const char *end = start + raw_len; + while (r < end) { + if (out >= sizeof(buf) - 2) + return ray_error("domain", NULL); /* string too long for escape buffer */ + if (*r == '\\' && r + 1 < end) { + r++; + switch (*r) { + case 'n': buf[out++] = '\n'; r++; break; + case 't': buf[out++] = '\t'; r++; break; + case 'r': buf[out++] = '\r'; r++; break; + case '\\': buf[out++] = '\\'; r++; break; + case '"': buf[out++] = '"'; r++; break; + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': { + /* Octal escape: \OOO (1-3 digits) */ + char ch = (char)(*r - '0'); r++; + if (r < end && *r >= '0' && *r <= '7') { + ch = (char)((ch << 3) | (*r - '0')); r++; + if (r < end && *r >= '0' && *r <= '7') { + ch = (char)((ch << 3) | (*r - '0')); r++; + } + } + buf[out++] = ch; + break; + } + default: buf[out++] = '\\'; buf[out++] = *r; r++; break; + } + } else { + buf[out++] = *r++; + } + } + return ray_str(buf, out); +} + +/* ── Symbol/char parsing: 'name or 'a' ── */ +static ray_t* parse_symbol(ray_parser_t *p) { + p->pos++; /* skip ' */ + const char *start = p->pos; + + /* Empty symbol (bare tick at end or before terminator) */ + if (*p->pos == 0 || *p->pos == ' ' || *p->pos == '\t' || *p->pos == '\n' || + *p->pos == ')' || *p->pos == ']' || *p->pos == '}') { + /* Null symbol 0Ns */ + return ray_typed_null(-RAY_SYM); + } + + /* Char literal: 'X' or '\n' etc. 
*/ + if (*p->pos == '\\') { + /* Escape sequence char literal */ + const char *esc = p->pos + 1; + char ch; + int esc_len = 1; + switch (*esc) { + case 'n': ch = '\n'; break; + case 'r': ch = '\r'; break; + case 't': ch = '\t'; break; + case '\\': ch = '\\'; break; + case '\'': ch = '\''; break; + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': { + /* Octal escape: \OOO */ + ch = (char)(*esc - '0'); + if (esc[1] >= '0' && esc[1] <= '7') { + ch = (char)((ch << 3) | (esc[1] - '0')); + if (esc[2] >= '0' && esc[2] <= '7') { + ch = (char)((ch << 3) | (esc[2] - '0')); + esc_len = 3; + } else { + esc_len = 2; + } + } + break; + } + default: ch = *esc; break; + } + if (esc[esc_len] == '\'') { + /* Closing quote found — it's a char literal */ + p->pos = esc + esc_len + 1; + return ray_str(&ch, 1); + } + /* Not a char literal — fall through to symbol parsing */ + } else if (start[1] == '\'') { + /* Simple char literal like 'a' */ + char ch = *start; + p->pos = start + 2; /* skip char + closing quote */ + return ray_str(&ch, 1); + } + + /* Regular symbol */ + while (PA(*p->pos) == PA_ALPHA || PA(*p->pos) == PA_DIGIT || *p->pos == '_' || *p->pos == '.') + p->pos++; + size_t len = (size_t)(p->pos - start); + if (len == 0) return ray_typed_null(-RAY_SYM); /* empty symbol */ + int64_t id = ray_sym_intern(start, len); + return ray_sym(id); +} + +/* ── Name parsing ── */ +static ray_t* parse_name(ray_parser_t *p) { + const char *start = p->pos; + /* Name chars: alpha, digit, _, ., -, !, ?, +, *, /, %, <, >, =, & */ + while (PA(*p->pos) == PA_ALPHA || PA(*p->pos) == PA_DIGIT + || *p->pos == '_' || *p->pos == '.' || *p->pos == '-' + || *p->pos == '!' || *p->pos == '?' 
|| *p->pos == '+' + || *p->pos == '*' || *p->pos == '/' || *p->pos == '%' + || *p->pos == '<' || *p->pos == '>' || *p->pos == '=' + || *p->pos == '&' || *p->pos == '|') + p->pos++; + size_t len = (size_t)(p->pos - start); + if (len == 0) return ray_error("parse", NULL); + + /* Check for true/false */ + if (len == 4 && memcmp(start, "true", 4) == 0) return ray_bool(true); + if (len == 5 && memcmp(start, "false", 5) == 0) return ray_bool(false); + /* null is handled as a name that resolves to NULL at eval time */ + + /* Return as name symbol (with RAY_ATTR_NAME flag) */ + int64_t id = ray_sym_intern(start, len); + ray_t* s = ray_sym(id); + if (!RAY_IS_ERR(s)) s->attrs |= RAY_ATTR_NAME; + return s; +} + +/* ── Vector literal: [1 2 3] ── */ +static ray_t* parse_vector(ray_parser_t *p) { + advance(p, 1); /* skip [ */ + + /* Collect parsed elements into a temporary array */ + ray_t* elems[4096]; + int32_t count = 0; + + skip_ws_and_comments(p); + while (*p->pos && *p->pos != ']') { + if (count >= 4096) { + for (int32_t i = 0; i < count; i++) ray_release(elems[i]); + return ray_error("limit", NULL); + } + ray_t* elem = parse_expr(p); + if (RAY_IS_ERR(elem)) { + for (int32_t i = 0; i < count; i++) ray_release(elems[i]); + return elem; + } + elems[count++] = elem; + skip_ws_and_comments(p); + } + if (*p->pos != ']') { + for (int32_t i = 0; i < count; i++) ray_release(elems[i]); + return ray_error("parse", NULL); + } + advance(p, 1); /* skip ] */ + + if (count == 0) { + /* Empty vector -> empty i64 vector */ + return ray_vec_new(RAY_I64, 0); + } + + /* Determine element types. + * Name references (RAY_ATTR_NAME) must stay as boxed atoms because + * the evaluator, compiler, and fn-builder dereference them as ray_t*. 
*/ + int8_t first_type = elems[0]->type; + bool homogeneous = true; + bool has_float = (first_type == -RAY_F64); + bool has_int = (first_type == -RAY_I64); + bool all_numeric = (first_type == -RAY_I64 || first_type == -RAY_F64); + + for (int32_t i = 0; i < count; i++) { + /* Inside [...], names are symbol literals, not variable references */ + if (elems[i]->attrs & RAY_ATTR_NAME) { + elems[i]->attrs &= ~RAY_ATTR_NAME; + /* type is already -RAY_SYM from parse_expr */ + } + if (i == 0) continue; + int8_t t = elems[i]->type; + if (t != first_type) homogeneous = false; + if (t == -RAY_F64) has_float = true; + else if (t == -RAY_I64) has_int = true; + if (t != -RAY_I64 && t != -RAY_F64) all_numeric = false; + } + + /* All same atom type -> typed vector */ + if (homogeneous && first_type < 0) { + int8_t vec_type = -first_type; + ray_t* vec = ray_vec_new(vec_type, count); + if (RAY_IS_ERR(vec)) { + for (int32_t i = 0; i < count; i++) ray_release(elems[i]); + return vec; + } + switch (vec_type) { + case RAY_I64: case RAY_TIMESTAMP: { + int64_t* d = (int64_t*)ray_data(vec); + for (int32_t i = 0; i < count; i++) d[i] = elems[i]->i64; + break; + } + case RAY_F64: { + double* d = (double*)ray_data(vec); + for (int32_t i = 0; i < count; i++) d[i] = elems[i]->f64; + break; + } + case RAY_I32: case RAY_DATE: case RAY_TIME: { + int32_t* d = (int32_t*)ray_data(vec); + for (int32_t i = 0; i < count; i++) d[i] = elems[i]->i32; + break; + } + case RAY_I16: { + int16_t* d = (int16_t*)ray_data(vec); + for (int32_t i = 0; i < count; i++) d[i] = elems[i]->i16; + break; + } + case RAY_BOOL: { + bool* d = (bool*)ray_data(vec); + for (int32_t i = 0; i < count; i++) d[i] = elems[i]->b8; + break; + } + case RAY_SYM: { + int64_t* d = (int64_t*)ray_data(vec); + for (int32_t i = 0; i < count; i++) d[i] = elems[i]->i64; + break; + } + case RAY_U8: { + uint8_t* d = (uint8_t*)ray_data(vec); + for (int32_t i = 0; i < count; i++) d[i] = elems[i]->u8; + break; + } + case RAY_STR: { + /* String vectors 
use ray_str_vec_append */ + ray_t* svec = ray_vec_new(RAY_STR, count); + if (RAY_IS_ERR(svec)) { + ray_free(vec); + for (int32_t i = 0; i < count; i++) ray_release(elems[i]); + return svec; + } + for (int32_t i = 0; i < count; i++) { + const char* s = ray_str_ptr(elems[i]); + size_t slen = ray_str_len(elems[i]); + svec = ray_str_vec_append(svec, s, slen); + if (RAY_IS_ERR(svec)) { + for (int32_t j = 0; j < count; j++) ray_release(elems[j]); + ray_free(vec); + return svec; + } + } + ray_free(vec); + for (int32_t i = 0; i < count; i++) ray_release(elems[i]); + return svec; + } + default: ray_free(vec); goto boxed_list; + } + vec->len = count; + for (int32_t i = 0; i < count; i++) { + if (RAY_ATOM_IS_NULL(elems[i])) + ray_vec_set_null(vec, i, true); + ray_release(elems[i]); + } + return vec; + } + + /* Mixed int/float -> promote to f64 */ + if (has_float && has_int && all_numeric) { + ray_t* vec = ray_vec_new(RAY_F64, count); + if (RAY_IS_ERR(vec)) { + for (int32_t i = 0; i < count; i++) ray_release(elems[i]); + return vec; + } + double* d = (double*)ray_data(vec); + for (int32_t i = 0; i < count; i++) { + d[i] = (elems[i]->type == -RAY_F64) ? elems[i]->f64 + : (double)elems[i]->i64; + } + vec->len = count; + for (int32_t i = 0; i < count; i++) { + if (RAY_ATOM_IS_NULL(elems[i])) + ray_vec_set_null(vec, i, true); + ray_release(elems[i]); + } + return vec; + } + +boxed_list: + /* Mixed types in vector literal — domain error */ + for (int32_t i = 0; i < count; i++) ray_release(elems[i]); + return ray_error("domain", NULL); +} + +/* ── Dict literal: {key: val key: val ...} ── + * + * Builds a RAY_DICT block holding [keys, vals]. + * Keys are emitted as a RAY_SYM vector when every key is a bareword sym + * literal, as a RAY_STR vector when every key is a quoted string literal, + * or as a heterogeneous RAY_LIST otherwise. Values stay unevaluated in + * a RAY_LIST so dict literals remain self-evaluating (the (dict ...) + * builtin evaluates them on demand). 
+ */ +static ray_t* parse_dict(ray_parser_t *p) { + advance(p, 1); /* skip { */ + + /* Build keys+vals as a generic RAY_LIST of atoms first; then narrow + * keys to a typed vector if homogeneous. 16 entries cover every + * realistic dict literal — heterogeneous spillover stays as LIST. */ + ray_t* key_list = ray_list_new(8); + if (RAY_IS_ERR(key_list)) return key_list; + ray_t* vals = ray_list_new(8); + if (RAY_IS_ERR(vals)) { ray_release(key_list); return vals; } + + bool all_sym = true; + bool all_str = true; + + skip_ws_and_comments(p); + while (*p->pos && *p->pos != '}') { + ray_t* key_atom = NULL; + if (*p->pos == '"') { + const char *sk_before = p->pos; + key_atom = parse_string(p); + fixup_pos(p, sk_before); + if (RAY_IS_ERR(key_atom)) { ray_release(key_list); ray_release(vals); return key_atom; } + all_sym = false; + } else { + const char *kstart = p->pos; + while (PA(*p->pos) == PA_ALPHA || PA(*p->pos) == PA_DIGIT + || *p->pos == '_' || *p->pos == '-') + p->pos++; + p->col += (int32_t)(p->pos - kstart); + size_t klen = (size_t)(p->pos - kstart); + if (klen == 0) { ray_release(key_list); ray_release(vals); return ray_error("parse", NULL); } + int64_t kid = ray_sym_intern(kstart, klen); + key_atom = ray_sym(kid); + if (RAY_IS_ERR(key_atom)) { ray_release(key_list); ray_release(vals); return key_atom; } + all_str = false; + } + + skip_ws_and_comments(p); + if (*p->pos != ':') { ray_release(key_atom); ray_release(key_list); ray_release(vals); return ray_error("parse", NULL); } + advance(p, 1); + skip_ws_and_comments(p); + + ray_t* val = parse_expr(p); + if (RAY_IS_ERR(val)) { ray_release(key_atom); ray_release(key_list); ray_release(vals); return val; } + + key_list = ray_list_append(key_list, key_atom); + ray_release(key_atom); + if (RAY_IS_ERR(key_list)) { ray_release(vals); ray_release(val); return key_list; } + + vals = ray_list_append(vals, val); + ray_release(val); + if (RAY_IS_ERR(vals)) { ray_release(key_list); return vals; } + + 
skip_ws_and_comments(p); + } + if (*p->pos != '}') { ray_release(key_list); ray_release(vals); return ray_error("parse", NULL); } + advance(p, 1); /* skip } */ + + /* Narrow keys to a typed vector when homogeneous. */ + int64_t n_pairs = key_list->len; + ray_t** key_atoms = (ray_t**)ray_data(key_list); + ray_t* keys; + if (n_pairs > 0 && all_sym) { + keys = ray_sym_vec_new(RAY_SYM_W64, n_pairs); + if (RAY_IS_ERR(keys)) { ray_release(key_list); ray_release(vals); return keys; } + for (int64_t i = 0; i < n_pairs; i++) { + int64_t id = key_atoms[i]->i64; + keys = ray_vec_append(keys, &id); + if (RAY_IS_ERR(keys)) { ray_release(key_list); ray_release(vals); return keys; } + } + ray_release(key_list); + } else if (n_pairs > 0 && all_str) { + keys = ray_vec_new(RAY_STR, n_pairs); + if (RAY_IS_ERR(keys)) { ray_release(key_list); ray_release(vals); return keys; } + for (int64_t i = 0; i < n_pairs; i++) { + keys = ray_str_vec_append(keys, ray_str_ptr(key_atoms[i]), ray_str_len(key_atoms[i])); + if (RAY_IS_ERR(keys)) { ray_release(key_list); ray_release(vals); return keys; } + } + ray_release(key_list); + } else { + keys = key_list; /* heterogeneous or empty — use the LIST as-is */ + } + return ray_dict_new(keys, vals); +} + +/* ── List (s-expression): (fn arg1 arg2 ...) 
── */ +static ray_t* parse_list(ray_parser_t *p) { + advance(p, 1); /* skip ( */ + ray_t* list = ray_list_new(4); + if (RAY_IS_ERR(list)) return list; + + skip_ws_and_comments(p); + while (*p->pos && *p->pos != ')') { + ray_t* elem = parse_expr(p); + if (RAY_IS_ERR(elem)) { ray_release(list); return elem; } + list = ray_list_append(list, elem); + ray_release(elem); + if (RAY_IS_ERR(list)) return list; + skip_ws_and_comments(p); + } + if (*p->pos != ')') { ray_release(list); return ray_error("parse", NULL); } + advance(p, 1); /* skip ) */ + return list; +} + +/* ── Main expression dispatch ── */ +static ray_t* parse_expr(ray_parser_t *p) { + skip_ws_and_comments(p); + + int32_t sl = p->line, sc = p->col; + const char *before = p->pos; + ray_t *result; + + switch (PA(*p->pos)) { + case PA_END: return ray_error("parse", NULL); + case PA_DIGIT: result = parse_number(p); break; + case PA_MINUS: + if (p->pos[1] >= '0' && p->pos[1] <= '9') + result = parse_number(p); + else + result = parse_name(p); /* standalone '-' or '-name' */ + break; + case PA_ALPHA: result = parse_name(p); break; + case PA_STRING: result = parse_string(p); break; + case PA_QUOTE: result = parse_symbol(p); break; + case PA_LPAREN: result = parse_list(p); break; + case PA_LBRACK: result = parse_vector(p); break; + case PA_LBRACE: result = parse_dict(p); break; + case PA_RPAREN: return ray_error("parse", NULL); + case PA_RBRACK: return ray_error("parse", NULL); + case PA_RBRACE: return ray_error("parse", NULL); + case PA_COLON: { + /* Keyword literal :name — parse as symbol (like 'name) */ + p->pos++; /* skip : */ + const char *kstart = p->pos; + while (PA(*p->pos) == PA_ALPHA || PA(*p->pos) == PA_DIGIT + || *p->pos == '_' || *p->pos == '.' 
|| *p->pos == '-' + || *p->pos == '/' || *p->pos == '?') + p->pos++; + size_t klen = (size_t)(p->pos - kstart); + if (klen == 0) { result = ray_error("parse", "empty keyword"); break; } + int64_t kid = ray_sym_intern(kstart, klen); + result = ray_sym(kid); + break; + } + default: result = parse_name(p); break; /* operators like +, *, etc. */ + } + + /* Fixup line/col: leaf parsers advance pos without updating line/col. + * Compound parsers (list/vector/dict) use advance() internally and + * call skip_ws_and_comments, so their line/col is already accurate. */ + if (PA(*before) != PA_LPAREN && PA(*before) != PA_LBRACK && PA(*before) != PA_LBRACE) + fixup_pos(p, before); + nfo_record(p, result, sl, sc); + return result; +} + +/* ── Internal parse driver (shared by public APIs) ── */ +static ray_t* parse_source(ray_parser_t *p) { + ray_t* first = parse_expr(p); + if (RAY_IS_ERR(first)) return first; + + /* Check if there are more expressions after the first */ + skip_ws_and_comments(p); + if (*p->pos == '\0') return first; /* single expression */ + + /* Multiple expressions: collect into (do expr1 expr2 ...) */ + ray_t* exprs[256]; + int32_t count = 0; + exprs[count++] = first; + + while (*p->pos) { + if (count >= 256) { + for (int32_t i = 0; i < count; i++) ray_release(exprs[i]); + return ray_error("domain", NULL); /* too many top-level expressions */ + } + ray_t* expr = parse_expr(p); + if (RAY_IS_ERR(expr)) { + for (int32_t i = 0; i < count; i++) ray_release(exprs[i]); + return expr; + } + exprs[count++] = expr; + skip_ws_and_comments(p); + } + + /* Build (do expr1 expr2 ...) 
list */ + int32_t sl = p->line, sc = p->col; + ray_t* do_list = ray_alloc((count + 1) * sizeof(ray_t*)); + if (!do_list) { + for (int32_t i = 0; i < count; i++) ray_release(exprs[i]); + return ray_error("oom", NULL); + } + do_list->type = RAY_LIST; + do_list->len = 0; + ray_t** elems = (ray_t**)ray_data(do_list); + /* Build a name-reference atom for "do" so parsing is independent of runtime */ + ray_t* do_sym = ray_alloc(0); + if (!do_sym) { + ray_release(do_list); + for (int32_t i = 0; i < count; i++) ray_release(exprs[i]); + return ray_error("oom", NULL); + } + do_sym->type = -RAY_SYM; + do_sym->attrs = RAY_ATTR_NAME; + do_sym->i64 = ray_sym_intern("do", 2); + elems[0] = do_sym; + for (int32_t i = 0; i < count; i++) + elems[i + 1] = exprs[i]; + do_list->len = count + 1; + nfo_record(p, do_list, sl, sc); + return do_list; +} + +/* ── Public API ── */ +ray_t* ray_parse(const char* source) { + return ray_parse_with_nfo(source, NULL); +} + +ray_t* ray_parse_with_nfo(const char* source, ray_t* nfo) { + if (!source) return ray_error("parse", NULL); + ray_parser_t p = { + .src = source, + .pos = source, + .line = 0, + .col = 0, + .nfo = nfo + }; + return parse_source(&p); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/lang/parse.h b/crates/rayforce-sys/vendor/rayforce/src/lang/parse.h new file mode 100644 index 0000000..ea08375 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/lang/parse.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_PARSE_H +#define RAY_PARSE_H + +#include + +/* Parse a Rayfall source string into a ray_t object tree. + * Returns a single expression, or a list of expressions if the + * source contains multiple top-level forms. */ +ray_t* ray_parse(const char* source); + +/* Parse with source-location tracking. If nfo is non-NULL every AST + * node produced by the parser will have its span recorded in the nfo + * object (created via ray_nfo_create in lang/nfo.h). */ +ray_t* ray_parse_with_nfo(const char* source, ray_t* nfo); + +#endif /* RAY_PARSE_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/lang/syscmd.c b/crates/rayforce-sys/vendor/rayforce/src/lang/syscmd.c new file mode 100644 index 0000000..4b97909 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/lang/syscmd.c @@ -0,0 +1,359 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "lang/syscmd.h" +/* Avoid both lang/internal.h and core/runtime.h here: they each + * transitively pull a different struct definition for `ray_vm_t` + * (lang/eval.h vs core/runtime.h) and we don't need the VM internals + * — only the runtime accessors and the lang-side builtins. The + * runtime exposes its main poll via opaque-pointer accessors + * declared inline below. */ +#include "core/poll.h" +#include "core/ipc.h" +#include "core/profile.h" +#include "lang/env.h" +#include "table/sym.h" + +/* Forward decls of the bare runtime accessors — these are defined in + * core/runtime.c. Pulling the full runtime.h would re-trigger the + * dual ray_vm_t typedef; one extern keeps us decoupled. */ +void* ray_runtime_get_poll(void); + +#include +#include +#include +#include + +/* Public header reaches us through syscmd.h. 
These few helpers were + * previously sourced from lang/internal.h; pulling them in directly + * keeps this TU clear of the runtime/eval VM clash. */ +static inline int ray_is_atom_local(ray_t* x) { return x && !RAY_IS_ERR(x) && x->type < 0; } + +/* ══════════════════════════════════════════ + * Argument parsing helpers — handlers receive a Rayfall ray_t* but + * the .sys.cmd / REPL paths arrive with a raw char slice. These + * helpers coerce both into the typed value the handler wants, + * keeping the per-handler code free of input-shape branching. + * ══════════════════════════════════════════ */ + +static int arg_is_null(const ray_t* arg) { + return !arg || RAY_IS_NULL(arg); +} + +/* Parse signed decimal int64 out of a ray_t (atom or string). Returns + * 0 + sets *err=1 if the arg can't be coerced. */ +static int64_t arg_as_i64(ray_t* arg, int* err) { + *err = 0; + if (arg_is_null(arg)) { *err = 1; return 0; } + if (arg->type == -RAY_I64) return arg->i64; + if (arg->type == -RAY_I32) return (int64_t)arg->i32; + if (arg->type == -RAY_I16) return (int64_t)arg->i16; + if (arg->type == -RAY_U8) return (int64_t)arg->u8; + if (arg->type == -RAY_BOOL) return (int64_t)arg->b8; + if (arg->type == -RAY_STR) { + const char* p = ray_str_ptr(arg); + size_t len = ray_str_len(arg); + size_t i = 0; + while (i < len && (p[i] == ' ' || p[i] == '\t')) i++; + int sign = 1; + if (i < len && (p[i] == '+' || p[i] == '-')) { if (p[i] == '-') sign = -1; i++; } + if (i >= len || p[i] < '0' || p[i] > '9') { *err = 1; return 0; } + int64_t v = 0; + while (i < len && p[i] >= '0' && p[i] <= '9') { v = v * 10 + (p[i] - '0'); i++; } + return sign * v; + } + *err = 1; + return 0; +} + +/* ══════════════════════════════════════════ + * Handlers + * ══════════════════════════════════════════ */ + +/* timeit/t — toggle the profiler. 
+ * no arg → toggle + * 0 → disable + * nonzero→ enable + */ +static ray_t* h_timeit(ray_t* arg, ray_syscmd_ctx_t* ctx) { + bool active; + if (arg_is_null(arg)) { + active = !g_ray_profile.active; + } else { + int err = 0; + int64_t v = arg_as_i64(arg, &err); + if (err) return ray_error("type", ":t expects an integer (0 = off, 1 = on)"); + active = (v != 0); + } + g_ray_profile.active = active; + if (ctx && ctx->repl) { + if (ctx->color) fprintf(stdout, "\033[1;33m"); + fprintf(stdout, ". Timeit is %s.", active ? "on" : "off"); + if (ctx->color) fprintf(stdout, "\033[0m"); + fprintf(stdout, "\n"); + return NULL; + } + return ray_i64(active ? 1 : 0); +} + +/* listen N — bind an IPC listener on PORT using the runtime's main + * poll instance. Errors with `nyi` if no main loop is wired (i.e. + * the host didn't call ray_runtime_create from main.c, or libray is + * being used as an embedded library without a poll). Errors with + * `io` if the bind fails (port in use, permission, etc.). Returns + * the listener id on success. */ +static ray_t* h_listen(ray_t* arg, ray_syscmd_ctx_t* ctx) { + (void)ctx; + int err = 0; + int64_t port = arg_as_i64(arg, &err); + if (err) return ray_error("type", "listen expects a port number"); + if (port <= 0 || port > 65535) return ray_error("domain", "listen: port out of range (1..65535)"); + + ray_poll_t* poll = (ray_poll_t*)ray_runtime_get_poll(); + if (!poll) return ray_error("nyi", "listen: no main event loop attached"); + + int64_t id = ray_ipc_listen(poll, (uint16_t)port); + if (id < 0) { + int e = errno; + return ray_error("io", "listen: bind to port %lld failed: %s", + (long long)port, strerror(e ? e : EADDRINUSE)); + } + return ray_i64(id); +} + +/* env — list defined globals. REPL prints a summary; Rayfall path + * returns a list of [name, type-label] pairs. 
*/ +static const char* type_label_short(ray_t* v) { + if (!v) return "null"; + switch (v->type) { + case RAY_LAMBDA: return "lambda"; + case RAY_UNARY: + case RAY_BINARY: + case RAY_VARY: return "fn"; + case RAY_TABLE: return "table"; + case RAY_DICT: return "dict"; + case RAY_LIST: return "list"; + default: + if (v->type < 0) return "atom"; + if (v->type > 0) return "vec"; + return "?"; + } +} + +static ray_t* h_env(ray_t* arg, ray_syscmd_ctx_t* ctx) { + (void)arg; + int64_t sym_ids[512]; + ray_t* vals[512]; + int32_t n = ray_env_list(sym_ids, vals, 512); + if (ctx && ctx->repl) { + for (int32_t i = 0; i < n; i++) { + ray_t* s = ray_sym_str(sym_ids[i]); + const char* name = s ? ray_str_ptr(s) : "?"; + fprintf(stdout, " %-20s %s\n", name, type_label_short(vals[i])); + } + fprintf(stdout, "(%d entries)\n", n); + return NULL; + } + /* Non-REPL: just return the count. Returning the full env as a + * Rayfall list is doable but not needed for the .sys.cmd "env" + * use case (which is purely informational). */ + return ray_i64(n); +} + +/* clear — REPL-only screen clear. */ +static ray_t* h_clear(ray_t* arg, ray_syscmd_ctx_t* ctx) { + (void)arg; + if (ctx && ctx->repl && ctx->color) { + fprintf(stdout, "\033[2J\033[H"); + fflush(stdout); + } + return NULL; +} + +/* help/? — REPL-only. Walks the table to print every command's + * one-liner so we never get out of sync with what's registered. */ +static ray_t* h_help(ray_t* arg, ray_syscmd_ctx_t* ctx) { + (void)arg; + if (!ctx || !ctx->repl) return RAY_NULL_OBJ; + bool color = ctx->color; + if (color) fprintf(stdout, "\033[1;33m"); + fprintf(stdout, ". 
Commands list:"); + if (color) fprintf(stdout, "\033[0m"); + fprintf(stdout, "\n"); + if (color) fprintf(stdout, "\033[90m"); + size_t n = 0; + const ray_syscmd_t* tbl = ray_syscmd_table(&n); + for (size_t i = 0; i < n; i++) { + char tag[32]; + if (tbl[i].alias) snprintf(tag, sizeof(tag), ":%s/:%s", tbl[i].name, tbl[i].alias); + else snprintf(tag, sizeof(tag), ":%s", tbl[i].name); + fprintf(stdout, " %-12s - %s\n", tag, tbl[i].help ? tbl[i].help : ""); + } + if (color) fprintf(stdout, "\033[0m"); + fprintf(stdout, "\n"); + return NULL; +} + +/* q/quit — REPL-only graceful exit. */ +static ray_t* h_quit(ray_t* arg, ray_syscmd_ctx_t* ctx) { + (void)arg; (void)ctx; + /* Defer to the standard exit path so atexit handlers run. */ + exit(0); + return NULL; +} + +/* ══════════════════════════════════════════ + * Registry + * ══════════════════════════════════════════ */ + +static const ray_syscmd_t TABLE[] = { + { "help", "?", h_help, RAY_SYSCMD_REPL_ONLY, "Display this help." }, + { "timeit", "t", h_timeit, 0, "Toggle profiling on/off (or :t 0|1)." }, + { "env", NULL, h_env, 0, "List defined globals." }, + { "clear", NULL, h_clear, RAY_SYSCMD_REPL_ONLY, "Clear the screen." }, + { "listen", NULL, h_listen, RAY_SYSCMD_RESTRICTED, "Start IPC listener on PORT." }, + { "q", NULL, h_quit, RAY_SYSCMD_REPL_ONLY, "Exit the REPL." }, + { "quit", NULL, h_quit, RAY_SYSCMD_REPL_ONLY, "Exit the REPL." 
}, +}; +static const size_t TABLE_LEN = sizeof(TABLE) / sizeof(TABLE[0]); + +const ray_syscmd_t* ray_syscmd_lookup(const char* name, size_t name_len) { + if (!name || name_len == 0) return NULL; + for (size_t i = 0; i < TABLE_LEN; i++) { + const ray_syscmd_t* e = &TABLE[i]; + if (e->name && strlen(e->name) == name_len && memcmp(e->name, name, name_len) == 0) + return e; + if (e->alias && strlen(e->alias) == name_len && memcmp(e->alias, name, name_len) == 0) + return e; + } + return NULL; +} + +const ray_syscmd_t* ray_syscmd_table(size_t* out_count) { + if (out_count) *out_count = TABLE_LEN; + return TABLE; +} + +/* ══════════════════════════════════════════ + * Dispatcher used by `.sys.cmd "..."` and the REPL `:` path. + * + * Splits the string into (command, args). Looks up the command in + * the registry. If found, builds a Rayfall arg from the args slice + * (RAY_NULL_OBJ for empty, otherwise an owned RAY_STR — the handler + * can then re-coerce via arg_as_i64 etc.) and calls the handler. + * + * On miss with allow_shell=true, falls through to system() so users + * can do `(.sys.cmd "ls -la")` the kdb way. With allow_shell=false + * (REPL path), returns "domain" so a typo'd `:foo` doesn't hand the + * shell anything by accident. + * ══════════════════════════════════════════ */ +ray_t* ray_syscmd_dispatch(const char* str, size_t len, + ray_syscmd_ctx_t* ctx, bool allow_shell) { + /* Trim leading whitespace */ + size_t i = 0; + while (i < len && (str[i] == ' ' || str[i] == '\t')) i++; + if (i >= len) return ray_error("domain", "empty command"); + + /* First word = command name (until whitespace). */ + size_t name_start = i; + while (i < len && str[i] != ' ' && str[i] != '\t') i++; + size_t name_len = i - name_start; + + /* Args = rest, leading whitespace trimmed. 
*/ + while (i < len && (str[i] == ' ' || str[i] == '\t')) i++; + const char* args_p = str + i; + size_t args_len = len - i; + + const ray_syscmd_t* e = ray_syscmd_lookup(str + name_start, name_len); + if (!e) { + if (!allow_shell) + return ray_error("domain", "unknown command"); + /* Shell fallback — pass the entire original string verbatim + * so quoting/redirection survives. Match .sys.exec semantics: + * return the host shell's exit code. */ + char* cmd = (char*)malloc(len + 1); + if (!cmd) return ray_error("oom", NULL); + memcpy(cmd, str, len); + cmd[len] = '\0'; + int rc = system(cmd); + free(cmd); + return ray_i64(rc); + } + + if (!e->fn) return ray_error("nyi", "command has no handler"); + + /* REPL-only commands (clear / q / help) are reachable only when + * a REPL context was supplied — typing them in a Rayfall script + * via .sys.cmd would have no useful effect, so reject early with + * a clear domain error rather than silently no-op'ing. */ + if ((e->flags & RAY_SYSCMD_REPL_ONLY) && (!ctx || !ctx->repl)) + return ray_error("domain", "command is REPL-only"); + + ray_t* arg = (args_len > 0) ? ray_str(args_p, args_len) : RAY_NULL_OBJ; + ray_t* result = e->fn(arg, ctx); + if (arg && arg != RAY_NULL_OBJ) ray_release(arg); + return result ? result : RAY_NULL_OBJ; +} + +/* ══════════════════════════════════════════ + * Rayfall builtins: + * (.sys.cmd "name args") → string-dispatched + * (.sys. arg) → direct, typed + * + * The direct builtins are registered from eval.c at startup; this + * file just exposes the entry point each one wraps. 
+ * ══════════════════════════════════════════ */ + +ray_t* ray_syscmd_string_dispatch_fn(ray_t* x) { + if (!ray_is_atom_local(x) || x->type != -RAY_STR) + return ray_error("type", ".sys.cmd expects a string"); + return ray_syscmd_dispatch(ray_str_ptr(x), ray_str_len(x), + /*ctx=*/NULL, /*allow_shell=*/true); +} + +/* Adapter for direct `.sys.` invocation: pass the user's arg + * straight to the named handler, no string parsing. */ +static ray_t* invoke_by_name(const char* name, ray_t* arg) { + const ray_syscmd_t* e = ray_syscmd_lookup(name, strlen(name)); + if (!e || !e->fn) return ray_error("nyi", NULL); + if (e->flags & RAY_SYSCMD_REPL_ONLY) + return ray_error("domain", "command is REPL-only"); + ray_syscmd_ctx_t ctx = { NULL, false }; + ray_t* r = e->fn(arg, &ctx); + return r ? r : RAY_NULL_OBJ; +} + +/* listen requires an arg; keep it unary. */ +ray_t* ray_sys_listen_fn(ray_t* x) { return invoke_by_name("listen", x); } + +/* timeit and env are usable with or without an arg ((.sys.timeit) => + * toggle, (.sys.env) => list). Registering them variadic in eval.c + * matches `.sys.gc`'s convention and avoids the arity error users + * would otherwise hit calling `(.sys.env)` with no args. */ +ray_t* ray_sys_timeit_fn(ray_t** args, int64_t n) { + return invoke_by_name("timeit", n > 0 ? args[0] : RAY_NULL_OBJ); +} +ray_t* ray_sys_env_fn(ray_t** args, int64_t n) { + (void)args; + return invoke_by_name("env", n > 0 ? args[0] : RAY_NULL_OBJ); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/lang/syscmd.h b/crates/rayforce-sys/vendor/rayforce/src/lang/syscmd.h new file mode 100644 index 0000000..f68729d --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/lang/syscmd.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * syscmd.h — single registry of system-level commands. + * + * One source of truth feeding three entry points: + * + * 1. `.sys.cmd "name args"` — string-dispatched Rayfall builtin + * 2. `:name args` — REPL terminal command + * 3. `(.sys. arg)` — direct typed Rayfall builtin (per entry) + * + * Each handler is invoked with one Rayfall arg (or RAY_NULL_OBJ for + * commands that take none) and an optional REPL context that carries + * the surface-specific state (color flag, repl pointer for things like + * `:clear` and `:q`). Handlers parse / coerce the arg themselves so + * callers don't have to special-case whether `:t 1` came in as the + * string "1" or the integer 1. + * + * Unknown command names dispatched through `.sys.cmd` fall through to + * the host shell via system(2) — matches the kdb+ `system "..."` + * convention so existing muscle memory works. 
+ */ + +#ifndef RAY_LANG_SYSCMD_H +#define RAY_LANG_SYSCMD_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct ray_repl; + +typedef struct ray_syscmd_ctx { + struct ray_repl *repl; /* non-NULL when invoked from the REPL */ + bool color; /* terminal supports ANSI; only meaningful with `repl` */ +} ray_syscmd_ctx_t; + +/* Handler contract: + * - `arg` is RAY_NULL_OBJ when no argument was supplied; otherwise an + * owned reference the caller manages (handlers do not retain). + * - Returning NULL means "no value" (treated as RAY_NULL_OBJ by the + * Rayfall surface; suppressed from REPL print). + * - Errors are returned as ray_error(...) values. + */ +typedef ray_t* (*ray_syscmd_handler_t)(ray_t* arg, ray_syscmd_ctx_t* ctx); + +/* Entry flags */ +#define RAY_SYSCMD_REPL_ONLY 0x01 /* not exposed via .sys.cmd / .sys. */ +#define RAY_SYSCMD_RESTRICTED 0x02 /* honors --restricted IPC mode */ + +typedef struct ray_syscmd { + const char* name; /* primary command name, e.g. "timeit" */ + const char* alias; /* short alias e.g. "t"; NULL if none */ + ray_syscmd_handler_t fn; + int flags; + const char* help; /* one-line help text */ +} ray_syscmd_t; + +/* Look up a command by name or alias. `name_len` lets the caller pass + * an unterminated slice (e.g. straight out of the REPL or .sys.cmd + * tokeniser). Returns NULL if not found. */ +const ray_syscmd_t* ray_syscmd_lookup(const char* name, size_t name_len); + +/* Walk the table. Returns the entry array + count. */ +const ray_syscmd_t* ray_syscmd_table(size_t* out_count); + +/* Parse `"name args..."` into (command, arg-string), look the command + * up, dispatch. Unknown names fall through to system(str) when + * `allow_shell` is set (the .sys.cmd path uses true; the REPL passes + * false so `:foo` doesn't accidentally exec arbitrary shell). 
*/ +ray_t* ray_syscmd_dispatch(const char* str, size_t len, + ray_syscmd_ctx_t* ctx, bool allow_shell); + +#ifdef __cplusplus +} +#endif + +#endif /* RAY_LANG_SYSCMD_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/mem/arena.c b/crates/rayforce-sys/vendor/rayforce/src/mem/arena.c new file mode 100644 index 0000000..26f5636 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/mem/arena.c @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arena.h" +#include "heap.h" +#include "sys.h" +#include + +/* 32-byte alignment for ray_t */ +#define ARENA_ALIGN 32 +#define ARENA_ALIGN_UP(x) (((x) + ARENA_ALIGN - 1) & ~(size_t)(ARENA_ALIGN - 1)) + +/* Each chunk is a contiguous block of memory with a bump pointer. 
*/ +typedef struct ray_arena_chunk { + struct ray_arena_chunk* next; + size_t cap; /* usable capacity (excluding this header) */ + size_t used; /* bytes used so far */ +} ray_arena_chunk_t; + +/* Arena header */ +struct ray_arena { + ray_arena_chunk_t* chunks; /* linked list of all chunks (head = current) */ + size_t chunk_size; /* default chunk capacity */ +}; + +/* Chunk data starts at aligned offset after the header */ +static inline char* chunk_data(ray_arena_chunk_t* c) { + size_t hdr = ARENA_ALIGN_UP(sizeof(ray_arena_chunk_t)); + return (char*)c + hdr; +} + +static ray_arena_chunk_t* arena_new_chunk(size_t min_cap) { + size_t hdr = ARENA_ALIGN_UP(sizeof(ray_arena_chunk_t)); + if (min_cap > SIZE_MAX - hdr) return NULL; + size_t total = hdr + min_cap; + ray_arena_chunk_t* c = (ray_arena_chunk_t*)ray_sys_alloc(total); + if (!c) return NULL; + c->next = NULL; + c->cap = min_cap; + c->used = 0; + return c; +} + +ray_arena_t* ray_arena_new(size_t chunk_size) { + if (chunk_size < 256) chunk_size = 256; + chunk_size = ARENA_ALIGN_UP(chunk_size); + + ray_arena_t* a = (ray_arena_t*)ray_sys_alloc(sizeof(ray_arena_t)); + if (!a) return NULL; + + ray_arena_chunk_t* first = arena_new_chunk(chunk_size); + if (!first) { + ray_sys_free(a); + return NULL; + } + + a->chunks = first; + a->chunk_size = chunk_size; + return a; +} + +ray_t* ray_arena_alloc(ray_arena_t* arena, size_t nbytes) { + if (!arena) return NULL; + if (nbytes > SIZE_MAX - 32 - (ARENA_ALIGN - 1)) return NULL; + size_t block_size = ARENA_ALIGN_UP(32 + nbytes); + + ray_arena_chunk_t* c = arena->chunks; + + if (c->used + block_size > c->cap) { + size_t new_cap = arena->chunk_size; + if (block_size > new_cap) new_cap = ARENA_ALIGN_UP(block_size); + + ray_arena_chunk_t* nc = arena_new_chunk(new_cap); + if (!nc) return NULL; + + nc->next = arena->chunks; + arena->chunks = nc; + c = nc; + } + + char* base = chunk_data(c); + ray_t* v = (ray_t*)(base + c->used); + c->used += block_size; + + memset(v, 0, 32); + v->attrs 
= RAY_ATTR_ARENA; + v->rc = 1; + + return v; +} + +bool ray_arena_reserve(ray_arena_t* arena, size_t bytes) { + if (!arena) return false; + if (bytes == 0) return true; + ray_arena_chunk_t* c = arena->chunks; + if (c && (c->cap - c->used) >= bytes) return true; + size_t new_cap = arena->chunk_size; + if (bytes > new_cap) new_cap = ARENA_ALIGN_UP(bytes); + ray_arena_chunk_t* nc = arena_new_chunk(new_cap); + if (!nc) return false; + nc->next = arena->chunks; + arena->chunks = nc; + return true; +} + +size_t ray_arena_total_used(const ray_arena_t* arena) { + if (!arena) return 0; + size_t total = 0; + for (const ray_arena_chunk_t* c = arena->chunks; c; c = c->next) { + total += c->used; + } + return total; +} + +void ray_arena_reset(ray_arena_t* arena) { + if (!arena || !arena->chunks) return; + + /* Keep the head chunk (most recently allocated), free the rest */ + ray_arena_chunk_t* keep = arena->chunks; + ray_arena_chunk_t* c = keep->next; + while (c) { + ray_arena_chunk_t* next = c->next; + ray_sys_free(c); + c = next; + } + keep->next = NULL; + keep->used = 0; + arena->chunks = keep; +} + +void ray_arena_destroy(ray_arena_t* arena) { + if (!arena) return; + ray_arena_chunk_t* c = arena->chunks; + while (c) { + ray_arena_chunk_t* next = c->next; + ray_sys_free(c); + c = next; + } + ray_sys_free(arena); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/mem/arena.h b/crates/rayforce-sys/vendor/rayforce/src/mem/arena.h new file mode 100644 index 0000000..f405a2c --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/mem/arena.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_ARENA_H +#define RAY_ARENA_H + +#include +#include + +typedef struct ray_arena ray_arena_t; + +/* Create arena with given chunk size (bytes). Chunks allocated via ray_sys_alloc. */ +ray_arena_t* ray_arena_new(size_t chunk_size); + +/* Allocate ray_t* block with nbytes of data space. + * Returns 32-byte aligned ray_t* with RAY_ATTR_ARENA set, rc=1. + * Returns NULL on OOM. */ +ray_t* ray_arena_alloc(ray_arena_t* arena, size_t nbytes); + +/* Ensure the arena can serve subsequent allocations totalling at least + * `bytes` without the head chunk needing to grow. If the head chunk has + * enough free space already, this is a no-op; otherwise a new chunk with + * capacity >= `bytes` is allocated and becomes the head. Returns true on + * success, false on OOM. 
Useful for making a sequence of follow-on
+ * allocations infallible, which is necessary when commits to multiple
+ * data structures must be atomic. */
+bool ray_arena_reserve(ray_arena_t* arena, size_t bytes);
+
+/* Total bytes currently used across every chunk in this arena. Diagnostic
+ * introspection — monotonically grows with ray_arena_alloc, resets on
+ * ray_arena_reset. Safe to call at any time. Returns 0 for a NULL arena. */
+size_t ray_arena_total_used(const ray_arena_t* arena);
+
+/* Reset arena — keep only the head (most recently added) chunk, rewound
+ * to zero bytes used; every other chunk is freed. Blocks handed out
+ * before the reset must not be used afterwards. NULL arena is a no-op. */
+void ray_arena_reset(ray_arena_t* arena);
+
+/* Destroy arena — free all backing memory, including the arena header.
+ * NULL arena is a no-op. */
+void ray_arena_destroy(ray_arena_t* arena);
+
+#endif /* RAY_ARENA_H */
diff --git a/crates/rayforce-sys/vendor/rayforce/src/mem/cow.c b/crates/rayforce-sys/vendor/rayforce/src/mem/cow.c
new file mode 100644
index 0000000..6a453d9
--- /dev/null
+++ b/crates/rayforce-sys/vendor/rayforce/src/mem/cow.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2025-2026 Anton Kundenko
+ * All rights reserved.
+
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "cow.h"
+#include "heap.h"
+
+/* Thread-local flag: when false (default), refcount uses plain inc/dec.
+ * The thread pool sets this to true before dispatching parallel work.
+ * Mirrors rayforce 1's VM->rc_sync fast path. */
+RAY_TLS bool ray_rc_sync = false;
+
+/* --------------------------------------------------------------------------
+ * ray_retain
+ *
+ * Take one more reference on v.  NULL, error values and arena-owned
+ * blocks (RAY_ATTR_ARENA) are left untouched.  The increment is a plain
+ * ++ when ray_rc_sync is false and an atomic increment otherwise.
+ * -------------------------------------------------------------------------- */
+
+void ray_retain(ray_t* v) {
+    if (!v || RAY_IS_ERR(v)) return;
+    if (v->attrs & RAY_ATTR_ARENA) return;
+    if (RAY_LIKELY(!ray_rc_sync))
+        v->rc++;
+    else
+        ray_atomic_inc(&v->rc);
+}
+
+/* --------------------------------------------------------------------------
+ * ray_release
+ *
+ * Drop one reference on v.  NULL, error values and arena-owned blocks
+ * are left untouched.  When the count before the decrement was 1 the
+ * block is handed to ray_free; in sync mode an acquire fence is issued
+ * first.
+ * -------------------------------------------------------------------------- */
+
+void ray_release(ray_t* v) {
+    if (!v || RAY_IS_ERR(v)) return;
+    if (v->attrs & RAY_ATTR_ARENA) return;
+    uint32_t prev; /* refcount value before this decrement */
+    if (RAY_LIKELY(!ray_rc_sync)) {
+        prev = v->rc--;
+    } else {
+        prev = ray_atomic_dec(&v->rc); /* NOTE(review): relies on ray_atomic_dec returning the pre-decrement value — confirm */
+    }
+    if (prev == 1) {
+        if (RAY_UNLIKELY(ray_rc_sync))
+            ray_atomic_fence_acquire();
+        ray_free(v);
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * ray_cow
+ *
+ * Return a pointer the caller may mutate.  v itself is returned when it
+ * is NULL, an error, arena-owned, or has a refcount of exactly 1.
+ * Otherwise a copy from ray_alloc_copy is returned and the caller's
+ * reference on v is released.  If the copy fails (NULL or error) that
+ * result is returned and v keeps its reference.
+ * -------------------------------------------------------------------------- */
+
+ray_t* ray_cow(ray_t* v) {
+    if (!v || RAY_IS_ERR(v)) return v;
+    if (v->attrs & RAY_ATTR_ARENA) return v; /* arena-owned, no-op */
+    uint32_t rc = RAY_LIKELY(!ray_rc_sync) ? v->rc : ray_atomic_load(&v->rc);
+    if (rc == 1) return v; /* sole owner -- mutate in place */
+    ray_t* copy = ray_alloc_copy(v);
+    if (!copy || RAY_IS_ERR(copy)) return copy;
+    /* L3: ray_alloc_copy() already sets copy->rc = 1, so no redundant store needed. */
+    ray_release(v);
+    return copy;
+}
diff --git a/crates/rayforce-sys/vendor/rayforce/src/mem/cow.h b/crates/rayforce-sys/vendor/rayforce/src/mem/cow.h
new file mode 100644
index 0000000..b42643b
--- /dev/null
+++ b/crates/rayforce-sys/vendor/rayforce/src/mem/cow.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2025-2026 Anton Kundenko
+ * All rights reserved.
+
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RAY_COW_H
+#define RAY_COW_H
+
+/*
+ * cow.h -- COW (Copy-on-Write) ref counting.
+ * + * ray_retain: increment reference count + * ray_release: decrement reference count, free when it reaches zero + * ray_cow: copy-on-write — return same pointer if sole owner, else copy + */ + +#include +#include "core/platform.h" + +/* Thread-local flag: plain (false) vs atomic (true) refcount ops. + * Default is false (fast single-threaded path). + * The thread pool sets true before parallel dispatch. */ +extern RAY_TLS bool ray_rc_sync; + +#endif /* RAY_COW_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/mem/heap.c b/crates/rayforce-sys/vendor/rayforce/src/mem/heap.c new file mode 100644 index 0000000..8f93c5f --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/mem/heap.c @@ -0,0 +1,1601 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#if defined(__APPLE__)
+# define _DARWIN_C_SOURCE
+#elif !defined(_WIN32)
+# define _GNU_SOURCE /* ftruncate, MAP_SHARED, etc. */
+#endif
+
+#include "heap.h"
+#include "cow.h"
+#include "sys.h"
+#include "core/platform.h"
+#include "table/sym.h"
+#include "lang/eval.h"
+#include "store/hnsw.h"
+#include "ops/idxop.h"
+#include
+#include /* getenv */
+#include /* snprintf */
+#include /* getpid, close, ftruncate, unlink */
+#include /* open, fcntl, F_PREALLOCATE on macOS */
+#include
+#include /* mmap, munmap */
+#include /* O_* modes */
+#include
+#include
+
+/* Portable disk-block preallocation. Returns 0 on success, errno-style
+ * code on failure (matching posix_fallocate's contract). Linux has
+ * posix_fallocate natively. macOS uses fcntl(F_PREALLOCATE) — try
+ * contiguous first, fall back to non-contiguous, then ftruncate to
+ * extend the file size if needed (F_PREALLOCATE doesn't grow the file
+ * beyond its current size).
+ *
+ * fd:     open file descriptor to extend.
+ * offset: start of the range to reserve.
+ * len:    number of bytes to reserve.
+ *
+ * The macOS branch does not pass `offset` to the kernel as a position:
+ * it requests offset + len bytes relative to the current end of file
+ * (fst_offset = 0 with F_PEOFPOSMODE) and then sets the file size to
+ * offset + len.  On failure that branch returns errno, or -1 when errno
+ * is 0; the other branch returns posix_fallocate's result unchanged. */
+static int heap_preallocate(int fd, off_t offset, off_t len) {
+#if defined(__APPLE__)
+    fstore_t fs = {
+        .fst_flags = F_ALLOCATECONTIG | F_ALLOCATEALL,
+        .fst_posmode = F_PEOFPOSMODE,
+        .fst_offset = 0,
+        .fst_length = offset + len,
+        .fst_bytesalloc = 0,
+    };
+    if (fcntl(fd, F_PREALLOCATE, &fs) == -1) {
+        /* Retry without contiguous-only constraint. */
+        fs.fst_flags = F_ALLOCATEALL;
+        if (fcntl(fd, F_PREALLOCATE, &fs) == -1) return errno ? errno : -1;
+    }
+    /* F_PREALLOCATE reserves blocks but doesn't grow the logical file
+     * size — extend with ftruncate so mmap'd pages past the old size
+     * can actually be written without SIGBUS. */
+    if (ftruncate(fd, offset + len) != 0) return errno ? errno : -1;
+    return 0;
+#else
+    return posix_fallocate(fd, offset, len);
+#endif
+}
+
+/* --------------------------------------------------------------------------
+ * Static asserts
+ * -------------------------------------------------------------------------- */
+_Static_assert(sizeof(ray_pool_hdr_t) <= 16,
+               "ray_pool_hdr_t must fit in nullmap (16 bytes)");
+
+/* --------------------------------------------------------------------------
+ * Thread-local state
+ * -------------------------------------------------------------------------- */
+RAY_TLS ray_heap_t* ray_tl_heap = NULL;
+
+/* Stats tracking — always enabled (plain integer ops, negligible vs atomics).
+ * All stats go through the per-heap struct (ray_tl_heap->stats) so that
+ * heap merges keep bytes_allocated accurate.
+ *
+ * bytes_allocated is only modified by the owning thread (alloc/local-free)
+ * or by the main thread during GC flush (return_to_owner=true, workers idle).
+ * No atomics needed. */
+#define RAY_STAT(x) (x)
+
+/* --------------------------------------------------------------------------
+ * Bitmap-based heap ID allocator (atomic CAS, reusable IDs)
+ *
+ * Each bit in the bitmap represents one heap ID. Acquiring sets a bit,
+ * releasing clears it. IDs are reused after release (unlike a monotonic
+ * counter). Cursor rotates to spread contention across words.
+ * -------------------------------------------------------------------------- */
+static _Atomic(uint64_t) g_heap_id_bitmap[RAY_HEAP_ID_WORDS] = { [0] = 1ULL }; /* bit 0 starts set, so id 0 is taken from the outset */
+static _Atomic(uint64_t) g_heap_id_cursor = 0; /* word index at which the next acquire starts scanning */
+
+ray_heap_t* ray_heap_registry[RAY_HEAP_REGISTRY_SIZE];
+
+/* Pending-merge queue head (lock-free LIFO) */
+_Atomic(ray_heap_t*) ray_heap_pending_merge = NULL;
+
+static int heap_id_acquire(void) { /* claims the lowest clear bit of the first non-full word; returns its id, or -1 when every word is full */
+    uint64_t start = atomic_fetch_add_explicit(&g_heap_id_cursor, 1,
+                                               memory_order_relaxed);
+    for (uint64_t off = 0; off < RAY_HEAP_ID_WORDS; off++) {
+        uint64_t idx = (start + off) % RAY_HEAP_ID_WORDS;
+        uint64_t word = atomic_load_explicit(&g_heap_id_bitmap[idx],
+                                             memory_order_relaxed);
+        while (~word != 0ULL) { /* at least one clear bit left in this word */
+            uint64_t free_bits = ~word;
+            uint64_t bit = (uint64_t)__builtin_ctzll(free_bits);
+            uint64_t mask = 1ULL << bit;
+            uint64_t new_word = word | mask;
+            if (atomic_compare_exchange_weak_explicit(
+                    &g_heap_id_bitmap[idx], &word, new_word,
+                    memory_order_acq_rel, memory_order_relaxed)) {
+                return (int)(idx * 64 + bit);
+            }
+            /* CAS failed — word updated, retry with new value */
+        }
+    }
+    return -1; /* pool exhausted */
+}
+
+static void heap_id_release(int id) { /* clears the id's bit; ids outside [0, RAY_HEAP_ID_BITS) are ignored */
+    if (id < 0 || id >= (int)RAY_HEAP_ID_BITS) return;
+    uint64_t idx = (uint64_t)id >> 6;
+    uint64_t bit = (uint64_t)id & 63ULL;
+    uint64_t mask = ~(1ULL << bit);
+    atomic_fetch_and_explicit(&g_heap_id_bitmap[idx], mask,
+                              memory_order_release);
+}
+
+/* --------------------------------------------------------------------------
+ * Parallel flag
+ * -------------------------------------------------------------------------- */
+_Atomic(uint32_t) ray_parallel_flag = 0;
+
+/* --------------------------------------------------------------------------
+ * Helpers
+ *
+ * ceil_log2(n): smallest k with 2^k >= n; 0 for n <= 1.
+ * ray_order_for_size(data_size): buddy order whose block holds
+ * data_size plus the 32-byte header, never below RAY_ORDER_MIN.
+ * Returns RAY_HEAP_MAX_ORDER + 1 when adding the header would overflow.
+ * -------------------------------------------------------------------------- */
+
+static uint8_t ceil_log2(size_t n) {
+    if (n <= 1) return 0;
+    return (uint8_t)(64 - __builtin_clzll(n - 1));
+}
+
+uint8_t ray_order_for_size(size_t data_size) {
+    if (data_size > SIZE_MAX - 32) return RAY_HEAP_MAX_ORDER + 1;
+    size_t total = data_size + 32; /* 32B ray_t header (no prefix) */
+    uint8_t k = ceil_log2(total);
+    if (k < RAY_ORDER_MIN) k = RAY_ORDER_MIN;
+    return k;
+}
+
+/* --------------------------------------------------------------------------
+ * Pool management
+ *
+ * Self-aligned pools: pool base = ptr & ~(pool_size - 1).
+ * First min-block (64B at offset 0) reserved for pool header.
+ * Remaining space split via cascading buddy split.
+ *
+ * For oversized blocks (order >= POOL_ORDER), pool_order = order + 1
+ * so the cascading split produces a right-half block of the needed order.
+ * -------------------------------------------------------------------------- */
+
+static bool heap_add_pool(ray_heap_t* h, uint8_t order);
+
+/* --------------------------------------------------------------------------
+ * Freelist operations (circular sentinel via fl_prev/fl_next)
+ *
+ * Each freelist[order] is a ray_fl_head_t sentinel. fl_remove() unlinks a
+ * block from ANY circular list without needing the head pointer — enabling
+ * safe cross-heap buddy coalescing.
+ * -------------------------------------------------------------------------- */
+
+RAY_INLINE void heap_insert_block(ray_heap_t* h, ray_t* blk, uint8_t order) { /* push blk at the front of h->freelist[order] and mark it free */
+    ray_fl_head_t* head = &h->freelist[order];
+    ray_t* first = head->fl_next;
+    blk->fl_prev = (ray_t*)head;
+    blk->fl_next = first;
+    first->fl_prev = blk;
+    head->fl_next = blk;
+    ray_atomic_store(&blk->rc, 0); /* free marker */
+    blk->order = order;
+    h->avail |= (1ULL << order); /* bit `order` set: this freelist is non-empty */
+}
+
+/* heap_remove_block: currently unused — retained for future coalescing paths */
+static void __attribute__((unused))
+heap_remove_block(ray_heap_t* h, ray_t* blk, uint8_t order) {
+    fl_remove(blk); /* circular unlink — works across heaps */
+    if (fl_empty(&h->freelist[order]))
+        h->avail &= ~(1ULL << order);
+}
+
+RAY_INLINE void heap_split_block(ray_heap_t* h, ray_t* blk,
+                                 uint8_t target_order, uint8_t block_order) { /* halve blk until it is target_order; each upper half goes onto h's freelists */
+    while (block_order > target_order) {
+        block_order--;
+        ray_t* buddy = (ray_t*)((char*)blk + BSIZEOF(block_order)); /* upper half of the block being split */
+        buddy->mmod = 0;
+        buddy->order = block_order;
+        heap_insert_block(h, buddy, block_order);
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Coalescing: merge block with buddies up to pool_order
+ *
+ * Pool header at offset 0 has rc=1 and order=RAY_ORDER_MIN, so buddy
+ * checks always fail before reaching the header. Safe sentinel.
+ *
+ * h:          heap whose freelists receive the (possibly merged) block.
+ * blk:        block being freed; its current order is read from blk->order.
+ * pool_base:  base address of the pool containing blk.
+ * pool_order: order of that pool; merging stops once it is reached.
+ *
+ * A buddy is merged only while its rc is 0 and its order equals the
+ * current order; the merged block starts at the lower of the two
+ * addresses.
+ * -------------------------------------------------------------------------- */
+
+static void heap_coalesce(ray_heap_t* h, ray_t* blk,
+                          uintptr_t pool_base, uint8_t pool_order) {
+    uint8_t order = blk->order;
+
+    /* During parallel execution, skip coalescing entirely — buddies may
+     * belong to other heaps' freelists, and fl_remove would corrupt them. */
+    if (atomic_load_explicit(&ray_parallel_flag, memory_order_relaxed) != 0) {
+        heap_insert_block(h, blk, order);
+        return;
+    }
+
+    for (;; order++) {
+        if (order >= pool_order) break;
+
+        ray_t* buddy = ray_buddy_of(blk, order, pool_base);
+        __builtin_prefetch(buddy, 0, 1);
+
+        uint32_t buddy_rc = ray_atomic_load(&buddy->rc);
+        if (buddy_rc != 0 || buddy->order != order) break; /* buddy in use, or split to a different order */
+
+        fl_remove(buddy);
+        if (fl_empty(&h->freelist[order]))
+            h->avail &= ~(1ULL << order);
+
+        blk = (buddy < blk) ? buddy : blk;
+    }
+
+    heap_insert_block(h, blk, order);
+}
+
+/* --------------------------------------------------------------------------
+ * heap_add_pool implementation
+ *
+ * Adds one pool to h that can serve a block of `order`.  The pool is
+ * 2^RAY_HEAP_POOL_ORDER bytes, or 2^(order + 1) when order is at least
+ * RAY_HEAP_POOL_ORDER.  Memory comes from ray_vm_alloc_aligned; when
+ * that fails, a freshly created file under h->swap_path is preallocated
+ * and mapped at a pool_size-aligned address instead.
+ *
+ * Returns false when h already has RAY_MAX_POOLS pools, when the pool
+ * order would exceed RAY_HEAP_MAX_ORDER, or when any step of the
+ * file-backed fallback fails (the temp file is closed, unlinked and its
+ * path freed on every such failure).  On success the pool is recorded
+ * in h->pools and everything except the header min-block is on h's
+ * freelists.
+ * -------------------------------------------------------------------------- */
+
+static bool heap_add_pool(ray_heap_t* h, uint8_t order) {
+    if (h->pool_count >= RAY_MAX_POOLS) return false;
+
+    uint8_t pool_order;
+    if (order >= RAY_HEAP_POOL_ORDER)
+        pool_order = order + 1; /* need one order larger for header + block */
+    else
+        pool_order = RAY_HEAP_POOL_ORDER;
+
+    if (pool_order > RAY_HEAP_MAX_ORDER) return false;
+    size_t pool_size = BSIZEOF(pool_order);
+
+    void* mem = ray_vm_alloc_aligned(pool_size, pool_size);
+    int swap_fd = -1;
+    char* swap_path = NULL;
+
+    if (!mem) {
+        /* Anonymous mmap refused — usually means RAM+swap can't satisfy
+         * pool_size right now. Fall back to file-backed mmap: create a
+         * tempfile in h->swap_path, reserve `pool_size` bytes of disk
+         * blocks (so writes won't SIGBUS later on disk-full), then map
+         * the file at a self-aligned address using the anonymous-VM
+         * reservation trick — no over-allocation of file or disk. */
+        static _Atomic uint64_t swap_counter = 0;
+        uint64_t cnt = atomic_fetch_add_explicit(&swap_counter, 1, memory_order_relaxed);
+
+        size_t plen = strlen(h->swap_path); /* NOTE(review): assumes h->swap_path is non-NULL and already ends with a path separator — confirm at the call sites */
+        size_t need = plen + 64; /* room for "rayheap_<pid>_<heap id>_<counter>.dat" */
+        swap_path = (char*)ray_sys_alloc(need);
+        if (!swap_path) return false;
+        snprintf(swap_path, need, "%srayheap_%d_%u_%llu.dat",
+                 h->swap_path, (int)getpid(), (unsigned)h->id,
+                 (unsigned long long)cnt);
+
+        swap_fd = open(swap_path, O_RDWR | O_CREAT | O_EXCL, 0600);
+        if (swap_fd < 0) {
+            ray_sys_free(swap_path);
+            return false;
+        }
+
+        /* Reserve EXACTLY pool_size bytes of disk blocks AND grow the
+         * file to pool_size. Crucial that the file is empty (EOF=0)
+         * when this runs: macOS F_PREALLOCATE with F_PEOFPOSMODE
+         * extends past the current EOF, so doing this before any other
+         * ftruncate keeps the reservation == pool_size, not 2x. ENOSPC
+         * here surfaces as a clean false return -> ray_alloc NULL ->
+         * ray_error("oom") at the wrapper layer. */
+        if (heap_preallocate(swap_fd, 0, (off_t)pool_size) != 0) {
+            close(swap_fd);
+            unlink(swap_path);
+            ray_sys_free(swap_path);
+            return false;
+        }
+
+        /* Reserve 2*pool_size of address space anonymously to guarantee
+         * a self-aligned subrange exists. PROT_NONE is enough — we
+         * never read/write the anon mapping; it just holds the address
+         * range so the kernel won't hand it out to a concurrent mmap.
+         * After computing the aligned subrange, free the slack and
+         * MAP_FIXED the file-backed mapping over the kept region. */
+        size_t reserve_size = pool_size + pool_size;
+        void* anon = mmap(NULL, reserve_size, PROT_NONE,
+                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        if (anon == MAP_FAILED) {
+            close(swap_fd);
+            unlink(swap_path);
+            ray_sys_free(swap_path);
+            return false;
+        }
+
+        uintptr_t addr = (uintptr_t)anon;
+        uintptr_t aligned = (addr + pool_size - 1) & ~(pool_size - 1); /* first pool_size-aligned address at or after addr */
+        if (aligned > addr)
+            munmap(anon, aligned - addr); /* leading slack */
+        uintptr_t end = addr + reserve_size;
+        uintptr_t aligned_end = aligned + pool_size;
+        if (end > aligned_end)
+            munmap((void*)aligned_end, end - aligned_end); /* trailing slack */
+
+        /* MAP_FIXED replaces the kept anon mapping atomically with the
+         * file-backed one. No address-space race since the kept range
+         * is still anon-reserved at this point. */
+        void* mapped = mmap((void*)aligned, pool_size,
+                            PROT_READ | PROT_WRITE,
+                            MAP_SHARED | MAP_FIXED, swap_fd, 0);
+        if (mapped == MAP_FAILED) {
+            munmap((void*)aligned, pool_size);
+            close(swap_fd);
+            unlink(swap_path);
+            ray_sys_free(swap_path);
+            return false;
+        }
+
+        mem = (void*)aligned;
+    }
+
+    /* --- Write pool header at offset 0 --- */
+    ray_t* hdr_block = (ray_t*)mem;
+    memset(hdr_block, 0, BSIZEOF(RAY_ORDER_MIN));
+    hdr_block->mmod = 0;
+    hdr_block->order = RAY_ORDER_MIN;
+    ray_atomic_store(&hdr_block->rc, 1); /* sentinel: never free */
+
+    ray_pool_hdr_t* hdr = (ray_pool_hdr_t*)hdr_block; /* overlay on nullmap */
+    hdr->heap_id = h->id;
+    hdr->pool_order = pool_order;
+    hdr->vm_base = mem; /* on POSIX, same as aligned base */
+
+    /* --- Cascading split: split from pool_order down to RAY_ORDER_MIN.
+     *     Right half of each split → freelist.
+     *     Leftmost min-block = pool header (already set, rc=1). --- */
+    for (uint8_t o = pool_order; o > RAY_ORDER_MIN; o--) {
+        ray_t* right = (ray_t*)((char*)mem + BSIZEOF(o - 1));
+        right->mmod = 0;
+        right->order = (uint8_t)(o - 1);
+        heap_insert_block(h, right, (uint8_t)(o - 1));
+    }
+
+    /* --- Track pool --- */
+    h->pools[h->pool_count].base = mem;
+    h->pools[h->pool_count].pool_order = pool_order;
+    h->pools[h->pool_count].backed = (swap_fd >= 0) ? 1 : 0;
+    h->pools[h->pool_count].swap_fd = swap_fd;
+    h->pools[h->pool_count].swap_path = swap_path; /* NULL when not backed */
+    h->pool_count++;
+
+    return true;
+}
+
+/* --------------------------------------------------------------------------
+ * Slab cache flush (with coalescing for GC effectiveness)
+ *
+ * Pops every cached block off every slab stack of h and coalesces it
+ * into h's freelists.  Pool base and order come from h's own pool table
+ * when the block is found there, otherwise from the block's pool header;
+ * a block with neither is dropped from the cache without being freed.
+ * -------------------------------------------------------------------------- */
+
+static void heap_flush_slabs(ray_heap_t* h) {
+    for (int i = 0; i < RAY_SLAB_ORDERS; i++) {
+        while (h->slabs[i].count > 0) {
+            ray_t* blk = h->slabs[i].stack[--h->slabs[i].count];
+            int pidx = heap_find_pool(h, blk);
+            uintptr_t pb;
+            uint8_t po;
+            if (pidx >= 0) {
+                pb = (uintptr_t)h->pools[pidx].base;
+                po = h->pools[pidx].pool_order;
+            } else {
+                ray_pool_hdr_t* phdr = ray_pool_of(blk);
+                if (!phdr) continue;
+                pb = (uintptr_t)phdr;
+                po = phdr->pool_order;
+            }
+            heap_coalesce(h, blk, pb, po);
+        }
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Foreign blocks flush
+ *
+ * When return_to_owner is true, returns each foreign block to its owning
+ * heap (via pool header heap_id → global registry). This ensures workers
+ * can reuse their pools across queries instead of allocating new ones.
+ *
+ * return_to_owner must only be true when workers are idle (on semaphore),
+ * i.e. ray_parallel_flag == 0. Otherwise the flush is a no-op and the
+ * blocks stay queued on h->foreign.
+ * -------------------------------------------------------------------------- */ + +static void heap_flush_foreign(ray_heap_t* h, bool return_to_owner) { + /* When workers are active (return_to_owner=false), skip entirely. + * Foreign blocks stay queued until the proper GC flush after workers + * finish. Absorbing foreign blocks locally would let them be re- + * allocated under a different heap while pool ownership stays with + * the original heap, corrupting bytes_allocated accounting. */ + if (!return_to_owner) return; + + ray_t* blk = h->foreign; + while (blk) { + ray_t* next = blk->fl_next; + ray_pool_hdr_t* phdr = ray_pool_of(blk); + if (!phdr) { blk = next; continue; } + uint16_t owner_id = phdr->heap_id; + ray_heap_t* owner = ray_heap_registry[owner_id % RAY_HEAP_REGISTRY_SIZE]; + if (owner && owner->id == owner_id && owner != h) { + /* Return to owner and decrement owner's bytes_allocated. + * Safe: workers are idle (return_to_owner=true implies + * ray_parallel_flag==0). */ + int pidx = heap_find_pool(owner, blk); + uintptr_t pb; + uint8_t po; + if (pidx >= 0) { + pb = (uintptr_t)owner->pools[pidx].base; + po = owner->pools[pidx].pool_order; + } else { + pb = (uintptr_t)phdr; + po = phdr->pool_order; + } + RAY_STAT(owner->stats.bytes_allocated -= BSIZEOF(blk->order)); + heap_coalesce(owner, blk, pb, po); + } else { + /* Owner gone (destroyed/unregistered) — coalesce locally. + * No stats adjustment: the owner's stats were destroyed + * with the heap, and h never charged the alloc. 
*/ + int pidx = heap_find_pool(h, blk); + uintptr_t pb; + uint8_t po; + if (pidx >= 0) { + pb = (uintptr_t)h->pools[pidx].base; + po = h->pools[pidx].pool_order; + } else { + if (!phdr) { blk = next; continue; } + pb = (uintptr_t)phdr; + po = phdr->pool_order; + } + heap_coalesce(h, blk, pb, po); + } + blk = next; + } + h->foreign = NULL; +} + +/* -------------------------------------------------------------------------- + * Owned-reference helpers + * -------------------------------------------------------------------------- */ + +static bool ray_atom_str_is_sso(const ray_t* s) { + if (s->slen >= 1 && s->slen <= 7) return true; + if (s->slen == 0 && s->obj == NULL) return true; + return false; +} + +static bool ray_atom_owns_obj(const ray_t* v) { + if (v->type == -RAY_GUID) return v->obj != NULL; + if (v->type == -RAY_STR) return !ray_atom_str_is_sso(v); + return false; +} + +static void ray_release_owned_refs(ray_t* v) { + if (!v || RAY_IS_ERR(v)) return; + + if (ray_is_atom(v)) { + if (v->type == RAY_LAMBDA) { + /* Lambda stores [params, body, bytecode, constants, n_locals, nfo, dbg] in ray_data */ + ray_t** slots = (ray_t**)ray_data(v); + for (int i = 0; i < 4; i++) { + if (slots[i] && !RAY_IS_ERR(slots[i])) + ray_release(slots[i]); + } + /* Release optional debug info slots */ + if (LAMBDA_NFO(v)) ray_release(LAMBDA_NFO(v)); + if (LAMBDA_DBG(v)) ray_release(LAMBDA_DBG(v)); + return; + } + if (v->type == RAY_LAZY) { + ray_graph_t* g = RAY_LAZY_GRAPH(v); + if (g) { + ray_graph_free(g); + RAY_LAZY_GRAPH(v) = NULL; + } + return; + } + /* I64 atom tagged as an HNSW handle owns a ray_hnsw_t — free it + * when the atom's rc drops to zero so rebindings and scope-exit + * don't leak the (potentially large) index graph. 
*/ + if (v->type == -RAY_I64 && (v->attrs & RAY_ATTR_HNSW)) { + ray_hnsw_t* idx = (ray_hnsw_t*)(uintptr_t)v->i64; + if (idx) ray_hnsw_free(idx); + v->i64 = 0; /* clear the handle and its tag so it cannot be freed twice */ + v->attrs &= (uint8_t)~RAY_ATTR_HNSW; + return; + } + if (ray_atom_owns_obj(v) && v->obj && !RAY_IS_ERR(v->obj)) + ray_release(v->obj); + return; + } + + if (v->attrs & RAY_ATTR_SLICE) { /* a slice releases only its parent reference */ + if (v->slice_parent && !RAY_IS_ERR(v->slice_parent)) + ray_release(v->slice_parent); + return; + } + + /* RAY_INDEX block: release per-kind payload children + saved-nullmap + * pointers. Must run before the LIST/TABLE compound checks below + * (which would mistreat the data[] payload as child pointers). */ + if (v->type == RAY_INDEX) { + ray_index_t* ix = ray_index_payload(v); + ray_index_release_payload(ix); + ray_index_release_saved(ix); + return; + } + + /* Vector with attached index: nullmap[0..7] holds an owning ref to + * the index ray_t. The index owns the displaced ext_nullmap/str_pool/ + * sym_dict, so we must NOT also try to release those off the parent — + * they aren't there anymore. Skip the NULLMAP_EXT and STR_pool branches. 
*/ + if (v->attrs & RAY_ATTR_HAS_INDEX) { + if (v->index && !RAY_IS_ERR(v->index)) + ray_release(v->index); + return; + } + + if ((v->attrs & RAY_ATTR_NULLMAP_EXT) && + v->ext_nullmap && !RAY_IS_ERR(v->ext_nullmap)) + ray_release(v->ext_nullmap); + + if (v->type == RAY_STR && v->str_pool && !RAY_IS_ERR(v->str_pool)) + ray_release(v->str_pool); + + if (RAY_IS_PARTED(v->type)) { /* ray_data holds v->len segment pointers */ + int64_t n_segs = v->len; + ray_t** segs = (ray_t**)ray_data(v); + for (int64_t i = 0; i < n_segs; i++) { + if (segs[i] && !RAY_IS_ERR(segs[i])) + ray_release(segs[i]); + } + return; + } + + if (v->type == RAY_MAPCOMMON) { /* exactly two child pointers */ + ray_t** ptrs = (ray_t**)ray_data(v); + if (ptrs[0] && !RAY_IS_ERR(ptrs[0])) ray_release(ptrs[0]); + if (ptrs[1] && !RAY_IS_ERR(ptrs[1])) ray_release(ptrs[1]); + return; + } + + if (v->type == RAY_TABLE || v->type == RAY_DICT) { /* exactly two child pointers */ + ray_t** slots = (ray_t**)ray_data(v); + if (slots[0] && !RAY_IS_ERR(slots[0])) ray_release(slots[0]); + if (slots[1] && !RAY_IS_ERR(slots[1])) ray_release(slots[1]); + return; + } + + if (v->type == RAY_LIST) { /* v->len child pointers; every other type falls through with nothing more to release */ + ray_t** ptrs = (ray_t**)ray_data(v); + for (int64_t i = 0; i < v->len; i++) { + ray_t* child = ptrs[i]; + if (child && !RAY_IS_ERR(child)) ray_release(child); + } + } +} + +/* Takes a reference on each child that ray_release_owned_refs would release (lazy graphs are not retained; an HNSW index is cloned instead). Returns false only when that clone fails, true otherwise. */ bool ray_retain_owned_refs(ray_t* v) { + if (!v || RAY_IS_ERR(v)) return true; + + if (ray_is_atom(v)) { + if (v->type == RAY_LAMBDA) { + ray_t** slots = (ray_t**)ray_data(v); + for (int i = 0; i < 4; i++) { + if (slots[i] && !RAY_IS_ERR(slots[i])) + ray_retain(slots[i]); + } + if (LAMBDA_NFO(v)) ray_retain(LAMBDA_NFO(v)); + if (LAMBDA_DBG(v)) ray_retain(LAMBDA_DBG(v)); + return true; + } + /* Lazy handles own their graph uniquely — no retain on copy */ + if (v->type == RAY_LAZY) return true; + /* HNSW handle owns its ray_hnsw_t uniquely. Deep-clone the index + * so the copy is an independent owner with the same semantics as + * the source. 
On clone-OOM, detach the copy (so caller can free + * it cleanly) and signal failure — the caller must not treat the + * copy as a valid handle. */ + if (v->type == -RAY_I64 && (v->attrs & RAY_ATTR_HNSW)) { + ray_hnsw_t* src = (ray_hnsw_t*)(uintptr_t)v->i64; + if (src) { + ray_hnsw_t* dup = ray_hnsw_clone(src); + if (!dup) { + v->i64 = 0; + v->attrs &= (uint8_t)~RAY_ATTR_HNSW; + return false; + } + v->i64 = (int64_t)(uintptr_t)dup; /* v now points at its own clone */ + } + return true; + } + if (ray_atom_owns_obj(v) && v->obj && !RAY_IS_ERR(v->obj)) + ray_retain(v->obj); + return true; + } + + if (v->attrs & RAY_ATTR_SLICE) { /* a slice retains only its parent reference */ + if (v->slice_parent && !RAY_IS_ERR(v->slice_parent)) + ray_retain(v->slice_parent); + return true; + } + + if (v->type == RAY_INDEX) { + ray_index_t* ix = ray_index_payload(v); + ray_index_retain_payload(ix); + ray_index_retain_saved(ix); + return true; + } + + if (v->attrs & RAY_ATTR_HAS_INDEX) { /* only the attached index is retained; returns before the nullmap/str_pool checks */ + if (v->index && !RAY_IS_ERR(v->index)) + ray_retain(v->index); + return true; + } + + if ((v->attrs & RAY_ATTR_NULLMAP_EXT) && + v->ext_nullmap && !RAY_IS_ERR(v->ext_nullmap)) + ray_retain(v->ext_nullmap); + + if (v->type == RAY_STR && v->str_pool && !RAY_IS_ERR(v->str_pool)) + ray_retain(v->str_pool); + + if (RAY_IS_PARTED(v->type)) { + int64_t n_segs = v->len; + ray_t** segs = (ray_t**)ray_data(v); + for (int64_t i = 0; i < n_segs; i++) { + if (segs[i] && !RAY_IS_ERR(segs[i])) + ray_retain(segs[i]); + } + return true; + } + + if (v->type == RAY_MAPCOMMON) { + ray_t** ptrs = (ray_t**)ray_data(v); + if (ptrs[0] && !RAY_IS_ERR(ptrs[0])) ray_retain(ptrs[0]); + if (ptrs[1] && !RAY_IS_ERR(ptrs[1])) ray_retain(ptrs[1]); + return true; + } + + if (v->type == RAY_TABLE || v->type == RAY_DICT) { + ray_t** slots = (ray_t**)ray_data(v); + if (slots[0] && !RAY_IS_ERR(slots[0])) ray_retain(slots[0]); + if (slots[1] && !RAY_IS_ERR(slots[1])) ray_retain(slots[1]); + return true; + } + + if (v->type == RAY_LIST) { + ray_t** ptrs = (ray_t**)ray_data(v); + for (int64_t i = 0; i < v->len; i++) { + 
ray_t* child = ptrs[i]; + if (child && !RAY_IS_ERR(child)) ray_retain(child); + } + } + return true; +} + +/* Clears v's owned pointers (and the matching attr bits) without releasing them; ray_scratch_realloc calls this after copying the header with memcpy so the following ray_free does not release the children. */ static void ray_detach_owned_refs(ray_t* v) { + if (!v || RAY_IS_ERR(v)) return; + + if (ray_is_atom(v)) { + if (v->type == RAY_LAMBDA) { + ray_t** slots = (ray_t**)ray_data(v); + for (int i = 0; i < 4; i++) slots[i] = NULL; + LAMBDA_NFO(v) = NULL; + LAMBDA_DBG(v) = NULL; + return; + } + if (v->type == RAY_LAZY) { + RAY_LAZY_GRAPH(v) = NULL; + RAY_LAZY_OP(v) = NULL; + return; + } + /* HNSW handle: ownership has been transferred elsewhere; stop the + * rc→0 cleanup hook from freeing the (now-foreign) index. */ + if (v->type == -RAY_I64 && (v->attrs & RAY_ATTR_HNSW)) { + v->i64 = 0; + v->attrs &= (uint8_t)~RAY_ATTR_HNSW; + return; + } + if (ray_atom_owns_obj(v)) v->obj = NULL; + return; + } + + if (v->attrs & RAY_ATTR_SLICE) { + v->slice_parent = NULL; + v->slice_offset = 0; + v->attrs &= (uint8_t)~RAY_ATTR_SLICE; + return; + } + + if (v->type == RAY_INDEX) { /* null the per-kind payload pointers and the saved nullmap bytes */ + ray_index_t* ix = ray_index_payload(v); + switch ((ray_idx_kind_t)ix->kind) { + case RAY_IDX_HASH: ix->u.hash.table = ix->u.hash.chain = NULL; break; + case RAY_IDX_SORT: ix->u.sort.perm = NULL; break; + case RAY_IDX_BLOOM: ix->u.bloom.bits = NULL; break; + default: break; + } + memset(ix->saved_nullmap, 0, 16); + ix->saved_attrs = 0; + return; + } + + if (v->attrs & RAY_ATTR_HAS_INDEX) { + v->index = NULL; + v->_idx_pad = NULL; + v->attrs &= (uint8_t)~RAY_ATTR_HAS_INDEX; + return; + } + + if (v->attrs & RAY_ATTR_NULLMAP_EXT) { + v->ext_nullmap = NULL; + v->attrs &= (uint8_t)~RAY_ATTR_NULLMAP_EXT; + } + + if (v->type == RAY_STR) { + v->str_pool = NULL; + } + + if (RAY_IS_PARTED(v->type)) { + int64_t n_segs = v->len; + ray_t** segs = (ray_t**)ray_data(v); + for (int64_t i = 0; i < n_segs; i++) + segs[i] = NULL; + return; + } + + if (v->type == RAY_MAPCOMMON) { + ray_t** ptrs = (ray_t**)ray_data(v); + ptrs[0] = NULL; + ptrs[1] = NULL; + return; + } + + if (v->type == RAY_TABLE || v->type == RAY_DICT) { + 
ray_t** slots = (ray_t**)ray_data(v); + slots[0] = NULL; + slots[1] = NULL; + v->len = 0; + return; + } + + if (v->type == RAY_LIST) { + v->len = 0; /* with len 0 the LIST loop in ray_release_owned_refs visits nothing */ + } +} + +/* -------------------------------------------------------------------------- + * ray_alloc: returns a block of order ray_order_for_size(data_size) with a zeroed 32-byte header and rc=1, or NULL when the heap cannot be set up, the order exceeds RAY_HEAP_MAX_ORDER, or no pool can be added. + * -------------------------------------------------------------------------- */ + +ray_t* ray_alloc(size_t data_size) { + ray_heap_t* h = ray_tl_heap; + if (RAY_UNLIKELY(!h)) { /* first use: create the heap on demand */ + ray_heap_init(); + h = ray_tl_heap; + if (!h) return NULL; + } + + uint8_t order = ray_order_for_size(data_size); + if (order > RAY_HEAP_MAX_ORDER) return NULL; + + /* Slab fast path */ + if (RAY_LIKELY(IS_SLAB_ORDER(order))) { + int idx = SLAB_INDEX(order); + if (RAY_LIKELY(h->slabs[idx].count > 0)) { + ray_t* v = h->slabs[idx].stack[--h->slabs[idx].count]; + + /* Zero full 32-byte header (hot path). + * Nullmap (bytes 0-15) must be cleared for null-bit correctness. */ + memset(v, 0, 32); + v->order = order; + if (RAY_UNLIKELY(ray_rc_sync)) + ray_atomic_store(&v->rc, 1); + else + v->rc = 1; + + RAY_STAT(h->stats.alloc_count++); + RAY_STAT(h->stats.slab_hits++); + RAY_STAT(h->stats.bytes_allocated += BSIZEOF(order)); + RAY_STAT(h->stats.peak_bytes = h->stats.bytes_allocated > h->stats.peak_bytes + ? h->stats.bytes_allocated : h->stats.peak_bytes); + return v; + } + } + + /* Find free block via avail bitmask. + * Avail bits can be stale from cross-heap fl_remove, so we loop + * to find a genuinely non-empty freelist. 
*/ + uint64_t candidates = h->avail & (UINT64_MAX << order); + + if (RAY_UNLIKELY(candidates == 0)) { + heap_flush_foreign(h, false); /* return_to_owner=false: heap_flush_foreign returns at once, so h->foreign stays queued and avail is unchanged */ + + candidates = h->avail & (UINT64_MAX << order); + + if (candidates == 0) { + if (!heap_add_pool(h, order)) return NULL; + candidates = h->avail & (UINT64_MAX << order); + if (candidates == 0) return NULL; + } + } + + /* Scan past stale avail bits (cross-heap fl_remove may have emptied lists) */ + uint8_t found_order; + for (;;) { + if (candidates == 0) { /* every candidate bit turned out stale: grow the heap and retry */ + if (!heap_add_pool(h, order)) return NULL; + candidates = h->avail & (UINT64_MAX << order); + if (candidates == 0) return NULL; + } + found_order = (uint8_t)__builtin_ctzll(candidates); /* lowest set bit = smallest order that may have a free block */ + if (!fl_empty(&h->freelist[found_order])) break; + /* Clear stale bit and try next */ + h->avail &= ~(1ULL << found_order); + candidates &= ~(1ULL << found_order); + } + + /* Pop from circular sentinel freelist */ + ray_fl_head_t* head = &h->freelist[found_order]; + ray_t* blk = head->fl_next; + fl_remove(blk); + if (fl_empty(head)) + h->avail &= ~(1ULL << found_order); + + /* Split down to requested order */ + heap_split_block(h, blk, order, found_order); + + /* Zero ray_t header and set metadata */ + memset(blk, 0, 32); + blk->mmod = 0; + blk->order = order; + if (RAY_UNLIKELY(ray_rc_sync)) + ray_atomic_store(&blk->rc, 1); + else + blk->rc = 1; + + RAY_STAT(h->stats.alloc_count++); + RAY_STAT(h->stats.bytes_allocated += BSIZEOF(order)); + RAY_STAT(h->stats.peak_bytes = h->stats.bytes_allocated > h->stats.peak_bytes + ? 
h->stats.bytes_allocated : h->stats.peak_bytes); + + return blk; +} + +/* -------------------------------------------------------------------------- + * ray_free: releases v's owned refs, then unmaps the block (mmod==1), slab-caches it, queues it on h->foreign, or coalesces it. NULL, error values and arena-owned blocks are ignored outright. + * -------------------------------------------------------------------------- */ + +void ray_free(ray_t* v) { + if (!v || RAY_IS_ERR(v)) return; + if (v->attrs & RAY_ATTR_ARENA) return; /* arena-owned, bulk-freed */ + + /* Guard: keep rc=1 while releasing children so buddy coalescing + * won't merge this block prematurely (it checks buddy_rc==0). */ + ray_atomic_store(&v->rc, 1); + + ray_release_owned_refs(v); + + ray_heap_t* h = ray_tl_heap; + + /* File-mapped: munmap */ + if (v->mmod == 1) { + if (v->type == RAY_TABLE || v->type == RAY_DICT || v->type == RAY_LIST) return; + if (v->type > 0 && v->type < RAY_TYPE_COUNT) { + uint8_t esz = ray_sym_elem_size(v->type, v->attrs); + size_t data_size = 32 + (size_t)v->len * esz; + if (v->attrs & RAY_ATTR_NULLMAP_EXT) + data_size += ((size_t)v->len + 7) / 8; /* one null bit per element, rounded up to bytes */ + size_t mapped_size = (data_size + 4095) & ~(size_t)4095; /* round up to a 4096-byte multiple */ + ray_vm_unmap_file(v, mapped_size); + } else { + ray_vm_unmap_file(v, 4096); + } + if (h) RAY_STAT(h->stats.free_count++); + return; + } + + /* Legacy mmod==2 guard */ + if (v->mmod == 2) return; + + if (!h) return; + + uint8_t order = v->order; + + if (order < RAY_ORDER_MIN || order > RAY_HEAP_MAX_ORDER) return; + + size_t block_size = BSIZEOF(order); + + /* O(1) ownership check via pool header heap_id. + * ray_pool_of() derives pool base in O(1) via self-aligned AND mask. + * Pool header stores heap_id stamped at pool creation. */ + ray_pool_hdr_t* phdr = ray_pool_of(v); + if (!phdr) return; + bool is_local = (phdr->heap_id == h->id); + + /* Slab fast path (same heap only) */ + if (IS_SLAB_ORDER(order) && is_local) { + int idx = SLAB_INDEX(order); + if (h->slabs[idx].count < RAY_SLAB_CACHE_SIZE) { + /* Mark rc=1 so buddy coalescing skips slab-cached blocks. 
+ * Blocks freed via ray_release arrive with rc=0; without this, + * a buddy being freed would see rc==0 and incorrectly merge + * with the slab-cached block, causing overlapping allocations. + * Must be atomic: buddy coalescing on another thread reads rc. */ + ray_atomic_store(&v->rc, 1); + h->slabs[idx].stack[h->slabs[idx].count++] = v; + RAY_STAT(h->stats.free_count++); + RAY_STAT(h->stats.bytes_allocated -= block_size); + return; + } + } + + /* Foreign: different heap — enqueue to foreign list for later + * return to the owner during GC (flush with return_to_owner=true). + * Do NOT adjust any heap's bytes_allocated here: the block stays + * counted on the owning heap until properly returned and coalesced. */ + if (!is_local) { + v->fl_next = h->foreign; + h->foreign = v; + RAY_STAT(h->stats.free_count++); + return; + } + + /* Local block — coalesce with buddy */ + heap_coalesce(h, v, (uintptr_t)phdr, phdr->pool_order); + + RAY_STAT(h->stats.free_count++); + RAY_STAT(h->stats.bytes_allocated -= block_size); +} + +/* -------------------------------------------------------------------------- + * ray_alloc_copy + * -------------------------------------------------------------------------- */ + +ray_t* ray_alloc_copy(ray_t* v) { + if (!v || RAY_IS_ERR(v)) return NULL; + size_t data_size; + if (ray_is_atom(v)) { + data_size = 0; + } else if (v->type == RAY_TABLE || v->type == RAY_DICT) { + data_size = 2 * sizeof(ray_t*); + } else if (RAY_IS_PARTED(v->type) || v->type == RAY_MAPCOMMON) { + int64_t n_ptrs = v->len; + if (v->type == RAY_MAPCOMMON) n_ptrs = 2; + if (n_ptrs < 0) return ray_error("oom", NULL); + data_size = (size_t)n_ptrs * sizeof(ray_t*); + } else if (v->type == RAY_LIST) { + /* RAY_LIST has type==0, which the generic branch below (t <= 0) + * would route to data_size=0, silently producing a header-only copy + * whose item-pointer area is uninitialised — a shallow COW of a + * shared list would then lose every element. Handle explicitly. 
*/ + if (v->len < 0 || (uint64_t)v->len > SIZE_MAX / sizeof(ray_t*)) + return ray_error("oom", NULL); + data_size = (size_t)ray_len(v) * sizeof(ray_t*); + } else { + int8_t t = ray_type(v); + if (t <= 0 || t >= RAY_TYPE_COUNT) + data_size = 0; + else { + uint8_t esz = ray_sym_elem_size(t, v->attrs); + if (v->len < 0 || (esz > 0 && (uint64_t)v->len > SIZE_MAX / esz)) + return ray_error("oom", NULL); + data_size = (size_t)ray_len(v) * esz; + } + } + ray_t* copy = ray_alloc(data_size); + if (!copy) return NULL; + + uint8_t new_order = copy->order; + uint8_t new_mmod = copy->mmod; + memcpy(copy, v, 32 + data_size); + copy->mmod = new_mmod; + copy->order = new_order; + if (RAY_UNLIKELY(ray_rc_sync)) + ray_atomic_store(©->rc, 1); + else + copy->rc = 1; + if (!ray_retain_owned_refs(copy)) { + /* Deep-clone of an owned resource failed (e.g. HNSW index OOM). + * The copy's owned state has already been neutralized, so a plain + * ray_free won't touch the source's resources. */ + ray_free(copy); + return ray_error("oom", NULL); + } + return copy; +} + +/* -------------------------------------------------------------------------- + * ray_scratch_alloc / ray_scratch_realloc + * -------------------------------------------------------------------------- */ + +ray_t* ray_scratch_alloc(size_t data_size) { + return ray_alloc(data_size); +} + +ray_t* ray_scratch_realloc(ray_t* v, size_t new_data_size) { + ray_t* new_v = ray_alloc(new_data_size); + if (!new_v) return NULL; + if (v && !RAY_IS_ERR(v)) { + size_t old_data; + if (ray_is_atom(v)) + old_data = 0; + else if (v->type == RAY_LIST) { + if (v->len < 0) { old_data = 0; } + else old_data = (size_t)ray_len(v) * sizeof(ray_t*); + } else if (v->type == RAY_TABLE || v->type == RAY_DICT) { + old_data = 2 * sizeof(ray_t*); + } else if (RAY_IS_PARTED(v->type) || v->type == RAY_MAPCOMMON) { + int64_t n_ptrs = v->len; + if (v->type == RAY_MAPCOMMON) n_ptrs = 2; + if (n_ptrs < 0) n_ptrs = 0; + old_data = (size_t)n_ptrs * sizeof(ray_t*); + } 
else { + int8_t t = ray_type(v); + old_data = (t > 0 && t < RAY_TYPE_COUNT && v->len >= 0) ? + (size_t)ray_len(v) * ray_sym_elem_size(t, v->attrs) : 0; + } + /* Clamp old_data to actual allocation size */ + if (v->mmod == 0 && v->order >= RAY_ORDER_MIN) { + size_t alloc_data = BSIZEOF(v->order) - 32; + if (old_data > alloc_data) old_data = alloc_data; + } + size_t copy_data = old_data < new_data_size ? old_data : new_data_size; /* never copy more than the new block was sized for */ + uint8_t new_mmod = new_v->mmod; + uint8_t new_order = new_v->order; + memcpy(new_v, v, 32 + copy_data); + new_v->mmod = new_mmod; + new_v->order = new_order; + if (RAY_UNLIKELY(ray_rc_sync)) + ray_atomic_store(&new_v->rc, 1); + else + new_v->rc = 1; + /* Ownership transfers via memcpy — no retain needed on new_v. + * Detach nulls old pointers so ray_free won't double-release. */ + if (!(v->attrs & RAY_ATTR_ARENA)) { + ray_detach_owned_refs(v); + ray_free(v); + } + } + return new_v; +} + +/* -------------------------------------------------------------------------- + * ray_mem_stats: copies the current heap's stats into *out (zeroes when there is no heap), then fills sys_current/sys_peak from ray_sys_get_stat. + * -------------------------------------------------------------------------- */ + +void ray_mem_stats(ray_mem_stats_t* out) { + if (ray_tl_heap) + *out = ray_tl_heap->stats; + else + memset(out, 0, sizeof(*out)); + int64_t sc = 0, sp = 0; + ray_sys_get_stat(&sc, &sp); + out->sys_current = (size_t)sc; + out->sys_peak = (size_t)sp; +} + +/* -------------------------------------------------------------------------- + * Heap lifecycle + * -------------------------------------------------------------------------- */ + +void ray_heap_init(void) { + if (ray_tl_heap) return; /* already initialised: nothing to do */ + + size_t heap_sz = (sizeof(ray_heap_t) + 4095) & ~(size_t)4095; /* struct size rounded up to a 4096-byte multiple */ + ray_heap_t* h = (ray_heap_t*)ray_vm_alloc(heap_sz); + if (!h) return; + memset(h, 0, heap_sz); + + /* Bitmap-based ID: acquire reusable ID via atomic CAS */ + int id = heap_id_acquire(); + if (id < 0) { + ray_vm_free(h, heap_sz); + return; /* ID pool exhausted */ + } + h->id = (uint16_t)id; + + /* Register in global heap registry 
*/ + ray_heap_registry[h->id % RAY_HEAP_REGISTRY_SIZE] = h; + + /* Initialize circular sentinel freelists */ + for (int i = 0; i < RAY_HEAP_FL_SIZE; i++) + fl_init(&h->freelist[i]); + + /* Resolve swap directory for file-backed pool fallback. RAY_HEAP_SWAP + * env var overrides the default ("./"); we always ensure a trailing + * slash so heap_add_pool can append a file name to the directory + * unconditionally. An empty / over-long env value (strlen >= sizeof(h->swap_path) - 16) is rejected and the + * default kicks in. */ + const char* env = getenv("RAY_HEAP_SWAP"); + const char* sp = (env && *env && strlen(env) < sizeof(h->swap_path) - 16) ? env : "./"; + size_t sp_len = strlen(sp); + memcpy(h->swap_path, sp, sp_len); + h->swap_path[sp_len] = '\0'; + if (sp_len > 0 && h->swap_path[sp_len - 1] != '/' && sp_len < sizeof(h->swap_path) - 1) { + h->swap_path[sp_len] = '/'; + h->swap_path[sp_len + 1] = '\0'; + } + + ray_tl_heap = h; /* published last, after the heap is fully set up */ +} + +void ray_heap_destroy(void) { + ray_heap_t* h = ray_tl_heap; + if (!h) return; + + uint16_t saved_id = h->id; /* kept so the ID can be released after h itself is freed */ + + /* Unregister from global heap registry */ + ray_heap_registry[h->id % RAY_HEAP_REGISTRY_SIZE] = NULL; + + /* Skip flush_slabs and flush_foreign — all pools are about to be + * munmap'd. Flushing would coalesce blocks and fl_remove buddies + * from other heaps' freelists, which races with concurrent worker + * destruction during ray_pool_free(). */ + + /* Munmap all tracked pools. File-backed pools also need their fd + * closed and their tempfile unlinked so the swap directory doesn't + * accumulate orphans. 
*/ + for (uint32_t i = 0; i < h->pool_count; i++) { + ray_pool_hdr_t* hdr = (ray_pool_hdr_t*)h->pools[i].base; + ray_vm_free(hdr->vm_base, BSIZEOF(h->pools[i].pool_order)); + if (h->pools[i].backed) { + if (h->pools[i].swap_fd >= 0) close(h->pools[i].swap_fd); + if (h->pools[i].swap_path) { + unlink(h->pools[i].swap_path); + ray_sys_free(h->pools[i].swap_path); + } + } + } + + size_t heap_sz = (sizeof(ray_heap_t) + 4095) & ~(size_t)4095; + ray_vm_free(h, heap_sz); + ray_tl_heap = NULL; + + /* Release bitmap ID after all memory is freed */ + heap_id_release(saved_id); +} + +/* -------------------------------------------------------------------------- + * Return worker-pool blocks from this heap's freelists to their owners. + * + * Blocks from another heap's pool can end up on this heap's freelists, + * e.g. when heap_flush_foreign coalesces a block locally because its owner is gone (ray_alloc no longer flushes foreign blocks locally). + * This function walks the freelists, finds blocks that are in none of h's + * tracked pools, removes them, and coalesces them into the still-registered heap named by the pool header's heap_id. + * Workers can then reuse their pools without allocating new ones. + * + * ONLY safe when workers are idle (on semaphore, ray_parallel_flag == 0). 
+ * -------------------------------------------------------------------------- */ + +static void heap_return_foreign_freelist(ray_heap_t* h) { + for (int order = RAY_ORDER_MIN; order < RAY_HEAP_FL_SIZE; order++) { + ray_fl_head_t* head = &h->freelist[order]; + ray_t* blk = head->fl_next; + while (blk != (ray_t*)head) { + ray_t* next = blk->fl_next; /* saved before blk may be unlinked below */ + /* Use heap_find_pool on h first — if found, block is local */ + int pidx = heap_find_pool(h, blk); + if (pidx < 0) { + /* Foreign block — find owner via pool header (GC path) */ + ray_pool_hdr_t* phdr = ray_pool_of(blk); + if (!phdr) { blk = next; continue; } + ray_heap_t* owner = ray_heap_registry[phdr->heap_id % RAY_HEAP_REGISTRY_SIZE]; + if (owner && owner->id == phdr->heap_id) { /* blocks whose owner is not registered stay on h's freelist */ + fl_remove(blk); + if (fl_empty(head)) + h->avail &= ~(1ULL << order); + /* Coalesce on owner for defragmentation */ + int opidx = heap_find_pool(owner, blk); + uintptr_t pb; + uint8_t po; + if (opidx >= 0) { + pb = (uintptr_t)owner->pools[opidx].base; + po = owner->pools[opidx].pool_order; + } else { + pb = (uintptr_t)phdr; + po = phdr->pool_order; + } + heap_coalesce(owner, blk, pb, po); + } + } + blk = next; + } + } +} + +void ray_heap_gc(void) { + ray_heap_t* h = ray_tl_heap; + if (!h) return; + + bool safe = (atomic_load_explicit(&ray_parallel_flag, memory_order_relaxed) == 0); + + /* Phase 1: Flush main heap's foreign blocks and slab caches. + * When safe (workers idle), return foreign blocks to their owners + * so worker pools become reusable. */ + heap_flush_foreign(h, safe); + heap_flush_slabs(h); + + if (safe) { + /* Phase 2: Return foreign blocks absorbed onto our freelists + * back to their owning worker heaps. */ + heap_return_foreign_freelist(h); + + /* Phase 3: Skip worker heaps — we cannot safely touch their + * foreign lists or slab caches because workers may still be + * between pending-- and sem_wait, calling ray_free which + * modifies wh->foreign and wh->slabs. 
Workers flush their + * own foreign/slabs on their next dispatch entry. + * TODO: full cross-heap reclamation requires a worker + * quiescence barrier. */ + + /* Phase 4: Reclaim OVERSIZED empty pools. + * Standard pools (pool_order == RAY_HEAP_POOL_ORDER) are never + * munmapped — physical pages released via madvise (phase 5) + * re-fault cheaply on next query. + * Only oversized pools (pool_order > RAY_HEAP_POOL_ORDER) are + * candidates — these are one-off large allocations. + * + * Emptiness is computed by walking the owning heap's freelists and slab + * caches to sum free capacity within the pool. This avoids atomic + * live_count operations on the alloc/free hot path. */ + /* Phase 4 (detail): Reclaim oversized empty pools. + * + * For each candidate pool (owned by heap gh): + * (a) sum free bytes from gh's own freelist + slab cache — safe, only gh modifies these + * (b) scan the OTHER heaps' foreign lists (read-only) — foreign lists are + * prepend-only during the race window, so a read-only walk + * sees a consistent suffix. A hit does not add to the count: + * it vetoes the munmap for this pass. + * + * On removal, only unlink from gh's freelist/slabs. A pool that still + * has blocks queued on another heap's foreign list is kept, because + * walking that list after the munmap would dereference unmapped memory; + * it is re-examined on a later GC. 
*/ + for (int hid = 0; hid < RAY_HEAP_REGISTRY_SIZE; hid++) { + ray_heap_t* gh = ray_heap_registry[hid]; + if (!gh) continue; + + for (uint32_t p = 0; p < gh->pool_count; ) { + ray_pool_hdr_t* phdr = (ray_pool_hdr_t*)gh->pools[p].base; + + /* Skip standard pools and last-remaining pool */ + if (phdr->pool_order <= RAY_HEAP_POOL_ORDER + || gh->pool_count <= 1) { + p++; + continue; + } + + uint8_t po = phdr->pool_order; + uintptr_t pb = (uintptr_t)phdr; + uintptr_t pe = pb + BSIZEOF(po); + size_t pool_capacity = BSIZEOF(po) - BSIZEOF(RAY_ORDER_MIN); /* everything except the min-order block at the pool base */ + + /* (a) Sum free bytes from owning heap's freelist + slabs */ + size_t free_bytes = 0; + for (int ord = RAY_ORDER_MIN; ord < RAY_HEAP_FL_SIZE; ord++) { + ray_fl_head_t* fh = &gh->freelist[ord]; + ray_t* blk = fh->fl_next; + while (blk != (ray_t*)fh) { + if ((uintptr_t)blk >= pb && (uintptr_t)blk < pe) + free_bytes += BSIZEOF(ord); + blk = blk->fl_next; + } + } + for (int si = 0; si < RAY_SLAB_ORDERS; si++) { + for (uint32_t j = 0; j < gh->slabs[si].count; j++) { + ray_t* sb = gh->slabs[si].stack[j]; + if ((uintptr_t)sb >= pb && (uintptr_t)sb < pe) + free_bytes += BSIZEOF(RAY_SLAB_MIN + si); + } + } + + /* (b) Check if ANY blocks from this pool are still in other + * heaps' foreign lists. If so, we cannot munmap — + * those blocks are threaded into the foreign list and + * dereferencing them after munmap would crash. + * They'll be flushed to the owner on the next GC. */ + bool has_foreign = false; + for (int fh_id = 0; fh_id < RAY_HEAP_REGISTRY_SIZE && !has_foreign; fh_id++) { + ray_heap_t* fh_heap = ray_heap_registry[fh_id]; + if (!fh_heap || fh_heap == gh) continue; + ray_t* fb = fh_heap->foreign; + while (fb) { + if ((uintptr_t)fb >= pb && (uintptr_t)fb < pe) { + has_foreign = true; + break; + } + fb = fb->fl_next; + } + } + + if (free_bytes < pool_capacity || has_foreign) { + p++; + continue; /* pool has live allocations or dangling foreign refs */ + } + + /* Pool is empty and no foreign-list refs — safe to munmap. 
+ * Remove blocks from owning heap's freelists and slab caches. */ + for (int ord = RAY_ORDER_MIN; ord < RAY_HEAP_FL_SIZE; ord++) { + ray_fl_head_t* fh = &gh->freelist[ord]; + ray_t* blk = fh->fl_next; + while (blk != (ray_t*)fh) { + ray_t* next = blk->fl_next; + if ((uintptr_t)blk >= pb && (uintptr_t)blk < pe) { + fl_remove(blk); + if (fl_empty(fh)) + gh->avail &= ~(1ULL << ord); + } + blk = next; + } + } + for (int si = 0; si < RAY_SLAB_ORDERS; si++) { + uint32_t dst = 0; /* write cursor: compacts the stack in place, dropping entries inside the pool */ + for (uint32_t j = 0; j < gh->slabs[si].count; j++) { + ray_t* sb = gh->slabs[si].stack[j]; + if ((uintptr_t)sb >= pb && (uintptr_t)sb < pe) + continue; + gh->slabs[si].stack[dst++] = sb; + } + gh->slabs[si].count = dst; + } + + ray_vm_free(phdr->vm_base, BSIZEOF(po)); + /* File-backed pools also need their fd closed and tempfile + * unlinked, mirroring the heap_destroy path. */ + if (gh->pools[p].backed) { + if (gh->pools[p].swap_fd >= 0) close(gh->pools[p].swap_fd); + if (gh->pools[p].swap_path) { + unlink(gh->pools[p].swap_path); + ray_sys_free(gh->pools[p].swap_path); + } + } + gh->pools[p] = gh->pools[--gh->pool_count]; /* swap-remove: the last entry takes slot p */ + /* Don't increment p — check swapped entry */ + } + } + } + +} + +/* For every free block of order >= 13 on the current heap's freelists, passes everything past its first 4096 bytes to ray_vm_release. */ void ray_heap_release_pages(void) { + ray_heap_t* h = ray_tl_heap; + if (!h) return; + for (int i = 13; i < RAY_HEAP_FL_SIZE; i++) { + ray_fl_head_t* head = &h->freelist[i]; + ray_t* blk = head->fl_next; + while (blk != (ray_t*)head) { + size_t bsize = BSIZEOF(i); + if (bsize > 4096) + ray_vm_release((char*)blk + 4096, bsize - 4096); /* the first 4096 bytes, which hold the freelist links, are kept */ + blk = blk->fl_next; + } + } +} + +void ray_heap_merge(ray_heap_t* src) { + ray_heap_t* dst = ray_tl_heap; + if (!dst || !src) return; + + /* Merge stats: dst inherits src's outstanding allocations so that + * future local frees of those blocks correctly decrement dst. 
*/ + dst->stats.alloc_count += src->stats.alloc_count; + dst->stats.free_count += src->stats.free_count; + dst->stats.bytes_allocated += src->stats.bytes_allocated; + dst->stats.slab_hits += src->stats.slab_hits; + dst->stats.direct_count += src->stats.direct_count; + dst->stats.direct_bytes += src->stats.direct_bytes; + if (src->stats.peak_bytes > dst->stats.peak_bytes) + dst->stats.peak_bytes = src->stats.peak_bytes; /* peak is the larger of the two, not their sum */ + + /* Transfer slabs: fit into dst cache, coalesce overflow */ + for (int i = 0; i < RAY_SLAB_ORDERS; i++) { + while (src->slabs[i].count > 0 && dst->slabs[i].count < RAY_SLAB_CACHE_SIZE) + dst->slabs[i].stack[dst->slabs[i].count++] = + src->slabs[i].stack[--src->slabs[i].count]; + while (src->slabs[i].count > 0) { + ray_t* blk = src->slabs[i].stack[--src->slabs[i].count]; + int pidx = heap_find_pool(dst, blk); /* src's pools are only moved into dst's table at the end of this function */ + uintptr_t pb; + uint8_t po; + if (pidx >= 0) { + pb = (uintptr_t)dst->pools[pidx].base; + po = dst->pools[pidx].pool_order; + } else { + ray_pool_hdr_t* phdr = ray_pool_of(blk); + if (!phdr) continue; + pb = (uintptr_t)phdr; + po = phdr->pool_order; + } + heap_coalesce(dst, blk, pb, po); + } + } + + /* Free foreign blocks via coalescing */ + ray_t* fblk = src->foreign; + while (fblk) { + ray_t* next = fblk->fl_next; + int pidx = heap_find_pool(dst, fblk); + uintptr_t pb; + uint8_t po; + if (pidx >= 0) { + pb = (uintptr_t)dst->pools[pidx].base; + po = dst->pools[pidx].pool_order; + } else { + ray_pool_hdr_t* phdr = ray_pool_of(fblk); + if (!phdr) { fblk = next; continue; } + pb = (uintptr_t)phdr; + po = phdr->pool_order; + } + heap_coalesce(dst, fblk, pb, po); + fblk = next; + } + src->foreign = NULL; + + /* Merge freelists: circular list splice (src chain into dst chain) */ + for (int i = RAY_ORDER_MIN; i < RAY_HEAP_FL_SIZE; i++) { + if (fl_empty(&src->freelist[i])) continue; + + ray_fl_head_t* src_head = &src->freelist[i]; + ray_fl_head_t* dst_head = &dst->freelist[i]; + + /* Splice: src's chain [src_first...src_last] into dst after 
sentinel */ + ray_t* src_first = src_head->fl_next; + ray_t* src_last = src_head->fl_prev; + ray_t* dst_first = dst_head->fl_next; + + /* src_first goes after dst sentinel */ + dst_head->fl_next = src_first; + src_first->fl_prev = (ray_t*)dst_head; + + /* src_last connects to old dst_first */ + src_last->fl_next = dst_first; + dst_first->fl_prev = src_last; + + dst->avail |= (1ULL << i); + + /* Reset src sentinel to empty */ + fl_init(src_head); + } + + src->avail = 0; + + /* Update pool headers: set heap_id to dst, transfer pool entries. + * Do NOT rewrite heap_id for pools that can't be tracked — that would + * make coalescing reference a pool not in dst's pool table. NOTE(review): the overflow branch below does rewrite heap_id for such a pool and does not add it to dst's table — confirm which behaviour is intended. */ + for (uint32_t i = 0; i < src->pool_count; i++) { + if (dst->pool_count < RAY_MAX_POOLS) { + ray_pool_hdr_t* hdr = (ray_pool_hdr_t*)src->pools[i].base; + hdr->heap_id = dst->id; + dst->pools[dst->pool_count++] = src->pools[i]; + } else { + /* Pool overflow: only triggers at RAY_MAX_POOLS (512 pools = 16GB+). + * Fix ownership so blocks free to the correct heap. */ + ray_pool_hdr_t* hdr = (ray_pool_hdr_t*)src->pools[i].base; + hdr->heap_id = dst->id; + assert(0 && "ray_heap_merge: pool overflow at RAY_MAX_POOLS"); + } + } + src->pool_count = 0; +} + +/* -------------------------------------------------------------------------- + * Public foreign-blocks flush: flushes the current heap's foreign list, returning blocks to their owners only while ray_parallel_flag reads 0. + * -------------------------------------------------------------------------- */ + +void ray_heap_flush_foreign(void) { + ray_heap_t* h = ray_tl_heap; + if (!h) return; + bool safe = (atomic_load_explicit(&ray_parallel_flag, + memory_order_relaxed) == 0); + heap_flush_foreign(h, safe); +} + +/* -------------------------------------------------------------------------- + * Pending-merge queue (lock-free LIFO) + * + * Workers that are torn down push their heap onto this queue instead of + * destroying it immediately. The main thread drains the queue, merging + * each pending heap into its own and then destroying it. 
+ * -------------------------------------------------------------------------- */ + +/* Detach heap from the registry and push it onto the global pending-merge LIFO. NULL is ignored. */ +void ray_heap_push_pending(ray_heap_t* heap) { + if (!heap) return; + /* Unregister so no new foreign blocks target this heap */ + ray_heap_registry[heap->id % RAY_HEAP_REGISTRY_SIZE] = NULL; + /* Lock-free push: CAS loop on global LIFO head */ + heap->pending_next = atomic_load_explicit(&ray_heap_pending_merge, memory_order_relaxed); + /* A failed CAS stores the current head into heap->pending_next, so the retry links against the fresh value. */ + while (!atomic_compare_exchange_weak_explicit( + &ray_heap_pending_merge, + &heap->pending_next, heap, + memory_order_release, memory_order_relaxed)) + ; +} + +/* Take the whole pending list in one exchange, merge each heap into the caller's heap, then unmap the heap struct and release its id. */ +void ray_heap_drain_pending(void) { + /* Atomically steal the entire pending list */ + ray_heap_t* pending = atomic_exchange_explicit( + &ray_heap_pending_merge, NULL, + memory_order_acquire); + while (pending) { + ray_heap_t* next = pending->pending_next; + ray_heap_merge(pending); + /* Free the heap struct (pools already transferred by merge) */ + uint16_t saved_id = pending->id; + size_t heap_sz = (sizeof(ray_heap_t) + 4095) & ~(size_t)4095; + ray_vm_free(pending, heap_sz); + heap_id_release(saved_id); + pending = next; + } +} + +/* -------------------------------------------------------------------------- + * Scratch arena: bump allocator backed by buddy-allocated 64KB blocks + * -------------------------------------------------------------------------- */ + +/* Bump-allocate nbytes (rounded up to a multiple of 16) from arena a, taking a + * new backing block from ray_alloc() when the current one cannot hold the request. + * Returns NULL when the size cannot be rounded without overflow, when + * RAY_ARENA_MAX_BACKING blocks are already in use, or when ray_alloc() fails. */ +void* ray_scratch_arena_push(ray_scratch_arena_t* a, size_t nbytes) { + /* Reject sizes whose 16-byte round-up would wrap around to a tiny value. */ + if (nbytes > (size_t)-1 - 15) return NULL; + + /* 16-byte alignment */ + nbytes = (nbytes + 15) & ~(size_t)15; + + /* Compare against the remaining room instead of forming a->ptr + nbytes, + * which is out of range (and may wrap) for a large request. */ + if (RAY_LIKELY(a->ptr != NULL && nbytes <= (size_t)(a->end - a->ptr))) + goto bump; + + /* Need a new backing block */ + if (a->n_backing >= RAY_ARENA_MAX_BACKING) return NULL; + + size_t block_data = BSIZEOF(RAY_ARENA_BLOCK_ORDER) - 32; + /* If request exceeds standard block, allocate exact-fit */ + size_t alloc_size = nbytes > block_data ?
nbytes : block_data; + ray_t* blk = ray_alloc(alloc_size); + if (!blk) return NULL; + a->backing[a->n_backing++] = blk; + a->ptr = (char*)ray_data(blk); + a->end = (char*)blk + BSIZEOF(blk->order); + +bump:; + void* ret = a->ptr; + a->ptr += nbytes; + return ret; +} + +/* Release every backing block with ray_free() and return the arena to its empty state. */ +void ray_scratch_arena_reset(ray_scratch_arena_t* a) { + for (int i = 0; i < a->n_backing; i++) + ray_free(a->backing[i]); + a->n_backing = 0; + a->ptr = NULL; + a->end = NULL; +} + +/* -------------------------------------------------------------------------- + * Parallel begin / end + * -------------------------------------------------------------------------- */ + +void ray_parallel_begin(void) { atomic_store(&ray_parallel_flag, 1); } +/* Clears the flag first, then runs ray_heap_gc(). */ +void ray_parallel_end(void) { + atomic_store(&ray_parallel_flag, 0); + ray_heap_gc(); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/mem/heap.h b/crates/rayforce-sys/vendor/rayforce/src/mem/heap.h new file mode 100644 index 0000000..093f3c4 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/mem/heap.h @@ -0,0 +1,404 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_HEAP_H +#define RAY_HEAP_H + +/* + * heap.h -- Rayforce-style per-thread heap allocator (zero-prefix layout). + * + * Each thread owns one ray_heap_t. Blocks are allocated from self-aligned + * mmap'd pools via buddy splitting. ray_t IS the block — no prefix. + * + * Pool metadata (heap_id, pool_order) is stored in a pool header at + * offset 0 of each self-aligned pool (first min-block reserved). + * Pool base is derived in O(1): ptr & ~(pool_size - 1). + * + * Free-list prev/next overlay nullmap bytes 0-15 of ray_t (unused when free). + * rc == 0 indicates a free block (replaces the old ray_blk_t.used flag). + * + * Cross-thread free uses a foreign_blocks list (checked via pool heap_id). + */ + +#include +#include "core/platform.h" +#include "ops/ops.h" +#include + +/* ===== Attribute Flags ===== + * + * The `attrs` byte in ray_t is type-namespaced: the same bit positions carry + * different meanings depending on the object's type tag. + * + * Bits 0x01-0x03 RAY_SYM vectors: sym index width (RAY_SYM_W8/W16/W32/W64) + * Bits 0x01-0x10 function objects (RAY_UNARY/BINARY/VARY): RAY_FN_* flags + * Bit 0x04 -RAY_I64 atoms: RAY_ATTR_HNSW (HNSW handle in .i64) + * Bit 0x08 vectors: RAY_ATTR_HAS_INDEX (index ray_t* in nullmap[0..7]) + * Bit 0x10 vectors: RAY_ATTR_SLICE + * Bit 0x20 vectors: RAY_ATTR_NULLMAP_EXT + * Bit 0x20 -RAY_SYM: RAY_ATTR_NAME (variable reference) + * Bit 0x40 vectors: RAY_ATTR_HAS_NULLS + * Bit 0x80 all types: RAY_ATTR_ARENA (arena-allocated, no refcount) + * + * Overlapping bit values are safe because consumers always check the type tag + * before interpreting attrs. 
+ */ + +#ifndef RAY_ATTR_SLICE +#define RAY_ATTR_SLICE 0x10 +#endif +#define RAY_ATTR_NULLMAP_EXT 0x20 +#define RAY_ATTR_HAS_NULLS 0x40 +#define RAY_ATTR_ARENA 0x80 + +/* I64 atom carries an owning ray_hnsw_t* in its .i64 slot. + * Checked by HNSW builtins before dereferencing. User must (hnsw-free h). */ +#define RAY_ATTR_HNSW 0x04 + +/* Vector is a linked column. The 8 bytes of the nullmap union at offset + * 8 (i.e. parent->_idx_pad / parent->slice_offset / parent->sym_dict / + * parent->str_pool slot, depending on which arm is in use) hold an int64 + * sym ID naming the target table. Resolved against the global env at + * deref time. Restricted to RAY_I32 / RAY_I64 vectors — STR/SYM/SLICE + * already use bytes 8-15 for their own pointers/data so HAS_LINK on + * those types would alias. + * + * Coexists with HAS_INDEX: bytes 0-7 carry the index pointer (or saved + * nullmap), bytes 8-15 carry the link sym; both bits can be set on the + * same column. A linked vec with nulls is forced to RAY_ATTR_NULLMAP_EXT + * because the inline 128-bit bitmap would alias the link-target slot. + * + * Same numeric value as RAY_ATTR_HNSW (HNSW handles are -RAY_I64 atoms, + * the type tag disambiguates). */ +#define RAY_ATTR_HAS_LINK 0x04 + +/* Vector carries an attached accelerator index in nullmap[0..7] (a ray_t* + * of type RAY_INDEX). The original 16-byte nullmap union content (inline + * bitmap, ext_nullmap, str_ext_null/str_pool, sym_dict) is preserved inside + * the index ray_t and restored on detach. + * + * Attribute-bit invariant when HAS_INDEX is set: + * - HAS_NULLS is *preserved* (not cleared). Many call sites use it as a + * cheap "do I need null-aware logic?" gate; clearing it would silently + * break correctness for nullable columns. The bit is authoritative. + * - NULLMAP_EXT is *cleared*. 
The parent's ext_nullmap field is now the + * index pointer, not a U8 bitmap vec; readers that gate on NULLMAP_EXT + * and dereference ext_nullmap directly would otherwise read garbage. + * The displaced ext-nullmap pointer (if any) lives in + * ix->saved_nullmap[0..7]; ix->saved_attrs records the original + * NULLMAP_EXT bit for restoration on detach. + * + * Direct nullmap-byte readers (morsel iteration, ray_vec_is_null) MUST + * check HAS_INDEX first and route through ix->saved_nullmap / saved_attrs. + * See src/ops/idxop.h. */ +#define RAY_ATTR_HAS_INDEX 0x08 + +/* ===== Internal Allocator Variants ===== */ + +ray_t* ray_alloc_copy(ray_t* v); +ray_t* ray_scratch_alloc(size_t data_size); +ray_t* ray_scratch_realloc(ray_t* v, size_t new_data_size); + +/* ===== COW (Copy-on-Write) ===== */ + +ray_t* ray_cow(ray_t* v); + +/* ===== Memory Statistics ===== */ + +typedef struct { + size_t alloc_count; /* ray_alloc calls */ + size_t free_count; /* ray_free calls */ + size_t bytes_allocated; /* currently allocated */ + size_t peak_bytes; /* high-water mark */ + size_t slab_hits; /* slab cache hits */ + size_t direct_count; /* active direct mmaps */ + size_t direct_bytes; /* bytes in direct mmaps */ + size_t sys_current; /* sys allocator: current mmap'd bytes */ + size_t sys_peak; /* sys allocator: peak mmap'd bytes */ +} ray_mem_stats_t; + +/* ===== Forward Declarations (internal types) ===== */ + +typedef struct ray_heap ray_heap_t; +typedef struct ray_sym_table ray_sym_table_t; +typedef struct ray_sym_map ray_sym_map_t; +typedef struct ray_task ray_task_t; +typedef struct ray_dispatch ray_dispatch_t; + +/* ===== Heap Lifecycle ===== */ + +void ray_heap_init(void); +void ray_heap_destroy(void); +void ray_heap_merge(ray_heap_t* src); +void ray_heap_flush_foreign(void); +void ray_heap_push_pending(ray_heap_t* heap); +void ray_heap_drain_pending(void); +uint8_t ray_order_for_size(size_t data_size); +void ray_mem_stats(ray_mem_stats_t* out); + +void ray_heap_gc(void); 
+void ray_heap_release_pages(void); + +/* -------------------------------------------------------------------------- + * Constants + * -------------------------------------------------------------------------- */ +#define RAY_HEAP_POOL_ORDER 25 /* 32 MB standard pool */ +#define RAY_HEAP_MAX_ORDER 38 /* 256 GB max pool */ +#define RAY_HEAP_FL_SIZE (RAY_HEAP_MAX_ORDER + 1) +#define RAY_MAX_POOLS 512 + +/* -------------------------------------------------------------------------- + * Block size helper + * -------------------------------------------------------------------------- */ +#define BSIZEOF(o) ((size_t)1 << (o)) + +/* -------------------------------------------------------------------------- + * Pool header: first min-block (64B) of each self-aligned pool. + * + * Overlaid on bytes 0-15 of the ray_t at pool offset 0. + * The ray_t at pool offset 0 has rc=1 (prevents coalescing) and + * order=RAY_ORDER_MIN (correct for buddy math). + * -------------------------------------------------------------------------- */ +typedef struct { + uint16_t heap_id; /* owning heap ID (for cross-thread free) */ + uint8_t pool_order; /* pool's top order */ + uint8_t _pad[5]; + void* vm_base; /* original mmap base (for ray_vm_free on Windows) */ +} ray_pool_hdr_t; + +_Static_assert(sizeof(ray_pool_hdr_t) <= 16, + "ray_pool_hdr_t must fit in ray_t nullmap (16 bytes)"); + +/* -------------------------------------------------------------------------- + * Circular sentinel freelist (Rayforce-style) + * + * Each freelist[order] is a sentinel node with prev/next pointers at + * offsets 0/8 — same layout as ray_t.fl_prev/fl_next. This makes + * fl_remove() work without knowing which freelist the block belongs to, + * enabling safe cross-heap buddy coalescing. + * + * Empty list: sentinel.prev = sentinel.next = &sentinel. 
+ * -------------------------------------------------------------------------- */ +typedef struct RAY_ALIGN(32) { + ray_t* fl_prev; /* offset 0 — same as ray_t.fl_prev */ + ray_t* fl_next; /* offset 8 — same as ray_t.fl_next */ +} ray_fl_head_t; + +static inline void fl_init(ray_fl_head_t* h) { /* empty list: both links point back at the sentinel itself */ + h->fl_prev = (ray_t*)h; + h->fl_next = (ray_t*)h; +} + +static inline bool fl_empty(const ray_fl_head_t* h) { /* true when the sentinel's next link is the sentinel itself */ + return h->fl_next == (const ray_t*)h; +} + +/* Unlink a block from whatever circular list it belongs to. + * Works across heaps — no head pointer needed. */ +static inline void fl_remove(ray_t* blk) { /* only the two neighbours are relinked; blk's own fl_prev/fl_next are not cleared */ + blk->fl_prev->fl_next = blk->fl_next; + blk->fl_next->fl_prev = blk->fl_prev; +} + +/* -------------------------------------------------------------------------- + * Pool tracking entry (in ray_heap_t) + * + * Pools are normally backed by anonymous mmap. When anon mmap fails (the + * OS refuses an N-byte allocation because RAM+swap can't satisfy it), the + * allocator falls back to a file-backed mmap pointed at a tempfile in the + * heap's swap directory — this lets fresh allocations exceed RAM, with + * dirty pages flushed to disk by the kernel. + * + * backed=0: anonymous mmap (the common case). swap_fd unused, + * swap_path NULL. + * backed=1: file-backed mmap. swap_fd holds the open fd and + * swap_path holds the absolute path; teardown closes the + * fd, unlinks the file, and ray_sys_frees the path string.
+ * -------------------------------------------------------------------------- */ +typedef struct { + void* base; /* pool base address (self-aligned) */ + char* swap_path; /* tempfile path when backed=1; NULL otherwise (ray_sys_alloc'd) */ + int swap_fd; /* fd when backed=1; -1 otherwise */ + uint8_t pool_order; /* pool order for munmap sizing */ + uint8_t backed; /* 0 = anon mmap, 1 = file-backed swap */ + uint8_t _pad[2]; +} ray_pool_entry_t; + +/* -------------------------------------------------------------------------- + * Pool derivation helpers + * + * ray_pool_of: derive pool header from any block pointer. + * + * All pools are self-aligned (pool base = multiple of pool_size). Standard + * pools (32 MB) are derived in O(1) via a single AND mask. Oversized pools + * (> 32 MB) use a downward walk at 32 MB stride to find the pool header. + * + * Pool header validation: order == RAY_ORDER_MIN, mmod == 0, rc == 1. + * These conditions uniquely identify pool header blocks — cascade/split + * blocks always have order > RAY_ORDER_MIN. + * -------------------------------------------------------------------------- */ + +static inline ray_pool_hdr_t* ray_pool_of(ray_t* v) { + /* Standard pools (32 MB, self-aligned): one AND gives the base. + * Oversized pools need a downward walk but are rare.
*/ + uintptr_t stride = BSIZEOF(RAY_HEAP_POOL_ORDER); /* 32 MB */ + uintptr_t base = (uintptr_t)v & ~(stride - 1); + ray_pool_hdr_t* hdr = (ray_pool_hdr_t*)base; + + /* Fast path: standard pool header at 32 MB boundary (99%+ of calls) */ + if (RAY_LIKELY(hdr->pool_order == RAY_HEAP_POOL_ORDER)) /* only the pool_order byte is inspected here; the order/mmod/rc header markers are not checked on this path */ + return hdr; + + /* Slow path: oversized pool — walk downward at 32 MB stride */ + if (hdr->pool_order > RAY_HEAP_POOL_ORDER && /* this first test involves no walk: it accepts the 32 MB-aligned address itself when it holds an oversized-pool header whose span contains v */ + hdr->pool_order <= RAY_HEAP_MAX_ORDER && + (uintptr_t)v < base + BSIZEOF(hdr->pool_order)) + return hdr; + + for (;;) { /* NOTE(review): the walk ends only on a matching header or when base < stride; nothing here keeps the probed addresses inside mapped memory — confirm callers only pass pointers that lie inside a pool */ + if (base < stride) break; + base -= stride; + hdr = (ray_pool_hdr_t*)base; + ray_t* hdr_blk = (ray_t*)base; + if (hdr_blk->order == RAY_ORDER_MIN && + hdr_blk->mmod == 0 && + ray_atomic_load(&hdr_blk->rc) == 1) { + if (hdr->pool_order >= RAY_HEAP_POOL_ORDER && + hdr->pool_order <= RAY_HEAP_MAX_ORDER && + (uintptr_t)v < base + BSIZEOF(hdr->pool_order)) + return hdr; + } + } + ray_pool_hdr_t* fallback = (ray_pool_hdr_t*)((uintptr_t)v & ~(stride - 1)); /* last resort: the aligned address again, accepted on a plausible pool_order alone (no span check) */ + if (fallback->pool_order >= RAY_HEAP_POOL_ORDER && + fallback->pool_order <= RAY_HEAP_MAX_ORDER) + return fallback; + return NULL; +} + +/* -------------------------------------------------------------------------- + * Buddy derivation: uses self-aligned pool base + * -------------------------------------------------------------------------- */ +static inline ray_t* ray_buddy_of(ray_t* v, uint8_t order, uintptr_t pool_base) { /* flips the BSIZEOF(order) bit of v's offset from pool_base */ + return (ray_t*)(pool_base + (((uintptr_t)v - pool_base) ^ BSIZEOF(order))); +} + +/* -------------------------------------------------------------------------- + * Slab cache for small blocks (orders 6-10, i.e., 64B-1024B) + * -------------------------------------------------------------------------- */ +typedef struct { + uint32_t count; + ray_t* stack[RAY_SLAB_CACHE_SIZE]; +} ray_slab_t; + +#define RAY_SLAB_MIN RAY_ORDER_MIN +#define RAY_SLAB_MAX (RAY_ORDER_MIN + RAY_SLAB_ORDERS - 1) +#define IS_SLAB_ORDER(o) ((o) >= RAY_SLAB_MIN && (o) <= RAY_SLAB_MAX)
+#define SLAB_INDEX(o) ((o) - RAY_SLAB_MIN) + +/* -------------------------------------------------------------------------- + * Per-thread heap + * -------------------------------------------------------------------------- */ +typedef struct ray_heap { + uint64_t avail; /* bitmask: bit N set = freelist[N] non-empty */ + uint16_t id; /* heap identity (for cross-thread free) */ + ray_t* foreign; /* cross-heap freed blocks (lock-free LIFO via fl_next) */ + ray_slab_t slabs[RAY_SLAB_ORDERS]; /* small-block slab caches */ + ray_fl_head_t freelist[RAY_HEAP_FL_SIZE]; /* circular sentinel per order */ + ray_mem_stats_t stats; + uint32_t pool_count; /* number of tracked pools */ + ray_pool_entry_t pools[RAY_MAX_POOLS]; /* pool tracking for destroy/merge */ + struct ray_heap* pending_next; /* link for pending-merge LIFO queue */ + char swap_path[256]; /* dir for file-backed pool fallback (RAY_HEAP_SWAP env, default "./") */ +} ray_heap_t; + +/* -------------------------------------------------------------------------- + * Bitmap-based heap ID allocator (atomic CAS, reusable IDs) + * -------------------------------------------------------------------------- */ +#define RAY_HEAP_ID_WORDS 16 /* 16 * 64 = 1024 IDs (matches registry size) */ +#define RAY_HEAP_ID_BITS (RAY_HEAP_ID_WORDS * 64) + +/* Global pending-merge queue head (lock-free LIFO) */ +extern _Atomic(ray_heap_t*) ray_heap_pending_merge; + +/* -------------------------------------------------------------------------- + * Pool-list scan: find which pool a block belongs to without reading the + * remote pool header (avoids cold cache line 32MB away on hot path). + * Returns pool index in h->pools[], or -1 if block is foreign. 
+ * -------------------------------------------------------------------------- */ +static inline int heap_find_pool(const ray_heap_t* h, const void* ptr) { /* linear scan over h->pools[0 .. pool_count) */ + uintptr_t addr = (uintptr_t)ptr; + for (uint32_t i = 0; i < h->pool_count; i++) { + uintptr_t pb = (uintptr_t)h->pools[i].base; + if (addr >= pb && addr < pb + BSIZEOF(h->pools[i].pool_order)) /* half-open range [base, base + pool size) */ + return (int)i; + } + return -1; +} + +/* -------------------------------------------------------------------------- + * Thread-local state + * -------------------------------------------------------------------------- */ +extern RAY_TLS ray_heap_t* ray_tl_heap; + +/* -------------------------------------------------------------------------- + * Global heap registry: look up any heap by ID so foreign blocks can be + * returned to their owning heap instead of accumulating on the freeing heap. + * -------------------------------------------------------------------------- */ +#define RAY_HEAP_REGISTRY_SIZE 1024 +extern ray_heap_t* ray_heap_registry[RAY_HEAP_REGISTRY_SIZE]; + +/* -------------------------------------------------------------------------- + * Scratch arena: bump-allocator backed by buddy-allocated pages. + * O(1) push (pointer bump), O(n_backing) reset (free all backing blocks). + * -------------------------------------------------------------------------- */ +#define RAY_ARENA_MAX_BACKING 64 +#define RAY_ARENA_BLOCK_ORDER 16 /* 64 KB backing blocks */ + +typedef struct { + ray_t* backing[RAY_ARENA_MAX_BACKING]; /* blocks backing the arena; the first n_backing slots are in use */ + int n_backing; /* number of used slots in backing[] */ + char* ptr; /* next byte to hand out; NULL while the arena has no current block */ + char* end; /* upper bound for ptr within the current block */ +} ray_scratch_arena_t; + +static inline void ray_scratch_arena_init(ray_scratch_arena_t* a) { /* empty arena: no backing blocks, no current block */ + a->n_backing = 0; + a->ptr = NULL; + a->end = NULL; +} + +/* Retain all child/owned refs inside a compound block (STR/LIST/TABLE/etc.). + * Used by ray_block_copy and ray_alloc_copy after shallow-copying a block. + * + * Returns true on success, false if a deep-clone of a uniquely-owned + * resource (e.g. an HNSW index) failed.
On failure, any owned state that + * was memcpy'd into the copy has been neutralized (attr flags cleared, + * pointers zeroed) so the caller may safely ray_free(v) without leaks or + * double-frees. */ +bool ray_retain_owned_refs(ray_t* v); + +void* ray_scratch_arena_push(ray_scratch_arena_t* a, size_t nbytes); +void ray_scratch_arena_reset(ray_scratch_arena_t* a); + +#endif /* RAY_HEAP_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/mem/sys.c b/crates/rayforce-sys/vendor/rayforce/src/mem/sys.c new file mode 100644 index 0000000..4d0f34d --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/mem/sys.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "sys.h" +#include "core/platform.h" +#include +#include +#include + +/* 32-byte header prepended to every sys allocation. 
+ * mmap returns page-aligned addresses; data at page+32 is 32-byte aligned, + * satisfying RAY_BLOCK_ALIGN for the weak ray_alloc stub. */ +#define SYS_HDR_SIZE 32 + +typedef struct { + size_t map_size; /* total mmap'd bytes (header + user, page-rounded) */ + size_t usr_size; /* user-requested bytes (for realloc memcpy) */ + /* Padding sized so the struct totals SYS_HDR_SIZE on both 32-bit + * (WASM, size_t=4 → pad=24) and 64-bit (Linux/macOS, size_t=8 → pad=16). */ + char _pad[SYS_HDR_SIZE - 2 * sizeof(size_t)]; +} sys_hdr_t; + +_Static_assert(sizeof(sys_hdr_t) == SYS_HDR_SIZE, "sys_hdr_t must be 32 bytes"); + +static _Atomic(int64_t) g_sys_current = 0; +static _Atomic(int64_t) g_sys_peak = 0; + +/* Round n up to a multiple of 4096. Callers must keep n <= SIZE_MAX - 4095, + * otherwise n + 4095 wraps and the result is 0. */ +static inline size_t page_round(size_t n) { + return (n + 4095) & ~(size_t)4095; +} + +/* Allocate size bytes (0 is treated as 1) in a fresh ray_vm_alloc() mapping and + * return the address just past the 32-byte header. Returns NULL when the size + * cannot be represented after adding the header and page rounding, or when the + * mapping fails. */ +void* ray_sys_alloc(size_t size) { + if (size == 0) size = 1; + /* Leave room for the header and for page_round()'s +4095 so neither addition can wrap. */ + if (size > SIZE_MAX - SYS_HDR_SIZE - 4095) return NULL; + size_t total = page_round(SYS_HDR_SIZE + size); + void* p = ray_vm_alloc(total); + if (!p) return NULL; + + sys_hdr_t* hdr = (sys_hdr_t*)p; + hdr->map_size = total; + hdr->usr_size = size; + + int64_t cur = atomic_fetch_add_explicit(&g_sys_current, (int64_t)total, + memory_order_relaxed) + (int64_t)total; + int64_t pk = atomic_load_explicit(&g_sys_peak, memory_order_relaxed); + /* Raise the recorded peak to cur; a failed CAS reloads pk, so the loop ends once the stored peak is >= cur. */ + while (cur > pk) { + if (atomic_compare_exchange_weak_explicit(&g_sys_peak, &pk, cur, + memory_order_relaxed, + memory_order_relaxed)) + break; + } + + return (char*)p + SYS_HDR_SIZE; +} + +/* Unmap an allocation made by ray_sys_alloc(); the mapping size is read from the header before the unmap. NULL is ignored. */ +void ray_sys_free(void* ptr) { + if (!ptr) return; + sys_hdr_t* hdr = (sys_hdr_t*)((char*)ptr - SYS_HDR_SIZE); + size_t total = hdr->map_size; + ray_vm_free(hdr, total); + atomic_fetch_sub_explicit(&g_sys_current, (int64_t)total, + memory_order_relaxed); +} + +/* L5: ray_sys_realloc(ptr, 0) frees ptr and returns NULL, matching the + * behavior of some realloc implementations. Callers should not rely on + * this as a general-purpose free — use ray_sys_free() explicitly.
*/ +void* ray_sys_realloc(void* ptr, size_t new_size) { + if (!ptr) return ray_sys_alloc(new_size); + if (new_size == 0) { ray_sys_free(ptr); return NULL; } + /* Same bound as ray_sys_alloc(): keeps page_round() below from wrapping. The old block is left untouched on failure. */ + if (new_size > SIZE_MAX - SYS_HDR_SIZE - 4095) return NULL; + + sys_hdr_t* old_hdr = (sys_hdr_t*)((char*)ptr - SYS_HDR_SIZE); + size_t old_usr = old_hdr->usr_size; + size_t new_total = page_round(SYS_HDR_SIZE + new_size); + + /* Same page count — just update user size */ + if (new_total == old_hdr->map_size) { + old_hdr->usr_size = new_size; + return ptr; + } + + void* new_ptr = ray_sys_alloc(new_size); + if (!new_ptr) return NULL; + /* Copy the smaller of the old and new user sizes. */ + memcpy(new_ptr, ptr, old_usr < new_size ? old_usr : new_size); + ray_sys_free(ptr); + return new_ptr; +} + +/* Duplicate the NUL-terminated string s into a ray_sys_alloc() buffer; NULL for NULL input or allocation failure. */ +char* ray_sys_strdup(const char* s) { + if (!s) return NULL; + size_t len = strlen(s); + char* dup = (char*)ray_sys_alloc(len + 1); + if (!dup) return NULL; + memcpy(dup, s, len + 1); + return dup; +} + +/* Store the current and peak mapped-byte counters; both output pointers must be non-NULL. */ +void ray_sys_get_stat(int64_t* out_current, int64_t* out_peak) { + *out_current = atomic_load_explicit(&g_sys_current, memory_order_relaxed); + *out_peak = atomic_load_explicit(&g_sys_peak, memory_order_relaxed); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/mem/sys.h b/crates/rayforce-sys/vendor/rayforce/src/mem/sys.h new file mode 100644 index 0000000..eb53154 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/mem/sys.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_MEM_SYS_H +#define RAY_MEM_SYS_H + +#include +#include + +/* -------------------------------------------------------------------------- + * System-level mmap allocator for infrastructure that can't use the buddy + * allocator (cross-thread lifetime, bootstrap, global state). + * + * Every allocation is tracked. ray_mem_stats() reports the totals so users + * can see the full memory footprint. + * + * Each allocation prepends a 32-byte header (stores mmap size + user size), + * so ray_sys_free() needs no size argument. 
+ * -------------------------------------------------------------------------- */ + +void* ray_sys_alloc(size_t size); +void* ray_sys_realloc(void* ptr, size_t new_size); +void ray_sys_free(void* ptr); +char* ray_sys_strdup(const char* s); + +/* Read current sys allocator counters (called by ray_mem_stats in arena.c) */ +void ray_sys_get_stat(int64_t* out_current, int64_t* out_peak); + +#endif /* RAY_MEM_SYS_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/agg.c b/crates/rayforce-sys/vendor/rayforce/src/ops/agg.c new file mode 100644 index 0000000..c8b5fa3 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/agg.c @@ -0,0 +1,509 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "lang/internal.h" +#include "ops/ops.h" +#include "mem/heap.h" + +#include /* qsort (introselect fallback) */ + +static int dbl_cmp(const void* a, const void* b) { + double da = *(const double*)a, db = *(const double*)b; + return (da > db) - (da < db); +} + +/* Partition vals[lo..hi] so that vals[k] holds the kth-smallest element, + * with everything to the left ≤ and everything to the right ≥. Average + * O(n) (Hoare quickselect with median-of-three), worst-case O(n log n) + * via qsort fallback when recursion exceeds 2*log2(range). Mirrors + * std::nth_element's contract; DuckDB's quantile path uses the same + * pattern (extension/core_functions/aggregate/holistic/quantile.cpp, + * quantile_sort_tree.hpp:191-195). */ +static void nth_element_dbl(double* a, int64_t lo, int64_t hi, int64_t k) { + int depth_limit = 0; + for (int64_t r = hi - lo + 1; r > 0; r >>= 1) depth_limit++; + depth_limit *= 2; + while (hi - lo > 16) { + if (depth_limit-- <= 0) { + qsort(a + lo, (size_t)(hi - lo + 1), sizeof(double), dbl_cmp); + return; + } + int64_t mid = lo + ((hi - lo) >> 1); + if (a[lo] > a[mid]) { double t = a[lo]; a[lo] = a[mid]; a[mid] = t; } + if (a[lo] > a[hi]) { double t = a[lo]; a[lo] = a[hi]; a[hi] = t; } + if (a[mid] > a[hi]) { double t = a[mid]; a[mid] = a[hi]; a[hi] = t; } + /* Park pivot at hi-1; partition (lo, hi-1) with sentinels at both ends. */ + { double t = a[mid]; a[mid] = a[hi - 1]; a[hi - 1] = t; } + double pivot = a[hi - 1]; + int64_t i = lo, j = hi - 1; + for (;;) { + while (a[++i] < pivot) {} + while (a[--j] > pivot) {} + if (i >= j) break; + double t = a[i]; a[i] = a[j]; a[j] = t; + } + /* Restore pivot to its final resting position i. */ + { double t = a[i]; a[i] = a[hi - 1]; a[hi - 1] = t; } + if (k < i) hi = i - 1; + else if (k > i) lo = i + 1; + else return; + } + /* Small range: insertion sort the slice covers vals[lo..hi]. 
*/ + for (int64_t i = lo + 1; i <= hi; i++) { + double key = a[i]; + int64_t j = i - 1; + while (j >= lo && a[j] > key) { a[j + 1] = a[j]; j--; } + a[j + 1] = key; + } +} + +/* ══════════════════════════════════════════ + * Aggregation builtins + * ══════════════════════════════════════════ */ + +/* Build a one-op DAG over a single input vector and execute it. */ +#define AGG_VEC_VIA_DAG(x, ctor) do { \ + ray_graph_t* g = ray_graph_new(NULL); \ + if (!g) return ray_error("oom", NULL); \ + ray_op_t* in = ray_graph_input_vec(g, x); \ + ray_op_t* op = ctor(g, in); \ + return ray_lazy_materialize(ray_lazy_wrap(g, op)); \ +} while(0) + +/* DAG executor returns I64 for all integer types — cast back to original. */ +static ray_t* recast_i64_to_orig(ray_t* r, int8_t orig_type) { + if (!r || RAY_IS_ERR(r)) return r; + if (ray_is_atom(r) && r->type == -RAY_I64 && orig_type != RAY_I64 && orig_type != RAY_F64) { + int64_t v = r->i64; + ray_release(r); + if (orig_type == RAY_DATE) return ray_date((int32_t)v); + if (orig_type == RAY_TIME) return ray_time(v); + if (orig_type == RAY_TIMESTAMP) return ray_timestamp(v); + if (orig_type == RAY_I32) return make_i32((int32_t)v); + if (orig_type == RAY_I16) return make_i16((int16_t)v); + if (orig_type == RAY_U8) return make_u8((uint8_t)v); + } + return r; +} + +ray_t* ray_sum_fn(ray_t* x) { + if (ray_is_lazy(x)) return ray_lazy_append(x, OP_SUM); + if (ray_is_atom(x)) { + /* u8/i16 scalar sum promotes to i64 */ + if (x->type == -RAY_U8) return make_i64((int64_t)x->u8); + if (x->type == -RAY_I16) return make_i64((int64_t)x->i16); + ray_retain(x); return x; + } + if (ray_is_vec(x)) { + if (x->type == RAY_DATE) return ray_error("type", NULL); + /* Narrow/temporal types need specific return constructors that the + * DAG executor doesn't provide — use scalar path for these. 
*/ + if (x->type == RAY_I32 || x->type == RAY_I16 || x->type == RAY_U8 || + x->type == RAY_TIME || x->type == RAY_TIMESTAMP) { + int64_t n = x->len; + bool has_nulls = (x->attrs & RAY_ATTR_HAS_NULLS) != 0; + int64_t sum = 0; + if (x->type == RAY_I32) { + int32_t* d = (int32_t*)ray_data(x); + if (has_nulls) { for (int64_t i = 0; i < n; i++) if (!ray_vec_is_null(x, i)) sum += d[i]; } + else { for (int64_t i = 0; i < n; i++) sum += d[i]; } + return make_i64(sum); + } else if (x->type == RAY_I16) { + int16_t* d = (int16_t*)ray_data(x); + if (has_nulls) { for (int64_t i = 0; i < n; i++) if (!ray_vec_is_null(x, i)) sum += d[i]; } + else { for (int64_t i = 0; i < n; i++) sum += d[i]; } + return make_i64(sum); + } else if (x->type == RAY_U8) { + uint8_t* d = (uint8_t*)ray_data(x); + if (has_nulls) { for (int64_t i = 0; i < n; i++) if (!ray_vec_is_null(x, i)) sum += d[i]; } + else { for (int64_t i = 0; i < n; i++) sum += d[i]; } + return make_i64(sum); + } else if (x->type == RAY_TIME) { + int32_t* d = (int32_t*)ray_data(x); + if (has_nulls) { for (int64_t i = 0; i < n; i++) if (!ray_vec_is_null(x, i)) sum += d[i]; } + else { for (int64_t i = 0; i < n; i++) sum += d[i]; } + return ray_time(sum); + } else { + int64_t* d = (int64_t*)ray_data(x); + if (has_nulls) { for (int64_t i = 0; i < n; i++) if (!ray_vec_is_null(x, i)) sum += d[i]; } + else { for (int64_t i = 0; i < n; i++) sum += d[i]; } + return ray_timestamp(sum); + } + } + /* I64/F64: parallel morsel-driven reduction via DAG executor */ + AGG_VEC_VIA_DAG(x, ray_sum); + } + if (!is_list(x)) return ray_error("type", NULL); + int64_t len = ray_len(x); + if (len == 0) return make_i64(0); + ray_t** elems = (ray_t**)ray_data(x); + int has_float = 0; + double fsum = 0.0; + int64_t isum = 0; + for (int64_t i = 0; i < len; i++) { + if (!is_numeric(elems[i])) return ray_error("type", NULL); + if (RAY_ATOM_IS_NULL(elems[i])) { + if (elems[i]->type == -RAY_F64) has_float = 1; + continue; + } + if (elems[i]->type == -RAY_F64) { 
has_float = 1; fsum += elems[i]->f64; } + else if (elems[i]->type == -RAY_I64) { isum += elems[i]->i64; fsum += (double)elems[i]->i64; } + else { int64_t v = (int64_t)as_f64(elems[i]); isum += v; fsum += (double)v; } + } + return has_float ? make_f64(fsum) : make_i64(isum); +} + +ray_t* ray_count_fn(ray_t* x) { + if (ray_is_lazy(x)) return ray_lazy_append(x, OP_COUNT); + if (x->type == RAY_TABLE) return make_i64(ray_table_nrows(x)); + if (x->type == RAY_DICT) return make_i64(ray_dict_len(x)); + /* String atom: count = string length */ + if (ray_is_atom(x) && (-x->type) == RAY_STR) + return make_i64((int64_t)ray_str_len(x)); + if (ray_is_vec(x)) + return make_i64(x->len); /* count = total length including nulls */ + if (!is_list(x)) { + /* Scalar atom → count 1 */ + if (ray_is_atom(x)) return make_i64(1); + return ray_error("type", NULL); + } + return make_i64(ray_len(x)); +} + +ray_t* ray_avg_fn(ray_t* x) { + if (ray_is_lazy(x)) return ray_lazy_append(x, OP_AVG); + if (ray_is_atom(x)) { + if (RAY_ATOM_IS_NULL(x)) return ray_typed_null(-RAY_F64); + if (is_numeric(x)) return make_f64(as_f64(x)); + ray_retain(x); return x; + } + if (ray_is_vec(x)) AGG_VEC_VIA_DAG(x, ray_avg); + if (!is_list(x)) return ray_error("type", NULL); + int64_t len = ray_len(x); + if (len == 0) return ray_error("domain", NULL); + ray_t** elems = (ray_t**)ray_data(x); + double sum = 0.0; + int64_t cnt = 0; + for (int64_t i = 0; i < len; i++) { + if (!is_numeric(elems[i])) return ray_error("type", NULL); + if (RAY_ATOM_IS_NULL(elems[i])) continue; + sum += as_f64(elems[i]); cnt++; + } + if (cnt == 0) return ray_typed_null(-RAY_F64); + return make_f64(sum / (double)cnt); +} + +ray_t* ray_min_fn(ray_t* x) { + if (ray_is_lazy(x)) return ray_lazy_append(x, OP_MIN); + if (ray_is_atom(x)) { ray_retain(x); return x; } + if (ray_is_vec(x)) { + int8_t orig_type = x->type; + ray_graph_t* g = ray_graph_new(NULL); + if (!g) return ray_error("oom", NULL); + ray_op_t* in = ray_graph_input_vec(g, x); + 
ray_op_t* op = ray_min_op(g, in); + ray_t* r = ray_lazy_materialize(ray_lazy_wrap(g, op)); + return recast_i64_to_orig(r, orig_type); + } + if (!is_list(x)) return ray_error("type", NULL); + int64_t len = ray_len(x); + if (len == 0) return ray_error("domain", NULL); + ray_t** elems = (ray_t**)ray_data(x); + int has_float = 0, found = 0; + double fmin = 0; int64_t imin = 0; + for (int64_t i = 0; i < len; i++) { + if (!is_numeric(elems[i])) return ray_error("type", NULL); + if (elems[i]->type == -RAY_F64) has_float = 1; + if (RAY_ATOM_IS_NULL(elems[i])) continue; + double v = as_f64(elems[i]); + if (!found || v < fmin) { fmin = v; imin = elems[i]->type == -RAY_I64 ? elems[i]->i64 : 0; found = 1; } + } + if (!found) return ray_typed_null(has_float ? -RAY_F64 : -RAY_I64); + return has_float ? make_f64(fmin) : make_i64(imin); +} + +ray_t* ray_max_fn(ray_t* x) { + if (ray_is_lazy(x)) return ray_lazy_append(x, OP_MAX); + if (ray_is_atom(x)) { ray_retain(x); return x; } + if (ray_is_vec(x)) { + int8_t orig_type = x->type; + ray_graph_t* g = ray_graph_new(NULL); + if (!g) return ray_error("oom", NULL); + ray_op_t* in = ray_graph_input_vec(g, x); + ray_op_t* op = ray_max_op(g, in); + ray_t* r = ray_lazy_materialize(ray_lazy_wrap(g, op)); + return recast_i64_to_orig(r, orig_type); + } + if (!is_list(x)) return ray_error("type", NULL); + int64_t len = ray_len(x); + if (len == 0) return ray_error("domain", NULL); + ray_t** elems = (ray_t**)ray_data(x); + int has_float = 0, found = 0; + double fmax = 0; int64_t imax = 0; + for (int64_t i = 0; i < len; i++) { + if (!is_numeric(elems[i])) return ray_error("type", NULL); + if (elems[i]->type == -RAY_F64) has_float = 1; + if (RAY_ATOM_IS_NULL(elems[i])) continue; + double v = as_f64(elems[i]); + if (!found || v > fmax) { fmax = v; imax = elems[i]->type == -RAY_I64 ? elems[i]->i64 : 0; found = 1; } + } + if (!found) return ray_typed_null(has_float ? -RAY_F64 : -RAY_I64); + return has_float ? 
make_f64(fmax) : make_i64(imax); +} + +ray_t* ray_first_fn(ray_t* x) { + if (ray_is_lazy(x)) return ray_lazy_append(x, OP_FIRST); + /* String first: return first char */ + if (ray_is_atom(x) && (-x->type) == RAY_STR) { + size_t slen = ray_str_len(x); + if (slen == 0) return ray_error("domain", NULL); + const char* p = ray_str_ptr(x); + return ray_str(p, 1); + } + if (ray_is_atom(x)) { ray_retain(x); return x; } + /* Table first: return first row as dict */ + if (x->type == RAY_TABLE) { + if (ray_table_nrows(x) == 0) return ray_error("domain", NULL); + ray_t* idx = make_i64(0); + ray_t* result = ray_at_fn(x, idx); + ray_release(idx); + return result; + } + if (ray_is_vec(x)) { + if (ray_len(x) == 0) return ray_typed_null(-x->type); + /* For non-I64/F64 types route through collection_elem which + * preserves the element type. The DAG path widens to i64 for + * DATE/TIME/TIMESTAMP/BOOL/U8 — bypass it. */ + if (x->type == RAY_SYM || x->type == RAY_I32 || x->type == RAY_I16 || + x->type == RAY_GUID || x->type == RAY_STR || x->type == RAY_BOOL || + x->type == RAY_U8 || x->type == RAY_DATE || x->type == RAY_TIME || + x->type == RAY_TIMESTAMP) { + int alloc = 0; + return collection_elem(x, 0, &alloc); + } + AGG_VEC_VIA_DAG(x, ray_first); + } + if (!is_list(x)) return ray_error("type", NULL); + if (ray_len(x) == 0) return ray_typed_null(-RAY_I64); + ray_t* elem = ((ray_t**)ray_data(x))[0]; + ray_retain(elem); + return elem; +} + +ray_t* ray_last_fn(ray_t* x) { + if (ray_is_lazy(x)) return ray_lazy_append(x, OP_LAST); + /* String last: return last char */ + if (ray_is_atom(x) && (-x->type) == RAY_STR) { + size_t slen = ray_str_len(x); + if (slen == 0) return ray_error("domain", NULL); + const char* p = ray_str_ptr(x); + return ray_str(p + slen - 1, 1); + } + if (ray_is_atom(x)) { ray_retain(x); return x; } + /* Table last: return last row as dict */ + if (x->type == RAY_TABLE) { + int64_t nrows = ray_table_nrows(x); + if (nrows == 0) return ray_error("domain", NULL); + 
ray_t* idx = make_i64(nrows - 1); + ray_t* result = ray_at_fn(x, idx); + ray_release(idx); + return result; + } + if (ray_is_vec(x)) { + if (ray_len(x) == 0) return ray_typed_null(-x->type); + /* See ray_first_fn for rationale on the type whitelist. */ + if (x->type == RAY_SYM || x->type == RAY_I32 || x->type == RAY_I16 || + x->type == RAY_GUID || x->type == RAY_STR || x->type == RAY_BOOL || + x->type == RAY_U8 || x->type == RAY_DATE || x->type == RAY_TIME || + x->type == RAY_TIMESTAMP) { + int alloc = 0; + return collection_elem(x, ray_len(x) - 1, &alloc); + } + AGG_VEC_VIA_DAG(x, ray_last); + } + if (!is_list(x)) return ray_error("type", NULL); + int64_t len = ray_len(x); + if (len == 0) return ray_typed_null(-RAY_I64); + ray_t* elem = ((ray_t**)ray_data(x))[len - 1]; + ray_retain(elem); + return elem; +} + +/* Helper: copy non-null vec elements to double scratch buffer, compacted. + * scratch->len is set to the number of non-null values copied. + * Returns scratch ray_t* (caller must ray_release), or error. 
*/ +static ray_t* vec_to_f64_scratch(ray_t* x, double** out_vals) { + int64_t len = ray_len(x); + ray_t* scratch = ray_alloc(len * sizeof(double)); + if (!scratch) return ray_error("oom", NULL); + scratch->type = RAY_F64; + double* vals = (double*)ray_data(scratch); + int64_t cnt = 0; + if (x->type == RAY_I64) { + int64_t* d = (int64_t*)ray_data(x); + for (int64_t i = 0; i < len; i++) { if (!ray_vec_is_null(x, i)) vals[cnt++] = (double)d[i]; } + } else if (x->type == RAY_F64) { + double* d = (double*)ray_data(x); + for (int64_t i = 0; i < len; i++) { if (!ray_vec_is_null(x, i)) vals[cnt++] = d[i]; } + } else if (x->type == RAY_I32) { + int32_t* d = (int32_t*)ray_data(x); + for (int64_t i = 0; i < len; i++) { if (!ray_vec_is_null(x, i)) vals[cnt++] = (double)d[i]; } + } else if (x->type == RAY_I16) { + int16_t* d = (int16_t*)ray_data(x); + for (int64_t i = 0; i < len; i++) { if (!ray_vec_is_null(x, i)) vals[cnt++] = (double)d[i]; } + } else if (x->type == RAY_U8) { + uint8_t* d = (uint8_t*)ray_data(x); + for (int64_t i = 0; i < len; i++) { if (!ray_vec_is_null(x, i)) vals[cnt++] = (double)d[i]; } + } else { + ray_release(scratch); + return ray_error("type", NULL); + } + scratch->len = cnt; + *out_vals = vals; + return scratch; +} + +ray_t* ray_med_fn(ray_t* x) { + if (ray_is_lazy(x)) x = ray_lazy_materialize(x); + if (RAY_IS_ERR(x)) return x; + /* Scalar: median of single value → f64 */ + if (ray_is_atom(x)) { + if (RAY_ATOM_IS_NULL(x)) return ray_typed_null(-RAY_F64); + if (is_numeric(x)) return make_f64(as_f64(x)); + return ray_error("type", NULL); + } + int64_t len; + ray_t* scratch = NULL; + double* vals = NULL; + + if (ray_is_vec(x)) { + len = ray_len(x); + if (len == 0) return ray_typed_null(-RAY_F64); + scratch = vec_to_f64_scratch(x, &vals); + if (RAY_IS_ERR(scratch)) return scratch; + } else if (is_list(x)) { + len = ray_len(x); + if (len == 0) return ray_typed_null(-RAY_F64); + ray_t** elems = (ray_t**)ray_data(x); + scratch = ray_alloc(len * 
sizeof(double)); + if (!scratch) return ray_error("oom", NULL); + scratch->type = RAY_F64; + scratch->len = 0; + vals = (double*)ray_data(scratch); + int64_t cnt_l = 0; + for (int64_t i = 0; i < len; i++) { + if (ray_is_atom(elems[i]) && RAY_ATOM_IS_NULL(elems[i])) continue; + if (!is_numeric(elems[i])) { ray_release(scratch); return ray_error("type", NULL); } + vals[cnt_l++] = as_f64(elems[i]); + } + scratch->len = cnt_l; + } else { + return ray_error("type", NULL); + } + + /* scratch->len holds the count of non-null values (already compacted) */ + int64_t cnt = scratch->len; + if (cnt == 0) { ray_release(scratch); return ray_typed_null(-RAY_F64); } + + /* O(n) average partial-sort. Two-call pattern from DuckDB's + * QuantileInterpolator::Operation (quantile_sort_tree.hpp:191-195): + * for odd n one nth_element places the middle; for even n a second + * nth_element on the right half locates the upper middle. Replaces + * an O(n^2) insertion sort that hung for groups larger than ~10k. */ + int64_t k = cnt / 2; + double median; + if (cnt % 2 == 1) { + nth_element_dbl(vals, 0, cnt - 1, k); + median = vals[k]; + } else { + nth_element_dbl(vals, 0, cnt - 1, k - 1); + nth_element_dbl(vals, k, cnt - 1, k); + median = (vals[k - 1] + vals[k]) / 2.0; + } + ray_release(scratch); + return make_f64(median); +} + +static ray_t* var_stddev_core(ray_t* x, int sample, int take_sqrt); + + +ray_t* ray_dev_fn(ray_t* x) { return var_stddev_core(x, 0, 1); } + +/* Shared core for variance / stddev in sample or population mode. + * sample=1 -> divide sum-of-squares by (n-1); sample=0 -> divide by n. + * take_sqrt=1 -> stddev; take_sqrt=0 -> variance. */ +static ray_t* var_stddev_core(ray_t* x, int sample, int take_sqrt) { + if (ray_is_lazy(x)) x = ray_lazy_materialize(x); + if (RAY_IS_ERR(x)) return x; + if (ray_is_atom(x)) { + if (RAY_ATOM_IS_NULL(x)) return ray_typed_null(-RAY_F64); + if (is_numeric(x)) return sample ? 
ray_typed_null(-RAY_F64) : make_f64(0.0); + return ray_error("type", NULL); + } + + double* vals = NULL; + ray_t* scratch = NULL; + int64_t cnt = 0; + + if (ray_is_vec(x)) { + if (ray_len(x) == 0) return ray_typed_null(-RAY_F64); + scratch = vec_to_f64_scratch(x, &vals); + if (RAY_IS_ERR(scratch)) return scratch; + cnt = scratch->len; + } else if (is_list(x)) { + int64_t len = ray_len(x); + if (len == 0) return ray_typed_null(-RAY_F64); + ray_t** elems = (ray_t**)ray_data(x); + /* Use a fresh f64 vec as a scratch buffer so we reuse the vec path's cleanup. */ + scratch = ray_vec_new(RAY_F64, len); + if (RAY_IS_ERR(scratch)) return scratch; + vals = (double*)ray_data(scratch); + for (int64_t i = 0; i < len; i++) { + if (!is_numeric(elems[i])) { ray_release(scratch); return ray_error("type", NULL); } + if (!RAY_ATOM_IS_NULL(elems[i])) vals[cnt++] = as_f64(elems[i]); + } + scratch->len = cnt; + } else { + return ray_error("type", NULL); + } + + if (cnt == 0 || (sample && cnt <= 1)) { + ray_release(scratch); + return ray_typed_null(-RAY_F64); + } + + double sum = 0.0; + for (int64_t i = 0; i < cnt; i++) sum += vals[i]; + double mean = sum / (double)cnt; + double sqdiff = 0.0; + for (int64_t i = 0; i < cnt; i++) { double d = vals[i] - mean; sqdiff += d * d; } + ray_release(scratch); + double divisor = sample ? (double)(cnt - 1) : (double)cnt; + double v = sqdiff / divisor; + return make_f64(take_sqrt ? 
sqrt(v) : v); +} + +ray_t* ray_stddev_fn(ray_t* x) { return var_stddev_core(x, 1, 1); } +ray_t* ray_stddev_pop_fn(ray_t* x) { return var_stddev_core(x, 0, 1); } +ray_t* ray_var_fn(ray_t* x) { return var_stddev_core(x, 1, 0); } +ray_t* ray_var_pop_fn(ray_t* x) { return var_stddev_core(x, 0, 0); } diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/arith.c b/crates/rayforce-sys/vendor/rayforce/src/ops/arith.c new file mode 100644 index 0000000..72d92ba --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/arith.c @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "lang/internal.h" + +/* Arithmetic builtins (atom-only). + * Vector dispatch goes through the DAG executor. 
*/ + +ray_t* ray_add_fn(ray_t* a, ray_t* b) { + /* Vector fast path — only when at least one operand is a typed vector */ + + /* Temporal + integer arithmetic (only int types, not float) */ + if (is_temporal(a) && is_numeric(b) && b->type != -RAY_F64) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) + return ray_typed_null(a->type); + + int64_t v = as_i64(b); + if (a->type == -RAY_DATE) return ray_date(a->i64 + v); + if (a->type == -RAY_TIME) return ray_time(a->i64 + v); + if (a->type == -RAY_TIMESTAMP) return ray_timestamp(a->i64 + v); + } + if (is_numeric(a) && a->type != -RAY_F64 && is_temporal(b)) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) + return ray_typed_null(b->type); + + int64_t v = as_i64(a); + if (b->type == -RAY_DATE) return ray_date(b->i64 + v); + if (b->type == -RAY_TIME) return ray_time(b->i64 + v); + if (b->type == -RAY_TIMESTAMP) return ray_timestamp(b->i64 + v); + } + /* Reject float + temporal */ + if ((a->type == -RAY_F64 && is_temporal(b)) || (is_temporal(a) && b->type == -RAY_F64)) + return ray_error("type", NULL); + /* Reject null_numeric + temporal (for null floats etc) */ + if (is_numeric(a) && RAY_ATOM_IS_NULL(a) && is_temporal(b)) + return ray_error("type", NULL); + if (is_temporal(a) && is_numeric(b) && RAY_ATOM_IS_NULL(b)) + return ray_error("type", NULL); + /* DATE + TIME → TIMESTAMP */ + if (a->type == -RAY_DATE && b->type == -RAY_TIME) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) return ray_typed_null(-RAY_TIMESTAMP); + return ray_timestamp(a->i64 * 86400000000000LL + b->i64 * 1000000LL); + } + if (a->type == -RAY_TIME && b->type == -RAY_DATE) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) return ray_typed_null(-RAY_TIMESTAMP); + return ray_timestamp(b->i64 * 86400000000000LL + a->i64 * 1000000LL); + } + /* TIME + TIME → TIME */ + if (a->type == -RAY_TIME && b->type == -RAY_TIME) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) return ray_typed_null(-RAY_TIME); + return ray_time(a->i64 + b->i64); + } + /* 
TIME + TIMESTAMP → TIMESTAMP (add ms as ns) */ + if (a->type == -RAY_TIME && b->type == -RAY_TIMESTAMP) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) return ray_typed_null(-RAY_TIMESTAMP); + return ray_timestamp(b->i64 + a->i64 * 1000000LL); + } + if (a->type == -RAY_TIMESTAMP && b->type == -RAY_TIME) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) return ray_typed_null(-RAY_TIMESTAMP); + return ray_timestamp(a->i64 + b->i64 * 1000000LL); + } + + if (!is_numeric(a) || !is_numeric(b)) + return ray_error("type", "cannot add %s and %s", + ray_type_name(a->type), ray_type_name(b->type)); + /* Null propagation */ + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) return null_for_promoted(a, b); + if (is_float_op(a, b)) return make_f64(as_f64(a) + as_f64(b)); + int8_t rt = promote_int_type(a, b); + return make_typed_int(rt, as_i64(a) + as_i64(b)); +} + +ray_t* ray_sub_fn(ray_t* a, ray_t* b) { + + /* Temporal - int null propagation (both operands) */ + if (is_temporal(a) && is_numeric(b)) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) + return ray_typed_null(a->type); + } + if (is_numeric(a) && is_temporal(b)) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) + return ray_typed_null(b->type); + } + /* DATE - int → DATE */ + if (a->type == -RAY_DATE && is_numeric(b)) { + return ray_date(a->i64 - as_i64(b)); + } + /* DATE - DATE → i32 (days difference) */ + if (a->type == -RAY_DATE && b->type == -RAY_DATE) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) return ray_typed_null(-RAY_I32); + return ray_i32((int32_t)(a->i64 - b->i64)); + } + /* DATE - TIME → TIMESTAMP */ + if (a->type == -RAY_DATE && b->type == -RAY_TIME) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) return ray_typed_null(-RAY_TIMESTAMP); + return ray_timestamp(a->i64 * 86400000000000LL - b->i64 * 1000000LL); + } + /* TIME - int → TIME */ + if (a->type == -RAY_TIME && is_numeric(b)) { + return ray_time(a->i64 - as_i64(b)); + } + /* int - TIME → TIME (negative) */ + if (is_numeric(a) && 
b->type == -RAY_TIME) { + return ray_time(as_i64(a) - b->i64); + } + /* TIME - TIME → TIME */ + if (a->type == -RAY_TIME && b->type == -RAY_TIME) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) return ray_typed_null(-RAY_TIME); + return ray_time(a->i64 - b->i64); + } + /* TIMESTAMP - int → TIMESTAMP */ + if (a->type == -RAY_TIMESTAMP && is_numeric(b)) { + return ray_timestamp(a->i64 - as_i64(b)); + } + /* TIMESTAMP - TIME → TIMESTAMP */ + if (a->type == -RAY_TIMESTAMP && b->type == -RAY_TIME) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) return ray_typed_null(-RAY_TIMESTAMP); + return ray_timestamp(a->i64 - b->i64 * 1000000LL); + } + /* TIMESTAMP - TIMESTAMP → int (nanos difference) */ + if (a->type == -RAY_TIMESTAMP && b->type == -RAY_TIMESTAMP) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) return ray_typed_null(-RAY_I64); + return make_i64(a->i64 - b->i64); + } + /* TIMESTAMP - DATE → error */ + if (a->type == -RAY_TIMESTAMP && b->type == -RAY_DATE) + return ray_error("type", NULL); + + if (!is_numeric(a) || !is_numeric(b)) + return ray_error("type", "cannot subtract %s and %s", + ray_type_name(a->type), ray_type_name(b->type)); + /* Null propagation */ + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) return null_for_promoted(a, b); + if (is_float_op(a, b)) { + double r = as_f64(a) - as_f64(b); + if (r == 0.0) r = 0.0; /* normalize -0.0 to +0.0 */ + return make_f64(r); + } + int8_t rt = promote_int_type_right(a, b); + return make_typed_int(rt, as_i64(a) - as_i64(b)); +} + +ray_t* ray_mul_fn(ray_t* a, ray_t* b) { + + /* int * TIME → TIME, TIME * int → TIME */ + if (is_numeric(a) && b->type == -RAY_TIME) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) return ray_typed_null(-RAY_TIME); + return ray_time(as_i64(a) * b->i64); + } + if (a->type == -RAY_TIME && is_numeric(b)) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) return ray_typed_null(-RAY_TIME); + return ray_time(a->i64 * as_i64(b)); + } + /* TIME * TIME → error */ + if (a->type == 
-RAY_TIME && b->type == -RAY_TIME) + return ray_error("type", NULL); + + if (!is_numeric(a) || !is_numeric(b)) + return ray_error("type", "cannot multiply %s and %s", + ray_type_name(a->type), ray_type_name(b->type)); + /* Null propagation */ + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) return null_for_promoted(a, b); + if (is_float_op(a, b)) return make_f64(as_f64(a) * as_f64(b)); + int8_t rt = promote_int_type(a, b); + return make_typed_int(rt, as_i64(a) * as_i64(b)); +} + +ray_t* ray_div_fn(ray_t* a, ray_t* b) { + /* Temporal / numeric → temporal (same type as left operand) */ + if (is_temporal(a) && is_numeric(b)) { + if (RAY_ATOM_IS_NULL(b) || RAY_ATOM_IS_NULL(a)) + return ray_typed_null(a->type); + if (is_float_op(a, b)) { + double bv = as_f64(b); + if (bv == 0.0) + return ray_typed_null(a->type); + int64_t result = (int64_t)floor((double)a->i64 / bv); + if (a->type == -RAY_TIME) return ray_time(result); + if (a->type == -RAY_DATE) return ray_date(result); + return ray_timestamp(result); + } + int64_t bv = as_i64(b); + if (bv == 0) + return ray_typed_null(a->type); + int64_t av = a->i64; + int64_t q = av / bv; + if ((av ^ bv) < 0 && q * bv != av) q--; + if (a->type == -RAY_TIME) return ray_time(q); + if (a->type == -RAY_DATE) return ray_date(q); + return ray_timestamp(q); + } + if (!is_numeric(a) || !is_numeric(b)) + return ray_error("type", "cannot divide %s by %s", + ray_type_name(a->type), ray_type_name(b->type)); + /* u8: unsigned byte division — div by 0 returns 0 */ + if (a->type == -RAY_U8) { + uint8_t bv = (uint8_t)as_i64(b); + if (bv == 0 || RAY_ATOM_IS_NULL(b)) return make_u8(0); + if (RAY_ATOM_IS_NULL(a)) return make_u8(0); + return make_u8((uint8_t)((uint8_t)as_i64(a) / bv)); + } + /* Null propagation — null operand → typed null matching left operand type */ + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) + return ray_typed_null(a->type); + + /* Integer (floor) division — always returns integer. 
+ * Float operands are converted to i64 via floor(a/b). */ + if (is_float_op(a, b)) { + double bv = as_f64(b); + if (bv == 0.0) + return ray_typed_null(a->type); + double result = floor(as_f64(a) / bv); + /* Return type matches LEFT operand */ + if (a->type == -RAY_F64) return make_f64(result); + if (a->type == -RAY_I16) return make_i16((int16_t)(int64_t)result); + if (a->type == -RAY_I32) return make_i32((int32_t)(int64_t)result); + if (result >= (double)INT64_MIN && result <= (double)INT64_MAX) + return make_i64((int64_t)result); + return ray_typed_null(-RAY_I64); + } + int64_t bv = as_i64(b); + if (bv == 0) + return ray_typed_null(a->type); + + int64_t av = as_i64(a); + /* Floor division (toward -inf) */ + int64_t q = av / bv; + if ((av ^ bv) < 0 && q * bv != av) q--; + /* Return type matches LEFT operand for i16/i32 */ + if (a->type == -RAY_I16) return make_i16((int16_t)q); + if (a->type == -RAY_I32) return make_i32((int32_t)q); + return make_i64(q); +} + +ray_t* ray_mod_fn(ray_t* a, ray_t* b) { + /* Temporal % numeric → temporal (same type as left operand) */ + if (is_temporal(a) && is_numeric(b)) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) + return ray_typed_null(a->type); + int64_t bv; + if (b->type == -RAY_F64) { + double bvf = b->f64; + if (bvf == 0.0) + return ray_typed_null(a->type); + bv = (int64_t)bvf; + } else { + bv = as_i64(b); + } + if (bv == 0) + return ray_typed_null(a->type); + + int64_t av = a->i64; + int64_t q = av / bv; + if ((av ^ bv) < 0 && q * bv != av) q--; + int64_t result = av - bv * q; + if (a->type == -RAY_TIME) return ray_time(result); + if (a->type == -RAY_DATE) return ray_date(result); + return ray_timestamp(result); + } + if (!is_numeric(a) || !is_numeric(b)) + return ray_error("type", "cannot mod %s by %s", + ray_type_name(a->type), ray_type_name(b->type)); + + /* u8: unsigned byte modulo, no null sentinel — mod by 0 returns 0 */ + if (b->type == -RAY_U8) { + uint8_t bv = b->u8; + if (bv == 0) return make_u8(0); + return 
make_u8((uint8_t)((uint8_t)as_i64(a) % bv)); + } + if (a->type == -RAY_U8) { + /* a is u8 but b is not u8 — treat as integer, result follows b's type */ + } + + /* Null propagation and division by zero: null type follows RIGHT operand */ + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) { + int8_t rt = (b->type == -RAY_F64 || a->type == -RAY_F64) ? -RAY_F64 : b->type; + return ray_typed_null(rt); + } + + /* Float modulo: result = a - b * floor(a/b), type follows RIGHT or f64 */ + if (is_float_op(a, b)) { + double av = as_f64(a), bv = as_f64(b); + if (bv == 0.0) { + int8_t rt = (b->type == -RAY_F64 || a->type == -RAY_F64) ? -RAY_F64 : b->type; + return ray_typed_null(rt); + } + double result = av - bv * floor(av / bv); + /* Snap tiny residual to 0 */ + if (fabs(result) < 1e-12 || fabs(result - fabs(bv)) < 1e-12) result = bv > 0 ? 0.0 : -0.0; + if (b->type == -RAY_F64 || a->type == -RAY_F64) return make_f64(result); + if (b->type == -RAY_I32) return make_i32((int32_t)(int64_t)result); + if (b->type == -RAY_I16) return make_i16((int16_t)(int64_t)result); + return make_i64((int64_t)result); + } + + /* Integer modulo: result = a - b * floor(a/b), sign follows b (divisor) */ + int64_t av = as_i64(a), bv = as_i64(b); + if (bv == 0) + return ray_typed_null(b->type); + + int64_t q = av / bv; + if ((av ^ bv) < 0 && q * bv != av) q--; /* floor division */ + int64_t result = av - bv * q; + /* Result type follows RIGHT operand */ + if (b->type == -RAY_I32) return make_i32((int32_t)result); + if (b->type == -RAY_I16) return make_i16((int16_t)result); + if (b->type == -RAY_U8) return make_u8((uint8_t)result); + return make_i64(result); +} + +ray_t* ray_neg_fn(ray_t* x) { + if (RAY_ATOM_IS_NULL(x)) { ray_retain(x); return x; } + if (x->type == -RAY_F64) return make_f64(-x->f64); + /* INT_MIN is the lone overflow case for signed negation: -INT_MIN + * doesn't fit in the same width. 
Per k/q convention, surface this + * as a typed null of the same width — preserving type, avoiding UB, + * and giving the caller a `nil?`-detectable signal that overflow + * happened. Consistent with how `(neg 0Ni) → 0Ni` propagates. */ + if (x->type == -RAY_I64) { + if (RAY_UNLIKELY(x->i64 == INT64_MIN)) return ray_typed_null(-RAY_I64); + return make_i64(-x->i64); + } + if (x->type == -RAY_I32) { + if (RAY_UNLIKELY(x->i32 == INT32_MIN)) return ray_typed_null(-RAY_I32); + return make_i32(-x->i32); + } + if (x->type == -RAY_I16) { + if (RAY_UNLIKELY(x->i16 == INT16_MIN)) return ray_typed_null(-RAY_I16); + return make_i16(-x->i16); + } + return ray_error("type", NULL); +} + +/* round: round to nearest integer (ties go away from zero), returns f64 */ +ray_t* ray_round_fn(ray_t* x) { + if (RAY_ATOM_IS_NULL(x)) return ray_typed_null(-RAY_F64); + if (x->type == -RAY_F64) return make_f64(round(x->f64)); + if (is_numeric(x)) return make_f64(round(as_f64(x))); + return ray_error("type", NULL); +} + +/* floor: round toward -inf, returns f64 for f64, identity for int */ +ray_t* ray_floor_fn(ray_t* x) { + if (RAY_ATOM_IS_NULL(x)) { ray_retain(x); return x; } + if (x->type == -RAY_F64) return make_f64(floor(x->f64)); + if (is_numeric(x)) { ray_retain(x); return x; } + return ray_error("type", NULL); +} + +/* ceil: round toward +inf, returns f64 for f64, identity for int */ +ray_t* ray_ceil_fn(ray_t* x) { + if (RAY_ATOM_IS_NULL(x)) { ray_retain(x); return x; } + if (x->type == -RAY_F64) return make_f64(ceil(x->f64)); + if (is_numeric(x)) { ray_retain(x); return x; } + return ray_error("type", NULL); +} + +/* abs: absolute value, preserves type. INT_MIN has no representable + * positive in the same width — return a typed null instead (same + * convention as `neg`). Stops `(abs -32768h) → -32768h` (negative + * result from abs!) and `(abs INT_MIN)` UB simultaneously. 
*/ +ray_t* ray_abs_fn(ray_t* x) { + if (RAY_ATOM_IS_NULL(x)) { ray_retain(x); return x; } + if (x->type == -RAY_F64) return make_f64(fabs(x->f64)); + if (x->type == -RAY_I64) { + if (RAY_UNLIKELY(x->i64 == INT64_MIN)) return ray_typed_null(-RAY_I64); + return make_i64(x->i64 < 0 ? -x->i64 : x->i64); + } + if (x->type == -RAY_I32) { + if (RAY_UNLIKELY(x->i32 == INT32_MIN)) return ray_typed_null(-RAY_I32); + return make_i32(x->i32 < 0 ? -x->i32 : x->i32); + } + if (x->type == -RAY_I16) { + if (RAY_UNLIKELY(x->i16 == INT16_MIN)) return ray_typed_null(-RAY_I16); + return make_i16(x->i16 < 0 ? -x->i16 : x->i16); + } + return ray_error("type", NULL); +} + +/* sqrt: square root, returns f64 */ +ray_t* ray_sqrt_fn(ray_t* x) { + if (RAY_ATOM_IS_NULL(x)) return ray_typed_null(-RAY_F64); + if (x->type == -RAY_F64) return make_f64(sqrt(x->f64)); + if (is_numeric(x)) return make_f64(sqrt(as_f64(x))); + return ray_error("type", NULL); +} + +/* log: natural logarithm, returns f64 */ +ray_t* ray_log_fn(ray_t* x) { + if (RAY_ATOM_IS_NULL(x)) return ray_typed_null(-RAY_F64); + if (x->type == -RAY_F64) return make_f64(log(x->f64)); + if (is_numeric(x)) return make_f64(log(as_f64(x))); + return ray_error("type", NULL); +} + +/* exp: e^x, returns f64 */ +ray_t* ray_exp_fn(ray_t* x) { + if (RAY_ATOM_IS_NULL(x)) return ray_typed_null(-RAY_F64); + if (x->type == -RAY_F64) return make_f64(exp(x->f64)); + if (is_numeric(x)) return make_f64(exp(as_f64(x))); + return ray_error("type", NULL); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/builtins.c b/crates/rayforce-sys/vendor/rayforce/src/ops/builtins.c new file mode 100644 index 0000000..756e39e --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/builtins.c @@ -0,0 +1,2681 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** I/O builtins, type casting, and misc builtins extracted from eval.c. + */ + +#include "lang/eval.h" +#include "lang/internal.h" +#include "lang/env.h" +#include "vec/vec.h" +#include "lang/nfo.h" +#include "lang/parse.h" +#include "core/pool.h" +#include "core/types.h" +#include "io/csv.h" +#include "ops/ops.h" +#include "table/sym.h" +#include "core/profile.h" +#include "mem/sys.h" +#include "lang/format.h" + +#include +#include +#include +#include +#include +#include +#if !defined(RAY_OS_WINDOWS) +#include +#include +#include +#include +#endif + +/* ══════════════════════════════════════════ + * I/O builtins: println, show, format, read-csv, write-csv, as, type + * ══════════════════════════════════════════ */ + +/* Helper: return the null literal string for a typed null atom (e.g. "0Ni" for I32). 
*/ +static const char* null_literal_str(int8_t type) { + switch (-type) { + case RAY_I16: return "0Nh"; + case RAY_I32: return "0Ni"; + case RAY_I64: return "0Nl"; + case RAY_F32: return "0Ne"; + case RAY_F64: return "0Nf"; + case RAY_DATE: return "0Nd"; + case RAY_TIME: return "0Nt"; + case RAY_TIMESTAMP: return "0Np"; + case RAY_SYM: return "0Ns"; + default: return "null"; + } +} + +/* Helper: print a ray_t value to a file handle */ +void ray_lang_print(FILE* fp, ray_t* val) { + if (!val || RAY_IS_ERR(val)) { fprintf(fp, "error"); return; } + if (RAY_IS_NULL(val)) { fprintf(fp, "null"); return; } + /* Materialize lazy handles before printing */ + if (ray_is_lazy(val)) + val = ray_lazy_materialize(val); + if (!val || RAY_IS_ERR(val)) { fprintf(fp, "error"); return; } + if (RAY_ATOM_IS_NULL(val)) { + fprintf(fp, "%s", null_literal_str(val->type)); + return; + } + switch (val->type) { + case -RAY_I64: fprintf(fp, "%ld", (long)val->i64); break; + case -RAY_F64: { + double fv = val->f64; + if (fv == 0.0 && signbit(fv)) fv = 0.0; + fprintf(fp, "%g", fv); + break; + } + case -RAY_BOOL: fprintf(fp, "%s", val->b8 ? "true" : "false"); break; + case -RAY_SYM: { + ray_t* s = ray_sym_str(val->i64); + if (s) fprintf(fp, "'%.*s", (int)ray_str_len(s), ray_str_ptr(s)); + else fprintf(fp, "'?"); + break; + } + case -RAY_STR: { + const char* s = ray_str_ptr(val); + size_t slen = ray_str_len(val); + fprintf(fp, "%.*s", (int)slen, s); + break; + } + case RAY_LIST: { + fprintf(fp, "["); + int64_t len = ray_len(val); + ray_t** elems = (ray_t**)ray_data(val); + for (int64_t i = 0; i < len; i++) { + if (i > 0) fprintf(fp, " "); + ray_lang_print(fp, elems[i]); + } + fprintf(fp, "]"); + break; + } + case RAY_TABLE: + fprintf(fp, "
", + (long)ray_table_nrows(val), (long)ray_table_ncols(val)); + break; + case RAY_UNARY: case RAY_BINARY: case RAY_VARY: { + const char* name = ray_fn_name(val); + fprintf(fp, "%s", name[0] ? name : "builtin"); + break; + } + default: { + /* Fall back to ray_fmt for everything else: i16, i32, u8, all + * vector types (I16/I32/F64/SYM/...), DICT, GUID, temporal, etc. + * Without this println on (println 5i) printed "" — a + * debug placeholder, not the value. */ + ray_t* s = ray_fmt(val, 0); + if (s && !RAY_IS_ERR(s)) { + fprintf(fp, "%.*s", (int)ray_str_len(s), ray_str_ptr(s)); + ray_release(s); + } else { + fprintf(fp, "", val->type); + if (s) ray_release(s); + } + break; + } + } +} + +/* Helper: format string with % placeholders, substituting args. + * Returns a heap-allocated char* (caller must ray_sys_free) and sets *out_len. + * If fmt has no %, returns NULL (caller falls back to plain print). */ +static char* fmt_interpolate(const char* fmt, size_t flen, ray_t** args, int64_t nargs, int64_t arg_start, size_t* out_len) { + /* Quick scan: any % in fmt? 
*/ + int has_pct = 0; + for (size_t i = 0; i < flen; i++) if (fmt[i] == '%') { has_pct = 1; break; } + if (!has_pct) return NULL; + + /* Build result in a dynamic buffer */ + size_t cap = flen + 256; + char* buf = ray_sys_alloc(cap); + if (!buf) return NULL; + size_t pos = 0; + int64_t ai = arg_start; + + for (size_t i = 0; i < flen; i++) { + if (fmt[i] == '%' && ai < nargs) { + /* Format the arg into a temp buffer */ + char tmp[256]; + ray_t* a = args[ai++]; + if (ray_is_lazy(a)) a = ray_lazy_materialize(a); + int tlen = 0; + if (!a || RAY_IS_ERR(a)) { + tlen = snprintf(tmp, sizeof(tmp), "error"); + } else if (RAY_ATOM_IS_NULL(a)) { + tlen = snprintf(tmp, sizeof(tmp), "%s", null_literal_str(a->type)); + } else if (a->type == -RAY_I64) { + tlen = snprintf(tmp, sizeof(tmp), "%ld", (long)a->i64); + } else if (a->type == -RAY_F64) { + double fv = a->f64; + if (fv == 0.0 && signbit(fv)) fv = 0.0; + tlen = snprintf(tmp, sizeof(tmp), "%g", fv); + } else if (a->type == -RAY_BOOL) { + tlen = snprintf(tmp, sizeof(tmp), "%s", a->b8 ? 
"true" : "false"); + } else if (a->type == -RAY_STR) { + const char* sp = ray_str_ptr(a); + size_t sl = ray_str_len(a); + while (pos + sl + 1 > cap) { cap *= 2; buf = ray_sys_realloc(buf, cap); } + memcpy(buf + pos, sp, sl); + pos += sl; + continue; + } else if (a->type == -RAY_SYM) { + ray_t* ss = ray_sym_str(a->i64); + if (ss) { + const char* sp = ray_str_ptr(ss); + size_t sl = ray_str_len(ss); + while (pos + sl + 1 > cap) { cap *= 2; buf = ray_sys_realloc(buf, cap); } + memcpy(buf + pos, sp, sl); + pos += sl; + ray_release(ss); + continue; + } + tlen = snprintf(tmp, sizeof(tmp), "'?"); + } else { + /* Fall back to ray_fmt */ + ray_t* formatted = ray_fmt(a, 0); + if (formatted && !RAY_IS_ERR(formatted)) { + const char* sp = ray_str_ptr(formatted); + size_t sl = ray_str_len(formatted); + while (pos + sl + 1 > cap) { cap *= 2; buf = ray_sys_realloc(buf, cap); } + memcpy(buf + pos, sp, sl); + pos += sl; + ray_release(formatted); + continue; + } + if (formatted) ray_release(formatted); + tlen = snprintf(tmp, sizeof(tmp), "", a->type); + } + while (pos + (size_t)tlen + 1 > cap) { cap *= 2; buf = ray_sys_realloc(buf, cap); } + memcpy(buf + pos, tmp, (size_t)tlen); + pos += (size_t)tlen; + } else { + if (pos + 2 > cap) { cap *= 2; buf = ray_sys_realloc(buf, cap); } + buf[pos++] = fmt[i]; + } + } + buf[pos] = '\0'; + *out_len = pos; + return buf; +} + +/* (println val1 val2 ...) — print values to stdout, newline at end. + * If first arg is a string with % placeholders, substitutes remaining args. 
*/ +ray_t* ray_println_fn(ray_t** args, int64_t n) { + for (int64_t i = 0; i < n; i++) + if (ray_is_lazy(args[i])) args[i] = ray_lazy_materialize(args[i]); + + /* Format string mode: first arg is a string with % placeholders */ + if (n >= 2 && args[0] && args[0]->type == -RAY_STR) { + const char* fmt = ray_str_ptr(args[0]); + size_t flen = ray_str_len(args[0]); + size_t out_len = 0; + char* result = fmt_interpolate(fmt, flen, args, n, 1, &out_len); + if (result) { + fwrite(result, 1, out_len, stdout); + fputc('\n', stdout); + fflush(stdout); + ray_sys_free(result); + return RAY_NULL_OBJ; + } + } + + for (int64_t i = 0; i < n; i++) { + if (i > 0) fputc(' ', stdout); + ray_lang_print(stdout, args[i]); + } + fputc('\n', stdout); + fflush(stdout); + return RAY_NULL_OBJ; +} + +/* (print val1 val2 ...) — like println but without trailing newline */ +ray_t* ray_print_fn(ray_t** args, int64_t n) { + for (int64_t i = 0; i < n; i++) + if (ray_is_lazy(args[i])) args[i] = ray_lazy_materialize(args[i]); + + /* Format string mode: first arg is a string with % placeholders */ + if (n >= 2 && args[0] && args[0]->type == -RAY_STR) { + const char* fmt = ray_str_ptr(args[0]); + size_t flen = ray_str_len(args[0]); + size_t out_len = 0; + char* result = fmt_interpolate(fmt, flen, args, n, 1, &out_len); + if (result) { + fwrite(result, 1, out_len, stdout); + fflush(stdout); + ray_sys_free(result); + return RAY_NULL_OBJ; + } + } + + for (int64_t i = 0; i < n; i++) { + if (i > 0) fputc(' ', stdout); + ray_lang_print(stdout, args[i]); + } + fflush(stdout); + return RAY_NULL_OBJ; +} + +/* (show val1 val2 ...) 
— print values to stdout using ray_fmt, newline at end */ +ray_t* ray_show_fn(ray_t** args, int64_t n) { + for (int64_t i = 0; i < n; i++) { + if (ray_is_lazy(args[i])) args[i] = ray_lazy_materialize(args[i]); + if (!args[i] || RAY_IS_ERR(args[i])) { fprintf(stdout, "error"); continue; } + ray_t* formatted = ray_fmt(args[i], 1); + if (formatted && !RAY_IS_ERR(formatted)) { + const char* sp = ray_str_ptr(formatted); + size_t sl = ray_str_len(formatted); + fwrite(sp, 1, sl, stdout); + ray_release(formatted); + } else { + if (formatted) ray_release(formatted); + ray_lang_print(stdout, args[i]); + } + } + fputc('\n', stdout); + fflush(stdout); + return RAY_NULL_OBJ; +} + +/* (format "hello % world %" a b) — string formatting with % placeholders */ +ray_t* ray_format_fn(ray_t** args, int64_t n) { + if (n < 1) return ray_error("domain", NULL); + for (int64_t i = 0; i < n; i++) + if (ray_is_lazy(args[i])) args[i] = ray_lazy_materialize(args[i]); + if (!args[0] || args[0]->type != -RAY_STR) return ray_error("type", NULL); + const char* fmt = ray_str_ptr(args[0]); + size_t flen = ray_str_len(args[0]); + size_t out_len = 0; + char* result = fmt_interpolate(fmt, flen, args, n, 1, &out_len); + if (result) { + ray_t* s = ray_str(result, out_len); + ray_sys_free(result); + return s; + } + /* No placeholders: return fmt as-is */ + ray_retain(args[0]); + return args[0]; +} + +/* (resolve 'name) — check if name exists in env, return value or null. + * SPECIAL_FORM: does not evaluate args. */ +/* (resolve tbl) — replace I64 columns with SYM columns where values are valid sym IDs. + * This makes query results human-readable (sym names instead of intern IDs). + * Also accepts (resolve db tbl) for compat — just ignores db. 
*/
ray_t* ray_resolve_fn(ray_t** args, int64_t n) {
    if (n < 1) return ray_error("arity", "resolve expects at least 1 argument");

    /* This builtin evaluates its own arguments. With two arguments the
     * first (db) is evaluated and discarded; the second is the subject. */
    ray_t* tbl;
    if (n != 1) {
        ray_t* db = ray_eval(args[0]);
        if (db && !RAY_IS_ERR(db)) ray_release(db);
        tbl = ray_eval(args[1]);
    } else {
        tbl = ray_eval(args[0]);
    }
    if (!tbl) return ray_error("type", "resolve: null argument");
    if (RAY_IS_ERR(tbl)) return tbl;

    /* Swap a lazy handle for its materialized value. */
    if (ray_is_lazy(tbl)) {
        ray_t* mat = ray_lazy_materialize(tbl);
        ray_release(tbl);
        if (!mat) return ray_error("domain", "resolve: materialization failed");
        if (RAY_IS_ERR(mat)) return mat;
        tbl = mat;
    }

    /* Non-table input: a symbol atom is looked up in the environment,
     * anything else is returned untouched. */
    if (tbl->type != RAY_TABLE) {
        if (tbl->type != -RAY_SYM) return tbl;
        ray_t* bound = ray_env_get(tbl->i64);
        ray_release(tbl);
        if (!bound) return NULL;
        ray_retain(bound);
        return bound;
    }

    int64_t ncols = ray_table_ncols(tbl);
    int64_t nrows = ray_table_nrows(tbl);

    /* Output table: same columns, with qualifying I64 columns retyped. */
    ray_t* result = ray_table_new(ncols);
    if (RAY_IS_ERR(result)) { ray_release(tbl); return result; }

    for (int64_t ci = 0; ci < ncols; ci++) {
        ray_t* src_col = ray_table_get_col_idx(tbl, ci);
        int64_t name_id = ray_table_col_name(tbl, ci);
        if (!src_col) continue;

        /* An I64 column is converted only when it is non-empty and every
         * value is a positive id naming a user-style symbol: at least two
         * characters, not starting with a digit or operator character.
         * That keeps plain entity-id columns (e=1) as integers. */
        bool convert = false;
        int64_t* ids = NULL;
        if (src_col->type == RAY_I64) {
            ids = (int64_t*)ray_data(src_col);
            convert = (nrows > 0);
            for (int64_t ri = 0; ri < nrows; ri++) {
                if (ids[ri] <= 0) { convert = false; break; }
                ray_t* nm = ray_sym_str(ids[ri]);
                if (!nm) { convert = false; break; }
                size_t nlen = ray_str_len(nm);
                const char* np = ray_str_ptr(nm);
                if (nlen < 2) { convert = false; break; }

                bool rejected;
                switch (np[0]) {
                case '+': case '-': case '*': case '/':
                case '<': case '>': case '=': case '!':
                case '?': case '_':
                    rejected = true;
                    break;
                default:
                    rejected = (np[0] >= '0' && np[0] <= '9');
                    break;
                }
                if (rejected) { convert = false; break; }
            }
        }

        if (!convert) {
            /* Non-I64 column, or I64 values that are not symbol ids. */
            result = ray_table_add_col(result, name_id, src_col);
        } else {
            /* Rebuild the column as SYM from the same ids. */
            ray_t* sym_col = ray_vec_new(RAY_SYM, nrows);
            if (RAY_IS_ERR(sym_col)) {
                ray_release(result);
                ray_release(tbl);
                return sym_col;
            }
            for (int64_t ri = 0; ri < nrows; ri++) {
                sym_col = ray_vec_append(sym_col, &ids[ri]);
                if (RAY_IS_ERR(sym_col)) {
                    ray_release(result);
                    ray_release(tbl);
                    return sym_col;
                }
            }
            result = ray_table_add_col(result, name_id, sym_col);
            ray_release(sym_col);
        }
        if (RAY_IS_ERR(result)) { ray_release(tbl); return result; }
    }

    ray_release(tbl);
    return result;
}

/* (timeit expr) — evaluate expression and return time in ms as F64.
 * SPECIAL_FORM: does not pre-evaluate args.
*/ +ray_t* ray_timeit_fn(ray_t** args, int64_t n) { + if (n < 1) return ray_error("domain", NULL); + int64_t t0 = ray_profile_now_ns(); + ray_t* result = ray_eval(args[0]); + int64_t t1 = ray_profile_now_ns(); + if (result && !RAY_IS_ERR(result)) ray_release(result); + double ms = (double)(t1 - t0) / 1e6; + return make_f64(ms); +} + +/* (exit code) — exit the process */ +ray_t* ray_exit_fn(ray_t* arg) { + int code = 0; + if (arg && is_numeric(arg)) code = (int)as_i64(arg); + exit(code); + return NULL; /* unreachable */ +} + +/* (read-csv path) — read CSV file, return RAY_TABLE */ +/* Helper: resolve a type name symbol to a ray type code */ +static int8_t resolve_type_name(int64_t sym_id) { + ray_t* s = ray_sym_str(sym_id); + if (!s) return -1; + const char* name = ray_str_ptr(s); + size_t len = ray_str_len(s); + int8_t result = -1; + if (len == 3 && memcmp(name, "I64", 3) == 0) result = RAY_I64; + else if (len == 3 && memcmp(name, "I32", 3) == 0) result = RAY_I32; + else if (len == 3 && memcmp(name, "I16", 3) == 0) result = RAY_I16; + else if (len == 3 && memcmp(name, "F64", 3) == 0) result = RAY_F64; + else if (len == 2 && memcmp(name, "B8", 2) == 0) result = RAY_BOOL; + else if (len == 2 && memcmp(name, "U8", 2) == 0) result = RAY_U8; + else if (len == 6 && memcmp(name, "SYMBOL", 6) == 0) result = RAY_SYM; + else if (len == 3 && memcmp(name, "STR", 3) == 0) result = RAY_STR; + else if (len == 3 && memcmp(name, "F32", 3) == 0) result = RAY_F32; + else if (len == 4 && memcmp(name, "DATE", 4) == 0) result = RAY_DATE; + else if (len == 4 && memcmp(name, "TIME", 4) == 0) result = RAY_TIME; + else if (len == 9 && memcmp(name, "TIMESTAMP", 9) == 0) result = RAY_TIMESTAMP; + else if (len == 4 && memcmp(name, "GUID", 4) == 0) result = RAY_GUID; + ray_release(s); + return result; +} + +ray_t* ray_read_csv_fn(ray_t** args, int64_t n) { + if (n < 1) return ray_error("domain", NULL); + + /* (read-csv [types] "path") or (read-csv "path") */ + ray_t* path_obj = NULL; + ray_t* 
schema = NULL; + if (n >= 2 && ray_is_vec(args[0]) && args[0]->type == RAY_SYM) { + schema = args[0]; + path_obj = args[1]; + } else { + path_obj = args[0]; + } + + const char* path = NULL; + if (path_obj->type == -RAY_STR) + path = ray_str_ptr(path_obj); + else + return ray_error("type", NULL); + if (!path) return ray_error("domain", NULL); + + if (schema) { + int64_t ncols = schema->len; + int8_t col_types[256]; + if (ncols > 256) return ray_error("limit", NULL); + int64_t* sym_ids = (int64_t*)ray_data(schema); + for (int64_t i = 0; i < ncols; i++) { + col_types[i] = resolve_type_name(sym_ids[i]); + if (col_types[i] < 0) return ray_error("type", NULL); + } + ray_t* tbl = ray_read_csv_opts(path, 0, true, col_types, (int32_t)ncols); + if (!tbl || RAY_IS_ERR(tbl)) return ray_error("io", NULL); + return tbl; + } + + ray_t* tbl = ray_read_csv(path); + if (!tbl || RAY_IS_ERR(tbl)) return ray_error("io", NULL); + return tbl; +} + +/* (write-csv table path) — write table to CSV file */ +ray_t* ray_write_csv_fn(ray_t** args, int64_t n) { + if (n < 2) return ray_error("domain", NULL); + ray_t* tbl = args[0]; + ray_t* path_obj = args[1]; + if (tbl->type != RAY_TABLE) return ray_error("type", NULL); + const char* path = NULL; + if (path_obj->type == -RAY_STR) + path = ray_str_ptr(path_obj); + else + return ray_error("type", NULL); + if (!path) return ray_error("domain", NULL); + ray_err_t err = ray_write_csv(tbl, path); + if (err != RAY_OK) return ray_error(ray_err_code_str(err), NULL); + return make_i64(0); +} + +/* (as 'TypeName value) — type cast */ +/* Case-insensitive type name match helper */ +static int cast_match(const char* tname, size_t tlen, const char* target) { + size_t tgt_len = strlen(target); + if (tlen != tgt_len) return 0; + for (size_t i = 0; i < tlen; i++) { + char a = tname[i], b = target[i]; + if (a >= 'a' && a <= 'z') a -= 32; + if (b >= 'a' && b <= 'z') b -= 32; + if (a != b) return 0; + } + return 1; +} + +/* Helper: copy null bitmap from source 
vec/list to destination vec. */ +static ray_t* cast_vec_copy_nulls(ray_t* vec, ray_t* val) { + if (ray_is_vec(val)) { + if (ray_vec_copy_nulls(vec, val) != RAY_OK) + { ray_release(vec); return ray_error("oom", NULL); } + } else if (val->type == RAY_LIST) { + ray_t** le = (ray_t**)ray_data(val); + for (int64_t j = 0; j < vec->len; j++) + if (le[j] && RAY_ATOM_IS_NULL(le[j])) + ray_vec_set_null(vec, j, true); + } + return vec; +} + +/* Bulk-cast loop over [_lo, _hi). Reads `R` from `_src_p`, writes `W` + * to `_dst_p`. No atom allocations. The single-threaded path passes + * the whole [0, n2) range; the parallel worker passes its slice. */ +#define CAST_LOOP_RANGE(R, W, EXPR, _lo, _hi) do { \ + const R* _src = (const R*)_src_p; \ + W* _dst = (W*)_dst_p; \ + for (int64_t _i = (_lo); _i < (_hi); _i++) { \ + R _v = _src[_i]; \ + _dst[_i] = (EXPR); \ + } \ +} while (0) +#define CAST_LOOP(R, W, EXPR) CAST_LOOP_RANGE(R, W, EXPR, 0, n2) + +/* Same-byte-rep type relabels (I64↔TIMESTAMP, I32↔DATE↔TIME): the + * per-element data is identical, so a single memcpy populates the new + * vector. Returns true on hit. */ +static bool cast_vec_relabel_compat(int8_t a, int8_t b) { + if (a == b) return true; + if ((a == RAY_I64 || a == RAY_TIMESTAMP) && + (b == RAY_I64 || b == RAY_TIMESTAMP)) return true; + if ((a == RAY_I32 || a == RAY_DATE || a == RAY_TIME) && + (b == RAY_I32 || b == RAY_DATE || b == RAY_TIME)) return true; + return false; +} + +/* Vec→vec numeric cast on raw arrays (no per-element atom allocs). + * Returns the populated `vec` on success, or NULL if the (in_type, + * out_type) pair is unsupported here — caller falls back to the generic + * path. + * + * Temporal cross-unit pairs (matched between the per-atom slow path + * and the fast path): + * DATE → TIMESTAMP : days * NS_PER_DAY + * TIMESTAMP → DATE : floor-div by NS_PER_DAY (so ns=-1 → -1 day, + * i.e. 1999-12-31, not 2000-01-01). 
+ * TIMESTAMP → TIME : floor-mod by NS_PER_DAY then /1_000_000 + * (ns→ms within day, always in [0, 86_400_000)). + * Plain `% / /` would truncate toward zero per C semantics and give + * wrong components for pre-2000 timestamps; the helpers below give + * Python-style floor semantics for a positive divisor. */ +#define NS_PER_DAY 86400000000000LL + +static inline int64_t ts_days_floor(int64_t ns) { + int64_t q = ns / NS_PER_DAY; + int64_t r = ns - q * NS_PER_DAY; + if (r < 0) q -= 1; + return q; +} +static inline int64_t ts_ns_in_day(int64_t ns) { + int64_t r = ns % NS_PER_DAY; + if (r < 0) r += NS_PER_DAY; + return r; +} + +/* Element-wise cast worker: writes _dst_p[lo..hi) from _src_p[lo..hi). + * Used by both the single-threaded fast path and the parallel dispatch. + * Returns true on hit; false means caller falls back to the generic + * (atom) path. */ +static bool cast_range_worker(const void* _src_p, void* _dst_p, + int64_t lo, int64_t hi, + int8_t in_type, int8_t out_type) { + /* Temporal unit conversions. */ + if (in_type == RAY_DATE && out_type == RAY_TIMESTAMP) { + CAST_LOOP_RANGE(int32_t, int64_t, (int64_t)_v * NS_PER_DAY, lo, hi); + return true; + } + if (in_type == RAY_TIMESTAMP && out_type == RAY_DATE) { + /* Floor-div, not truncate-toward-zero: ns=-1 must give -1 day + * (1999-12-31), not 0 (2000-01-01). */ + CAST_LOOP_RANGE(int64_t, int32_t, (int32_t)ts_days_floor(_v), lo, hi); + return true; + } + if (in_type == RAY_TIMESTAMP && out_type == RAY_TIME) { + /* Floor-mod ns within day, then ns→ms. */ + CAST_LOOP_RANGE(int64_t, int32_t, + (int32_t)(ts_ns_in_day(_v) / 1000000LL), lo, hi); + return true; + } + /* Generic numeric pairs. The big switch dispatches on (out_type, + * in_type); each leaf is a tight typed loop the compiler vectorizes. 
*/ +#define CL(R, W, EXPR) do { CAST_LOOP_RANGE(R, W, EXPR, lo, hi); return true; } while (0) + switch (out_type) { + case RAY_I64: case RAY_TIMESTAMP: + switch (in_type) { + case RAY_BOOL: CL(uint8_t, int64_t, _v ? 1 : 0); + case RAY_U8: CL(uint8_t, int64_t, (int64_t)_v); + case RAY_I16: CL(int16_t, int64_t, (int64_t)_v); + case RAY_I32: case RAY_DATE: case RAY_TIME: + CL(int32_t, int64_t, (int64_t)_v); + case RAY_F64: CL(double, int64_t, (int64_t)_v); + } + break; + case RAY_I32: case RAY_DATE: case RAY_TIME: + switch (in_type) { + case RAY_BOOL: CL(uint8_t, int32_t, _v ? 1 : 0); + case RAY_U8: CL(uint8_t, int32_t, (int32_t)_v); + case RAY_I16: CL(int16_t, int32_t, (int32_t)_v); + case RAY_I64: case RAY_TIMESTAMP: + CL(int64_t, int32_t, (int32_t)_v); + case RAY_F64: CL(double, int32_t, (int32_t)_v); + } + break; + case RAY_I16: + switch (in_type) { + case RAY_BOOL: CL(uint8_t, int16_t, _v ? 1 : 0); + case RAY_U8: CL(uint8_t, int16_t, (int16_t)_v); + case RAY_I32: case RAY_DATE: case RAY_TIME: + CL(int32_t, int16_t, (int16_t)_v); + case RAY_I64: case RAY_TIMESTAMP: + CL(int64_t, int16_t, (int16_t)_v); + case RAY_F64: CL(double, int16_t, (int16_t)_v); + } + break; + case RAY_U8: + switch (in_type) { + case RAY_BOOL: CL(uint8_t, uint8_t, _v ? 1 : 0); + case RAY_I16: CL(int16_t, uint8_t, (uint8_t)_v); + case RAY_I32: case RAY_DATE: case RAY_TIME: + CL(int32_t, uint8_t, (uint8_t)_v); + case RAY_I64: case RAY_TIMESTAMP: + CL(int64_t, uint8_t, (uint8_t)_v); + case RAY_F64: CL(double, uint8_t, (uint8_t)_v); + } + break; + case RAY_F64: + switch (in_type) { + case RAY_BOOL: CL(uint8_t, double, _v ? 1.0 : 0.0); + case RAY_U8: CL(uint8_t, double, (double)_v); + case RAY_I16: CL(int16_t, double, (double)_v); + case RAY_I32: case RAY_DATE: case RAY_TIME: + CL(int32_t, double, (double)_v); + case RAY_I64: case RAY_TIMESTAMP: + CL(int64_t, double, (double)_v); + } + break; + case RAY_BOOL: + switch (in_type) { + case RAY_U8: CL(uint8_t, uint8_t, _v != 0 ? 
1 : 0); + case RAY_I16: CL(int16_t, uint8_t, _v != 0 ? 1 : 0); + case RAY_I32: case RAY_DATE: case RAY_TIME: + CL(int32_t, uint8_t, _v != 0 ? 1 : 0); + case RAY_I64: case RAY_TIMESTAMP: + CL(int64_t, uint8_t, _v != 0 ? 1 : 0); + case RAY_F64: CL(double, uint8_t, _v != 0.0 ? 1 : 0); + } + break; + } +#undef CL + return false; +} + +typedef struct { + const void* src; + void* dst; + int8_t in_type; + int8_t out_type; +} cast_par_ctx_t; + +static void cast_par_fn(void* arg, uint32_t worker_id, int64_t lo, int64_t hi) { + (void)worker_id; + /* Honor SIGINT (ray_request_interrupt / ray_interrupted) per task — + * the pool's own per-task gate checks `pool->cancelled` only, so + * a Ctrl-C arriving during dispatch wouldn't otherwise short- + * circuit the workers. Skip the task on interrupt; the caller + * post-checks via CANCELLED() and returns an error. */ + if (ray_interrupted()) return; + cast_par_ctx_t* ctx = (cast_par_ctx_t*)arg; + cast_range_worker(ctx->src, ctx->dst, lo, hi, ctx->in_type, ctx->out_type); +} + +/* Threshold below which the dispatch overhead outweighs the speedup. + * Memory-bound conversions saturate ~3 GB/s single-thread; with 8 + * workers we approach DRAM peak (~25 GB/s). Below ~256 K elements the + * 50 µs dispatch cost dominates. */ +#define CAST_PAR_MIN_ELEMS 262144 + +static ray_t* cast_vec_numeric_fast(ray_t* val, ray_t* vec, int8_t out_type) { + int8_t in_type = val->type; + int64_t n2 = val->len; + ray_pool_t* pool = ray_pool_get(); + +/* A cast is "cancelled" if EITHER: + * (a) the pool's per-query cancel flag is set (e.g. via ray_cancel + * from another thread or a long-query timeout), or + * (b) the eval-loop interrupt flag is set (Ctrl-C / SIGINT, signalled + * by ray_request_interrupt and observed via ray_interrupted). + * Both must be polled — they're independent signals and either one + * means the user wants the operation to abort. 
*/ +#define CANCELLED() ((pool && atomic_load_explicit(&pool->cancelled, \ + memory_order_acquire)) \ + || ray_interrupted()) +#define CHECK_CANCEL_OR(retval) do { \ + if (CANCELLED()) return ray_error("cancel", NULL); \ + return (retval); \ +} while (0) + + /* Function-entry cancel check — gates ALL paths below (relabel, + * parallel, and chunked single-thread). Without this, a cancel + * pending at entry would still execute the first ~50 µs of any + * path before being observed. */ + if (CANCELLED()) return ray_error("cancel", NULL); + + /* Same byte-rep types: chunked memcpy. A single + * memcpy(_, _, n*esz) on a 10M-element TIMESTAMP relabel is ~80 MB + * and ~10 ms of opaque work — cancel arriving during it can't + * interrupt the libc copy, so we'd happily return `vec` even if + * the user asked to abort. Break the copy into ~1 MB chunks and + * poll cancel between them; max in-flight work between checks is + * one chunk (~100 µs at realistic bandwidth). */ + if (cast_vec_relabel_compat(in_type, out_type)) { + size_t esz = (size_t)ray_elem_size(out_type); + if (n2 > 0 && esz > 0) { + const char* sp = (const char*)ray_data(val); + char* dp = (char*)ray_data(vec); + size_t total = (size_t)n2 * esz; + const size_t chunk_bytes = (size_t)1 << 20; /* 1 MiB */ + size_t off = 0; + while (off < total) { + if (CANCELLED()) return ray_error("cancel", NULL); + size_t cn = total - off; + if (cn > chunk_bytes) cn = chunk_bytes; + memcpy(dp + off, sp + off, cn); + off += cn; + } + } + /* Post-check: a cancel landing in the final chunk would have + * been missed by the in-loop check (we copy then exit). 
*/ + if (CANCELLED()) return ray_error("cancel", NULL); + return vec; + } + + const void* src_p = ray_data(val); + void* dst_p = ray_data(vec); + + /* Three return states from this point on (helper does NOT touch + * `vec`'s reference count): + * + * - `vec` : success, fully populated, no cancel observed + * - error pointer : cancellation observed at any point — the + * helper bails out as soon as it notices, + * even mid-loop in the single-thread path + * - NULL : (in_type, out_type) pair unsupported here + * AND no cancellation observed — caller may + * safely fall through to the per-atom slow + * path with `vec` still valid */ + + if (pool && n2 >= CAST_PAR_MIN_ELEMS && ray_pool_total_workers(pool) >= 2) { + cast_par_ctx_t pctx = { .src = src_p, .dst = dst_p, + .in_type = in_type, .out_type = out_type }; + /* Probe the worker on a single element to verify the pair is + * supported here. If unsupported, fall through (NULL) — but + * still re-check cancel first so a cancel raced into the probe + * window is not swallowed. */ + if (n2 > 0 && cast_range_worker(src_p, dst_p, 0, 1, in_type, out_type)) { + ray_pool_dispatch(pool, cast_par_fn, &pctx, n2); + if (CANCELLED()) return ray_error("cancel", NULL); + return vec; + } + CHECK_CANCEL_OR(NULL); + } + + /* Chunked single-thread path. Tight typed loops vectorize well + * but block cancellation for the whole `n2` range — chunk into + * cache-sized pieces so cancel is honored within ~one chunk + * (64K elements ≈ 50 µs at realistic bandwidth). */ + if (n2 == 0) + CHECK_CANCEL_OR(vec); + /* Re-check cancel right before the first chunk runs (entry cancel + * check above is over the whole helper, but if a cancel raced in + * between the relabel path and here we want to bail before doing + * any work). */ + if (CANCELLED()) return ray_error("cancel", NULL); + int64_t chunk = (int64_t)65536; + int64_t lo = 0; + int64_t hi = (n2 < chunk) ? 
n2 : chunk; + /* Probe the first chunk; if it fails, the (in, out) pair is + * unsupported here and the caller falls through. */ + if (!cast_range_worker(src_p, dst_p, lo, hi, in_type, out_type)) + CHECK_CANCEL_OR(NULL); + lo = hi; + while (lo < n2) { + if (CANCELLED()) return ray_error("cancel", NULL); + hi = lo + chunk; + if (hi > n2) hi = n2; + cast_range_worker(src_p, dst_p, lo, hi, in_type, out_type); + lo = hi; + } + CHECK_CANCEL_OR(vec); +#undef CHECK_CANCEL_OR +#undef CANCELLED +} + +/* Helper: cast a vector/list to a numeric/temporal/bool type. + * Handles I64, I32, I16, U8, F64, BOOL, DATE, TIME, TIMESTAMP, SYM. + * Fast path for typed numeric input vectors (no per-element atoms); + * generic path for RAY_LIST and other shapes. */ +static ray_t* cast_vec_numeric(ray_t* type_sym, ray_t* val, int8_t out_type) { + int64_t n2 = val->len; + ray_t* vec = ray_vec_new(out_type, n2); + if (RAY_IS_ERR(vec)) return vec; + vec->len = n2; + + /* Fast path: typed numeric vec → numeric vec, no list/string. */ + if (ray_is_vec(val) && val->type != RAY_STR && val->type != RAY_SYM && + val->type != RAY_GUID && out_type != RAY_SYM) { + ray_t* fast = cast_vec_numeric_fast(val, vec, out_type); + /* Three return states (helper does NOT release `vec`): + * - vec on success + * - error pointer on cancellation — caller releases `vec` + * - NULL on unsupported (in_type, out_type) — fall through */ + if (RAY_IS_ERR(fast)) { ray_release(vec); return fast; } + if (fast != NULL) { + /* Close the cancellation gap that surrounds the post-cast + * nullmap copy. cast_vec_copy_nulls runs after the + * cancel-aware fast cast — for nullable inputs it does a + * bitmap copy (and a per-element scan on RAY_LIST inputs + * of length n2). A cancel arriving in that window would + * otherwise be masked by the success return. Pre-check + * gates the nullmap work; post-check catches a cancel + * landing during it. 
*/ + ray_pool_t* fp = ray_pool_get(); +#define _FP_CANCELLED() ((fp && atomic_load_explicit(&fp->cancelled, \ + memory_order_acquire)) \ + || ray_interrupted()) + if (_FP_CANCELLED()) { ray_release(vec); return ray_error("cancel", NULL); } + ray_t* result = cast_vec_copy_nulls(vec, val); + if (RAY_IS_ERR(result)) return result; + if (_FP_CANCELLED()) { ray_release(vec); return ray_error("cancel", NULL); } +#undef _FP_CANCELLED + return vec; + } + } + + /* Fast path: STR vec → SYM vec. Direct intern from each element's + * (ptr, len), no atom alloc or recursive cast. ray_sym_intern uses + * the table's coarse lock so this stays single-threaded — but it + * skips ~150 ns of overhead per row. */ + if (out_type == RAY_SYM && ray_is_vec(val) && val->type == RAY_STR) { + int64_t* ids = (int64_t*)ray_data(vec); + for (int64_t i = 0; i < n2; i++) { + size_t slen = 0; + const char* sp = ray_str_vec_get(val, i, &slen); + int64_t id = ray_sym_intern(sp ? sp : "", sp ? slen : 0); + if (id < 0) { ray_release(vec); return ray_error("oom", NULL); } + ids[i] = id; + } + ray_t* result = cast_vec_copy_nulls(vec, val); + if (RAY_IS_ERR(result)) return result; + return vec; + } + + void* out = ray_data(vec); + for (int64_t i = 0; i < n2; i++) { + int alloc = 0; + ray_t* elem = collection_elem(val, i, &alloc); + if (RAY_IS_ERR(elem)) { ray_release(vec); return elem; } + ray_t* cast = ray_cast_fn(type_sym, elem); + if (alloc) ray_release(elem); + if (RAY_IS_ERR(cast)) { ray_release(vec); return cast; } + switch (out_type) { + case RAY_I64: case RAY_TIMESTAMP: case RAY_SYM: + ((int64_t*)out)[i] = cast->i64; break; + case RAY_I32: case RAY_DATE: case RAY_TIME: + ((int32_t*)out)[i] = cast->i32; break; + case RAY_I16: ((int16_t*)out)[i] = cast->i16; break; + case RAY_U8: ((uint8_t*)out)[i] = cast->u8; break; + case RAY_F64: ((double*)out)[i] = cast->f64; break; + case RAY_BOOL: ((bool*)out)[i] = cast->b8; break; + default: break; + } + ray_release(cast); + } + ray_t* result = 
cast_vec_copy_nulls(vec, val);
+    if (RAY_IS_ERR(result)) return result;
+    return vec;
+}
+
+/* Value of one hexadecimal digit, or -1 when `c` is not a hex digit. */
+static int cast_hex_nibble(char c) {
+    if (c >= '0' && c <= '9') return c - '0';
+    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+    return -1;
+}
+
+/* Convert a calendar date to days since 2000-01-01 (negative before it).
+ * Returns false, leaving *out_days untouched, when the month is outside
+ * 1..12 or the day outside 1..31; an unchecked month would index past
+ * the month-length table.  Days beyond the end of a short month roll
+ * over into the next one, as they always have. */
+static bool cast_ymd_to_days(int y, int m, int d, int64_t* out_days) {
+    static const int month_len[] = {0,31,28,31,30,31,30,31,31,30,31,30,31};
+    if (m < 1 || m > 12 || d < 1 || d > 31) return false;
+    int64_t days = 0;
+    for (int ty = 2000; ty < y; ty++)
+        days += (ty % 4 == 0 && (ty % 100 != 0 || ty % 400 == 0)) ? 366 : 365;
+    for (int ty = y; ty < 2000; ty++)
+        days -= (ty % 4 == 0 && (ty % 100 != 0 || ty % 400 == 0)) ? 366 : 365;
+    int leap = (y % 4 == 0 && (y % 100 != 0 || y % 400 == 0));
+    for (int mi = 1; mi < m; mi++)
+        days += month_len[mi] + (mi == 2 && leap ? 1 : 0);
+    days += d - 1;
+    *out_days = days;
+    return true;
+}
+
+/* Intern (p, len) and wrap the id in a symbol atom.  A NULL pointer is
+ * interned as the empty string and a negative id is reported as "oom",
+ * matching the STR-vector -> SYM path in cast_vec_numeric. */
+static ray_t* cast_intern_sym(const char* p, size_t len) {
+    int64_t id = ray_sym_intern(p ? p : "", p ? len : 0);
+    if (id < 0) return ray_error("oom", NULL);
+    return ray_sym(id);
+}
+
+/* Cast `val` to the type named by the symbol atom `type_sym`.
+ *
+ * Recognised names: I64/i64, I32/i32, I16/i16, U8/u8, F64/f64,
+ * BOOL/bool/B8/b8, STR/str, SYMBOL/symbol/sym, DATE/date, TIME/time,
+ * TIMESTAMP/timestamp, GUID/guid, DICT and TABLE.
+ *
+ * A typed null atom becomes a typed null of the target type (an empty
+ * string for STR).  A value that already has the target type is
+ * retained and returned as is.  Vectors and lists are cast element by
+ * element.
+ *
+ * Errors: "type" when type_sym is not a symbol atom or the value shape
+ * is not supported for the target; "domain" when the type name is
+ * unknown or a string cannot be parsed; "oom" when interning fails. */
+ray_t* ray_cast_fn(ray_t* type_sym, ray_t* val) {
+    if (type_sym->type != -RAY_SYM) return ray_error("type", NULL);
+    /* Null propagation: casting a typed null atom produces a typed null of target type */
+    if (ray_is_atom(val) && RAY_ATOM_IS_NULL(val)) {
+        ray_t* s2 = ray_sym_str(type_sym->i64);
+        if (!s2) return ray_error("domain", NULL);
+        const char* tn = ray_str_ptr(s2);
+        size_t tl = ray_str_len(s2);
+        int8_t tt = 0;
+        if (cast_match(tn, tl, "I64") || cast_match(tn, tl, "i64")) tt = -RAY_I64;
+        else if (cast_match(tn, tl, "I32") || cast_match(tn, tl, "i32")) tt = -RAY_I32;
+        else if (cast_match(tn, tl, "I16") || cast_match(tn, tl, "i16")) tt = -RAY_I16;
+        else if (cast_match(tn, tl, "U8") || cast_match(tn, tl, "u8")) tt = -RAY_U8;
+        else if (cast_match(tn, tl, "F64") || cast_match(tn, tl, "f64")) tt = -RAY_F64;
+        else if (cast_match(tn, tl, "BOOL") || cast_match(tn, tl, "bool") || cast_match(tn, tl, "B8") || cast_match(tn, tl, "b8")) tt = -RAY_BOOL;
+        else if (cast_match(tn, tl, "SYMBOL") || cast_match(tn, tl, "symbol") || cast_match(tn, tl, "sym")) tt = -RAY_SYM;
+        else if (cast_match(tn, tl, "DATE") || cast_match(tn, tl, "date")) tt = -RAY_DATE;
+        else if (cast_match(tn, tl, "TIME") || cast_match(tn, tl, "time")) tt = -RAY_TIME;
+        else if (cast_match(tn, tl, "TIMESTAMP") || cast_match(tn, tl, "timestamp")) tt = -RAY_TIMESTAMP;
+        else if (cast_match(tn, tl, "GUID") || cast_match(tn, tl, "guid")) tt = -RAY_GUID;
+        else if (cast_match(tn, tl, "STR") || cast_match(tn, tl, "str")) { ray_release(s2); return ray_str("", 0); }
+        ray_release(s2);
+        if (tt) return ray_typed_null(tt);
+        return ray_error("domain", NULL);
+    }
+    ray_t* s = ray_sym_str(type_sym->i64);
+    if (!s) return ray_error("domain", NULL);
+    /* tname points into `s`; each branch releases `s` right after its
+     * name test succeeds and never touches tname again. */
+    const char* tname = ray_str_ptr(s);
+    size_t tlen = ray_str_len(s);
+
+    /* Cast to I64 / i64 */
+    if (cast_match(tname, tlen, "I64") || cast_match(tname, tlen, "i64")) {
+        ray_release(s);
+        if (val->type == -RAY_I64) { ray_retain(val); return val; }
+        if (val->type == -RAY_F64) return make_i64((int64_t)val->f64);
+        if (val->type == -RAY_BOOL) return make_i64(val->b8 ? 1 : 0);
+        if (val->type == -RAY_I32 || val->type == -RAY_DATE || val->type == -RAY_TIME)
+            return make_i64(val->i32);
+        if (val->type == -RAY_TIMESTAMP) return make_i64(val->i64);
+        if (val->type == -RAY_I16) return make_i64(val->i16);
+        if (val->type == -RAY_U8) return make_i64(val->u8);
+        if (val->type == -RAY_STR) {
+            const char* sp = ray_str_ptr(val);
+            if (!sp) return ray_error("domain", NULL);
+            char* end;
+            int64_t v = strtoll(sp, &end, 10);
+            if (end == sp) return ray_error("domain", NULL);
+            return make_i64(v);
+        }
+        /* Vector/list cast */
+        if (ray_is_vec(val) || val->type == RAY_LIST)
+            return cast_vec_numeric(type_sym, val, RAY_I64);
+        return ray_error("type", NULL);
+    }
+    /* Cast to I32 / i32 */
+    if (cast_match(tname, tlen, "I32") || cast_match(tname, tlen, "i32")) {
+        ray_release(s);
+        if (val->type == -RAY_I32) { ray_retain(val); return val; }
+        if (val->type == -RAY_BOOL) return ray_i32(val->b8 ? 1 : 0);
+        if (val->type == -RAY_U8) return ray_i32((int32_t)val->u8);
+        if (val->type == -RAY_I16) return ray_i32(val->i16);
+        if (val->type == -RAY_I64) return ray_i32((int32_t)val->i64);
+        if (val->type == -RAY_F64) return ray_i32((int32_t)val->f64);
+        if (val->type == -RAY_DATE || val->type == -RAY_TIME) return ray_i32(val->i32);
+        if (val->type == -RAY_TIMESTAMP) return ray_i32((int32_t)val->i64);
+        if (val->type == -RAY_STR) {
+            const char* sp = ray_str_ptr(val);
+            if (!sp) return ray_error("domain", NULL);
+            char* end;
+            long v = strtol(sp, &end, 10);
+            if (end == sp) return ray_error("domain", NULL);
+            return ray_i32((int32_t)v);
+        }
+        /* Vector cast */
+        if (ray_is_vec(val) || val->type == RAY_LIST)
+            return cast_vec_numeric(type_sym, val, RAY_I32);
+        return ray_error("type", NULL);
+    }
+    /* Cast to I16 / i16 */
+    if (cast_match(tname, tlen, "I16") || cast_match(tname, tlen, "i16")) {
+        ray_release(s);
+        if (val->type == -RAY_I16) { ray_retain(val); return val; }
+        if (val->type == -RAY_BOOL) return ray_i16(val->b8 ? 1 : 0);
+        if (val->type == -RAY_U8) return ray_i16((int16_t)val->u8);
+        if (val->type == -RAY_I32) return ray_i16((int16_t)val->i32);
+        if (val->type == -RAY_I64) return ray_i16((int16_t)val->i64);
+        if (val->type == -RAY_F64) return ray_i16((int16_t)val->f64);
+        if (val->type == -RAY_DATE || val->type == -RAY_TIME) return ray_i16((int16_t)val->i32);
+        if (val->type == -RAY_TIMESTAMP) return ray_i16((int16_t)val->i64);
+        if (val->type == -RAY_STR) {
+            const char* sp = ray_str_ptr(val);
+            if (!sp) return ray_error("domain", NULL);
+            char* end;
+            long v = strtol(sp, &end, 10);
+            if (end == sp) return ray_error("domain", NULL);
+            return ray_i16((int16_t)v);
+        }
+        /* Vector cast */
+        if (ray_is_vec(val) || val->type == RAY_LIST)
+            return cast_vec_numeric(type_sym, val, RAY_I16);
+        return ray_error("type", NULL);
+    }
+    /* Cast to F64 / f64 */
+    if (cast_match(tname, tlen, "F64") || cast_match(tname, tlen, "f64")) {
+        ray_release(s);
+        if (val->type == -RAY_F64) { ray_retain(val); return val; }
+        if (val->type == -RAY_BOOL) return make_f64(val->b8 ? 1.0 : 0.0);
+        if (val->type == -RAY_I64) return make_f64((double)val->i64);
+        if (val->type == -RAY_I32) return make_f64((double)val->i32);
+        if (val->type == -RAY_I16) return make_f64((double)val->i16);
+        if (val->type == -RAY_U8) return make_f64((double)val->u8);
+        if (val->type == -RAY_DATE || val->type == -RAY_TIME) return make_f64((double)val->i32);
+        if (val->type == -RAY_TIMESTAMP) return make_f64((double)val->i64);
+        if (val->type == -RAY_STR) {
+            const char* sp = ray_str_ptr(val);
+            if (!sp) return ray_error("domain", NULL);
+            char* end;
+            double v = strtod(sp, &end);
+            if (end == sp) return ray_error("domain", NULL);
+            return make_f64(v);
+        }
+        /* Vector cast */
+        if (ray_is_vec(val) || val->type == RAY_LIST)
+            return cast_vec_numeric(type_sym, val, RAY_F64);
+        return ray_error("type", NULL);
+    }
+    /* Cast to B8/BOOL/b8/bool.  Lowercase "bool" is accepted here because
+     * the null-propagation branch above already accepts it. */
+    if (cast_match(tname, tlen, "BOOL") || cast_match(tname, tlen, "bool") ||
+        cast_match(tname, tlen, "B8") || cast_match(tname, tlen, "b8")) {
+        ray_release(s);
+        if (val->type == -RAY_BOOL) { ray_retain(val); return val; }
+        if (val->type == -RAY_I64) return make_bool(val->i64 != 0 ? 1 : 0);
+        if (val->type == -RAY_I32) return make_bool(val->i32 != 0 ? 1 : 0);
+        if (val->type == -RAY_I16) return make_bool(val->i16 != 0 ? 1 : 0);
+        if (val->type == -RAY_U8) return make_bool(val->u8 != 0 ? 1 : 0);
+        if (val->type == -RAY_F64) return make_bool(val->f64 != 0.0 ? 1 : 0);
+        if (val->type == -RAY_DATE) return make_bool(val->i32 != 0 ? 1 : 0);
+        if (val->type == -RAY_TIME) return make_bool(val->i32 != 0 ? 1 : 0);
+        if (val->type == -RAY_TIMESTAMP) return make_bool(val->i64 != 0 ? 1 : 0);
+        if (val->type == -RAY_STR) return make_bool(ray_str_len(val) > 0 ? 1 : 0);
+        /* Vector cast: b8/B8 */
+        if (ray_is_vec(val) || val->type == RAY_LIST)
+            return cast_vec_numeric(type_sym, val, RAY_BOOL);
+        return ray_error("type", NULL);
+    }
+    /* Cast to STR/str */
+    if (cast_match(tname, tlen, "STR") || cast_match(tname, tlen, "str")) {
+        ray_release(s);
+        if (val->type == -RAY_STR) { ray_retain(val); return val; }
+        if (val->type == -RAY_SYM) {
+            ray_t* sym_str = ray_sym_str(val->i64);
+            return sym_str ? sym_str : ray_str("", 0);
+        }
+        if (val->type == -RAY_I64) {
+            char buf[32]; int n2 = snprintf(buf, sizeof(buf), "%lld", (long long)val->i64);
+            return ray_str(buf, (size_t)n2);
+        }
+        if (val->type == -RAY_I32) {
+            char buf[32]; int n2 = snprintf(buf, sizeof(buf), "%d", (int)val->i32);
+            return ray_str(buf, (size_t)n2);
+        }
+        if (val->type == -RAY_I16) {
+            char buf[32]; int n2 = snprintf(buf, sizeof(buf), "%d", (int)val->i16);
+            return ray_str(buf, (size_t)n2);
+        }
+        if (val->type == -RAY_F64) {
+            double fv = val->f64;
+            if (fv == 0.0 && signbit(fv)) fv = 0.0; /* print -0.0 as 0 */
+            char buf[32]; int n2 = snprintf(buf, sizeof(buf), "%g", fv);
+            return ray_str(buf, (size_t)n2);
+        }
+        if (val->type == -RAY_BOOL) {
+            return val->b8 ? ray_str("true", 4) : ray_str("false", 5);
+        }
+        /* Fallback: use ray_fmt for any other atom type */
+        if (ray_is_atom(val)) {
+            ray_t* formatted = ray_fmt(val, 0);
+            if (formatted && !RAY_IS_ERR(formatted)) return formatted;
+            if (formatted) ray_release(formatted);
+        }
+        /* Vector/list -> STR vector: cast each element to string */
+        if (ray_is_vec(val) || val->type == RAY_LIST) {
+            int64_t n2 = val->len;
+            ray_t* vec = ray_vec_new(RAY_STR, n2);
+            if (!vec || RAY_IS_ERR(vec)) return vec ? vec : ray_error("oom", NULL);
+            for (int64_t i = 0; i < n2; i++) {
+                int alloc = 0;
+                ray_t* elem = collection_elem(val, i, &alloc);
+                if (RAY_IS_ERR(elem)) { ray_release(vec); return elem; }
+                ray_t* cast = ray_cast_fn(type_sym, elem);
+                if (alloc) ray_release(elem);
+                if (RAY_IS_ERR(cast)) { ray_release(vec); return cast; }
+                const char* sp = ray_str_ptr(cast);
+                size_t slen = ray_str_len(cast);
+                vec = ray_str_vec_append(vec, sp ? sp : "", sp ? slen : 0);
+                ray_release(cast);
+                if (RAY_IS_ERR(vec)) return vec;
+            }
+            ray_t* result = cast_vec_copy_nulls(vec, val);
+            if (RAY_IS_ERR(result)) return result;
+            return vec;
+        }
+        return ray_error("type", NULL);
+    }
+    /* Cast to SYMBOL/sym */
+    if (cast_match(tname, tlen, "SYMBOL") || cast_match(tname, tlen, "sym") || cast_match(tname, tlen, "symbol")) {
+        ray_release(s);
+        if (val->type == -RAY_SYM) { ray_retain(val); return val; }
+        if (val->type == -RAY_STR)
+            return cast_intern_sym(ray_str_ptr(val), ray_str_len(val));
+        /* Integer/bool atom -> symbol: convert to plain number string */
+        if (ray_is_atom(val) && (is_numeric(val) || val->type == -RAY_BOOL)) {
+            char buf[64]; int n2;
+            if (val->type == -RAY_BOOL) n2 = snprintf(buf, sizeof(buf), "%d", (int)val->b8);
+            else if (val->type == -RAY_U8) n2 = snprintf(buf, sizeof(buf), "%u", (unsigned)val->u8);
+            else if (val->type == -RAY_I16) n2 = snprintf(buf, sizeof(buf), "%d", (int)val->i16);
+            else if (val->type == -RAY_I32) n2 = snprintf(buf, sizeof(buf), "%d", (int)val->i32);
+            else if (val->type == -RAY_F64) {
+                double fv = val->f64;
+                if (fv == 0.0 && signbit(fv)) fv = 0.0;
+                n2 = snprintf(buf, sizeof(buf), "%.17g", fv);
+            }
+            else n2 = snprintf(buf, sizeof(buf), "%lld", (long long)as_i64(val));
+            if (n2 > 0) return cast_intern_sym(buf, (size_t)n2);
+        }
+        /* Temporal/guid atom -> symbol: use ray_fmt for formatting */
+        if (ray_is_atom(val) && (is_temporal(val) || val->type == -RAY_GUID)) {
+            ray_t* formatted = ray_fmt(val, 0);
+            if (formatted && !RAY_IS_ERR(formatted)) {
+                ray_t* sym = cast_intern_sym(ray_str_ptr(formatted), ray_str_len(formatted));
+                ray_release(formatted);
+                return sym;
+            }
+            if (formatted) ray_release(formatted);
+        }
+        /* Vector cast: SYMBOL vec from other vecs */
+        if (ray_is_vec(val) || val->type == RAY_LIST)
+            return cast_vec_numeric(type_sym, val, RAY_SYM);
+        return ray_error("type", NULL);
+    }
+    /* Cast to DATE/date */
+    if (cast_match(tname, tlen, "DATE") || cast_match(tname, tlen, "date")) {
+        ray_release(s);
+        if (val->type == -RAY_DATE) { ray_retain(val); return val; }
+        if (val->type == -RAY_BOOL) return ray_date((int64_t)val->b8);
+        if (val->type == -RAY_U8) return ray_date((int64_t)val->u8);
+        if (val->type == -RAY_I16) return ray_date((int64_t)val->i16);
+        if (val->type == -RAY_I32) return ray_date((int64_t)val->i32);
+        if (val->type == -RAY_I64) return ray_date(val->i64);
+        if (val->type == -RAY_F64) return ray_date((int64_t)val->f64);
+        if (val->type == -RAY_TIME) return ray_date((int64_t)val->i32);
+        if (val->type == -RAY_TIMESTAMP) return ray_date(ts_days_floor(val->i64));
+        if (val->type == -RAY_STR) {
+            /* Parse "YYYY.MM.DD" format */
+            const char* sp = ray_str_ptr(val);
+            if (!sp) return ray_error("domain", NULL);
+            int y, m, d2;
+            if (sscanf(sp, "%d.%d.%d", &y, &m, &d2) != 3) return ray_error("domain", NULL);
+            int64_t days;
+            if (!cast_ymd_to_days(y, m, d2, &days)) return ray_error("domain", NULL);
+            return ray_date(days);
+        }
+        /* Vector cast */
+        if (ray_is_vec(val) || val->type == RAY_LIST)
+            return cast_vec_numeric(type_sym, val, RAY_DATE);
+        return ray_error("type", NULL);
+    }
+    /* Cast to TIME/time */
+    if (cast_match(tname, tlen, "TIME") || cast_match(tname, tlen, "time")) {
+        ray_release(s);
+        if (val->type == -RAY_TIME) { ray_retain(val); return val; }
+        if (val->type == -RAY_BOOL) return ray_time((int64_t)val->b8);
+        if (val->type == -RAY_U8) return ray_time((int64_t)val->u8);
+        if (val->type == -RAY_I16) return ray_time((int64_t)val->i16);
+        if (val->type == -RAY_I32) return ray_time((int64_t)val->i32);
+        if (val->type == -RAY_I64) return ray_time(val->i64);
+        if (val->type == -RAY_F64) return ray_time((int64_t)val->f64);
+        if (val->type == -RAY_DATE) return ray_time((int64_t)val->i32);
+        if (val->type == -RAY_TIMESTAMP)
+            /* TIMESTAMP is ns since epoch; TIME stores ms-of-day.  Use
+             * floor-mod (not C-style truncate-toward-zero %) so pre-2000
+             * timestamps give time-of-day in [0, 86_400_000) ms,
+             * matching wall-clock semantics. */
+            return ray_time((int64_t)(ts_ns_in_day(val->i64) / 1000000LL));
+        if (val->type == -RAY_STR) {
+            /* Parse "HH:MM:SS[.mmm]" */
+            const char* sp = ray_str_ptr(val);
+            if (!sp) return ray_error("domain", NULL);
+            int th = 0, tm = 0, ts = 0, tms = 0;
+            int nr = sscanf(sp, "%d:%d:%d", &th, &tm, &ts);
+            if (nr < 2) return ray_error("domain", NULL);
+            const char* dot = strchr(sp, '.');
+            if (dot) {
+                dot++;
+                /* Right-pad to three digits so ".5" reads as 500 ms. */
+                char mbuf[4] = "000";
+                int mi = 0;
+                while (*dot >= '0' && *dot <= '9' && mi < 3) mbuf[mi++] = *dot++;
+                tms = (int)strtol(mbuf, NULL, 10);
+            }
+            int32_t ms = (int32_t)th * 3600000 + (int32_t)tm * 60000 + (int32_t)ts * 1000 + tms;
+            return ray_time((int64_t)ms);
+        }
+        /* Vector cast */
+        if (ray_is_vec(val) || val->type == RAY_LIST)
+            return cast_vec_numeric(type_sym, val, RAY_TIME);
+        return ray_error("type", NULL);
+    }
+    /* Cast to TIMESTAMP/timestamp */
+    if (cast_match(tname, tlen, "TIMESTAMP") || cast_match(tname, tlen, "timestamp")) {
+        ray_release(s);
+        if (val->type == -RAY_TIMESTAMP) { ray_retain(val); return val; }
+        if (val->type == -RAY_BOOL) return ray_timestamp((int64_t)val->b8);
+        if (val->type == -RAY_U8) return ray_timestamp((int64_t)val->u8);
+        if (val->type == -RAY_I16) return ray_timestamp((int64_t)val->i16);
+        if (val->type == -RAY_I32) return ray_timestamp((int64_t)val->i32);
+        if (val->type == -RAY_I64) return ray_timestamp(val->i64);
+        if (val->type == -RAY_F64) return ray_timestamp((int64_t)val->f64);
+        if (val->type == -RAY_TIME) return ray_timestamp((int64_t)val->i32);
+        if (val->type == -RAY_DATE) {
+            int64_t days = val->i32;
+            return ray_timestamp(days * 24LL * 60 * 60 * 1000000000LL);
+        }
+        /* ISO string -> timestamp: "YYYY-MM-DD[T ]HH:MM:SS[.nnn...]" or "YYYY.MM.DDDHH:MM:SS.nnn..." */
+        if (val->type == -RAY_STR) {
+            const char* sp = ray_str_ptr(val);
+            size_t sl = ray_str_len(val);
+            if (!sp || sl < 10) return ray_error("domain", NULL);
+            int y, m, d, hh = 0, mm = 0, ss = 0;
+            long long frac = 0;
+            /* Parse the date: try YYYY-MM-DD, then YYYY.MM.DD */
+            int parsed = sscanf(sp, "%d-%d-%d", &y, &m, &d);
+            if (parsed != 3) parsed = sscanf(sp, "%d.%d.%d", &y, &m, &d);
+            if (parsed != 3) return ray_error("domain", NULL);
+            /* Parse optional time part */
+            if (sl > 10 && (sp[10] == 'T' || sp[10] == ' ' || sp[10] == 'D')) {
+                sscanf(sp + 11, "%d:%d:%d", &hh, &mm, &ss);
+                /* Parse fractional seconds, right-padded to nanoseconds */
+                const char* dot = memchr(sp + 11, '.', sl - 11);
+                if (dot) {
+                    dot++;
+                    char fbuf[10] = "000000000";
+                    int fi = 0;
+                    while (*dot >= '0' && *dot <= '9' && fi < 9) fbuf[fi++] = *dot++;
+                    frac = strtoll(fbuf, NULL, 10);
+                }
+            }
+            /* Convert to days since 2000-01-01 */
+            int64_t days;
+            if (!cast_ymd_to_days(y, m, d, &days)) return ray_error("domain", NULL);
+            int64_t ns = days * 86400000000000LL + (int64_t)hh * 3600000000000LL
+                       + (int64_t)mm * 60000000000LL + (int64_t)ss * 1000000000LL + frac;
+            /* Handle timezone offset: Z, +HH:MM, -HH:MM, +HHMM, -HHMM */
+            if (sl > 19) {
+                const char* tz = sp + 19; /* after YYYY-MM-DDTHH:MM:SS */
+                /* Skip fractional seconds */
+                if (tz < sp + sl && *tz == '.') {
+                    tz++;
+                    while (tz < sp + sl && *tz >= '0' && *tz <= '9') tz++;
+                }
+                if (tz < sp + sl) {
+                    if (*tz == 'Z') {
+                        /* UTC, no adjustment */
+                    } else if (*tz == '+' || *tz == '-') {
+                        int tz_sign = (*tz == '+') ? 1 : -1;
+                        int tz_hh = 0, tz_mm = 0;
+                        tz++;
+                        /* Parse HH:MM or HHMM */
+                        if (tz + 4 < sp + sl && tz[2] == ':') {
+                            sscanf(tz, "%2d:%2d", &tz_hh, &tz_mm);
+                        } else {
+                            sscanf(tz, "%2d%2d", &tz_hh, &tz_mm);
+                        }
+                        int64_t tz_ns = ((int64_t)tz_hh * 3600 + (int64_t)tz_mm * 60) * 1000000000LL;
+                        ns -= tz_sign * tz_ns;
+                    }
+                }
+            }
+            return ray_timestamp(ns);
+        }
+        /* Vector cast */
+        if (ray_is_vec(val) || val->type == RAY_LIST)
+            return cast_vec_numeric(type_sym, val, RAY_TIMESTAMP);
+        return ray_error("type", NULL);
+    }
+    /* Cast to GUID/guid */
+    if (cast_match(tname, tlen, "GUID") || cast_match(tname, tlen, "guid")) {
+        ray_release(s);
+        if (val->type == -RAY_GUID) { ray_retain(val); return val; }
+        if (val->type == -RAY_STR) {
+            /* Parse UUID string: "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx".
+             * A dash is tolerated before any byte.  Every read is
+             * bounds-checked and every digit validated, so malformed
+             * text yields "domain" instead of reading past the string
+             * or producing garbage bytes. */
+            const char* sp = ray_str_ptr(val);
+            size_t sl = ray_str_len(val);
+            if (!sp || sl < 36) return ray_error("domain", NULL);
+            uint8_t bytes[16];
+            const char* p = sp;
+            const char* stop = sp + sl;
+            for (int bi = 0; bi < 16; bi++) {
+                if (p < stop && *p == '-') p++;
+                if (stop - p < 2) return ray_error("domain", NULL);
+                int h = cast_hex_nibble(p[0]);
+                int l = cast_hex_nibble(p[1]);
+                if (h < 0 || l < 0) return ray_error("domain", NULL);
+                bytes[bi] = (uint8_t)((h << 4) | l);
+                p += 2;
+            }
+            return ray_guid(bytes);
+        }
+        /* Vector of GUIDs: empty vector cast */
+        if (ray_is_vec(val) && val->len == 0)
+            return ray_vec_new(RAY_GUID, 0);
+        /* List of strings -> GUID vector */
+        if (val->type == RAY_LIST) {
+            int64_t n2 = val->len;
+            ray_t* vec = ray_vec_new(RAY_GUID, n2);
+            if (RAY_IS_ERR(vec)) return vec;
+            vec->len = n2;
+            uint8_t* data = (uint8_t*)ray_data(vec);
+            ray_t** items = (ray_t**)ray_data(val);
+            for (int64_t i = 0; i < n2; i++) {
+                ray_t* cast = ray_cast_fn(type_sym, items[i]);
+                if (RAY_IS_ERR(cast)) { ray_release(vec); return cast; }
+                /* The 16 bytes are read from cast->obj when set, else from the atom itself. */
+                if (cast->obj) memcpy(data + i * 16, ray_data(cast->obj), 16);
+                else memcpy(data + i * 16, ray_data(cast), 16);
+                ray_release(cast);
+            }
+            ray_t* result = cast_vec_copy_nulls(vec, val);
+            if (RAY_IS_ERR(result)) return result;
+            return vec;
+        }
+        return ray_error("type", NULL);
+    }
+    /* Cast to U8/u8 */
+    if (cast_match(tname, tlen, "U8") || cast_match(tname, tlen, "u8")) {
+        ray_release(s);
+        if (val->type == -RAY_U8) { ray_retain(val); return val; }
+        if (val->type == -RAY_BOOL) return ray_u8(val->b8 ? 1 : 0);
+        if (val->type == -RAY_I16) return ray_u8((uint8_t)val->i16);
+        if (val->type == -RAY_I32) return ray_u8((uint8_t)val->i32);
+        if (val->type == -RAY_I64) return ray_u8((uint8_t)val->i64);
+        if (val->type == -RAY_F64) return ray_u8((uint8_t)val->f64);
+        if (val->type == -RAY_STR) {
+            const char* sp = ray_str_ptr(val);
+            if (!sp) return ray_error("domain", NULL);
+            char* end; long v = strtol(sp, &end, 10);
+            if (end == sp) return ray_error("domain", NULL);
+            return ray_u8((uint8_t)v);
+        }
+        /* Vector cast */
+        if (ray_is_vec(val) || val->type == RAY_LIST)
+            return cast_vec_numeric(type_sym, val, RAY_U8);
+        return ray_error("type", NULL);
+    }
+    /* Cast to DICT */
+    if (cast_match(tname, tlen, "DICT")) {
+        ray_release(s);
+        if (val->type == RAY_DICT) { ray_retain(val); return val; }
+        /* Table -> Dict */
+        if (val->type == RAY_TABLE) {
+            int64_t ncols = ray_table_ncols(val);
+            ray_t* keys = ray_sym_vec_new(RAY_SYM_W64, ncols);
+            if (RAY_IS_ERR(keys)) return keys;
+            ray_t* vals = ray_list_new(ncols);
+            if (RAY_IS_ERR(vals)) { ray_release(keys); return vals; }
+            for (int64_t c = 0; c < ncols; c++) {
+                int64_t col_name = ray_table_col_name(val, c);
+                ray_t* col_val = ray_table_get_col_idx(val, c);
+                keys = ray_vec_append(keys, &col_name);
+                if (RAY_IS_ERR(keys)) { ray_release(vals); return keys; }
+                vals = ray_list_append(vals, col_val);
+                if (RAY_IS_ERR(vals)) { ray_release(keys); return vals; }
+            }
+            return ray_dict_new(keys, vals);
+        }
+        return ray_error("type", NULL);
+    }
+    /* Cast to TABLE */
+    if (cast_match(tname, tlen, "TABLE")) {
+        ray_release(s);
+        if (val->type == RAY_TABLE) { ray_retain(val); return val; }
+        /* Dict -> Table */
+        if (val->type == RAY_DICT) {
+            ray_t* dkeys = ray_dict_keys(val);
+            ray_t* dvals = ray_dict_vals(val);
+            int64_t ncols = dkeys ? dkeys->len : 0;
+            if (!dkeys || dkeys->type != RAY_SYM || !dvals || dvals->type != RAY_LIST)
+                return ray_error("type", NULL);
+            ray_t** col_ptrs = (ray_t**)ray_data(dvals);
+            ray_t* tbl = ray_table_new(ncols);
+            if (RAY_IS_ERR(tbl)) return tbl;
+            for (int64_t c = 0; c < ncols; c++) {
+                int64_t col_name = ray_read_sym(ray_data(dkeys), c, RAY_SYM, dkeys->attrs);
+                ray_t* col_val = col_ptrs[c];
+                ray_retain(col_val);
+                tbl = ray_table_add_col(tbl, col_name, col_val);
+                ray_release(col_val);
+                if (RAY_IS_ERR(tbl)) return tbl;
+            }
+            return tbl;
+        }
+        return ray_error("type", NULL);
+    }
+    ray_release(s);
+    return ray_error("domain", NULL);
+}
+
+/* (type val) — return the type code of a value */
+/* ray_type_name moved to internal.h */
+
+ray_t* ray_type_fn(ray_t* val) {
+    if (!val || RAY_IS_NULL(val)) return ray_sym(ray_sym_intern("null", 4));
+    const char* name = ray_type_name(val->type);
+    int64_t id = ray_sym_intern(name, strlen(name));
+    return ray_sym(id);
+}
+
+/* (read path) — read a file's contents as a string.
+ * Returns a "type" error when path_obj is not a string atom, "domain"
+ * when it has no character data, "io" when the file cannot be opened,
+ * sized or read, and "oom" when the staging buffer cannot be allocated. */
+ray_t* ray_read_file_fn(ray_t* path_obj) {
+    if (path_obj->type != -RAY_STR) return ray_error("type", NULL);
+    const char* path = ray_str_ptr(path_obj);
+    if (!path) return ray_error("domain", NULL);
+    FILE* fp = fopen(path, "rb");
+    if (!fp) return ray_error("io", NULL);
+    /* Size the file by seeking to its end; a failed seek is an I/O error. */
+    if (fseek(fp, 0, SEEK_END) != 0) { fclose(fp); return ray_error("io", NULL); }
+    long sz = ftell(fp);
+    if (sz < 0 || fseek(fp, 0, SEEK_SET) != 0) { fclose(fp); return ray_error("io", NULL); }
+    /* Use ray_alloc for the buffer */
+    ray_t* buf = ray_alloc((size_t)sz + 1);
+    if (!buf || RAY_IS_ERR(buf)) { fclose(fp); return ray_error("oom", NULL); }
+    char* data = (char*)ray_data(buf);
+    size_t rd = fread(data, 1, (size_t)sz, fp);
+    /* A short read caused by a stream error must not pass as file content. */
+    int read_failed = ferror(fp);
+    fclose(fp);
+    if (read_failed) { ray_release(buf); return ray_error("io", NULL); }
+    data[rd] = '\0';
+    ray_t* result = ray_str(data, rd);
+    ray_release(buf);
+    return result;
+}
+
+/* (load path) — read and evaluate a Rayfall script file via mmap */
+ray_t* ray_load_file_fn(ray_t* path_obj) {
+    if (path_obj->type != -RAY_STR) return ray_error("type", NULL);
+    const char* path = 
ray_str_ptr(path_obj);
+    if (!path) return ray_error("domain", NULL);
+    size_t path_len = ray_str_len(path_obj); /* handed to ray_nfo_create together with path */
+
+#if defined(RAY_OS_WINDOWS)
+    /* Windows: fall back to fread */
+    FILE* fp = fopen(path, "r"); /* opened in text mode; the terminator below is placed
+                                  * at the byte count fread reports (rd), not at sz */
+    if (!fp) return ray_error("io", NULL);
+    fseek(fp, 0, SEEK_END);
+    long sz = ftell(fp);
+    fseek(fp, 0, SEEK_SET);
+    if (sz < 0) { fclose(fp); return ray_error("io", NULL); }
+    if (sz == 0) { fclose(fp); return ray_i64(0); } /* empty file: returns integer 0 without parsing */
+    char* buf = (char*)malloc((size_t)sz + 1);
+    if (!buf) { fclose(fp); return ray_error("oom", NULL); }
+    size_t rd = fread(buf, 1, (size_t)sz, fp);
+    fclose(fp);
+    buf[rd] = '\0';
+
+    ray_t* nfo = ray_nfo_create(path, path_len, buf, rd); /* NOTE(review): nfo is used without an
+                                                           * error check — confirm ray_nfo_create
+                                                           * cannot fail here */
+    ray_t* parsed = ray_parse_with_nfo(buf, nfo);
+    if (RAY_IS_ERR(parsed)) { ray_release(nfo); free(buf); return parsed; }
+
+    ray_t* prev_nfo = ray_eval_get_nfo(); /* remember the nfo currently installed ... */
+    ray_eval_set_nfo(nfo);
+    ray_t* result = ray_eval(parsed);
+    ray_eval_set_nfo(prev_nfo); /* ... and put it back once this file has been evaluated */
+
+    ray_release(parsed);
+    ray_release(nfo);
+    free(buf);
+    return result;
+#else
+    int fd = open(path, O_RDONLY);
+    if (fd < 0) return ray_error("io", NULL);
+    struct stat st;
+    if (fstat(fd, &st) < 0 || st.st_size < 0) { close(fd); return ray_error("io", NULL); }
+    size_t sz = (size_t)st.st_size;
+    if (sz == 0) { close(fd); return ray_i64(0); } /* empty file: returns integer 0 without parsing */
+    char* map = (char*)mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
+    close(fd); /* the descriptor is closed right after mmap; only the mapping is used below */
+    if (map == MAP_FAILED) return ray_error("io", NULL);
+    /* Copy to NUL-terminated buffer -- mmap region may not have a trailing NUL */
+    char* buf = (char*)malloc(sz + 1);
+    if (!buf) { munmap(map, sz); return ray_error("oom", NULL); }
+    memcpy(buf, map, sz);
+    buf[sz] = '\0';
+    munmap(map, sz); /* from here on only the heap copy is referenced */
+
+    ray_t* nfo = ray_nfo_create(path, path_len, buf, sz); /* NOTE(review): nfo is used without an
+                                                           * error check — confirm ray_nfo_create
+                                                           * cannot fail here */
+    ray_t* parsed = ray_parse_with_nfo(buf, nfo);
+    if (RAY_IS_ERR(parsed)) { ray_release(nfo); free(buf); return parsed; }
+
+    ray_t* prev_nfo = ray_eval_get_nfo(); /* remember the nfo currently installed ... */
+    ray_eval_set_nfo(nfo);
+    ray_t* result = ray_eval(parsed);
+    ray_eval_set_nfo(prev_nfo); /* ... and put it back once this file has been evaluated */
+
+    ray_release(parsed);
+    
ray_release(nfo); + free(buf); + return result; +#endif +} + +/* (write path content) — write string to a file */ +ray_t* ray_write_file_fn(ray_t* path_obj, ray_t* content) { + if (path_obj->type != -RAY_STR) return ray_error("type", NULL); + if (content->type != -RAY_STR) return ray_error("type", NULL); + const char* path = ray_str_ptr(path_obj); + const char* data = ray_str_ptr(content); + size_t len = ray_str_len(content); + if (!path || !data) return ray_error("domain", NULL); + FILE* fp = fopen(path, "wb"); + if (!fp) return ray_error("io", NULL); + size_t written = fwrite(data, 1, len, fp); + fclose(fp); + if (written != len) return ray_error("io", NULL); + return make_i64(0); +} + +/* ══════════════════════════════════════════ + * Additional builtins (ported from rayforce) + * ══════════════════════════════════════════ */ + +/* (enlist a b c ...) -> typed vector from atoms */ +ray_t* ray_enlist_fn(ray_t** args, int64_t n) { + if (n == 0) return ray_vec_new(RAY_I64, 0); + /* Determine type from first arg */ + int8_t atype = args[0]->type; + bool homogeneous = true; + bool has_float = (atype == -RAY_F64); + bool has_int = (atype == -RAY_I64); + for (int64_t i = 1; i < n; i++) { + if (args[i]->type != atype) homogeneous = false; + if (args[i]->type == -RAY_F64) has_float = true; + if (args[i]->type == -RAY_I64) has_int = true; + } + /* Mixed int/float -> promote to f64 */ + if (!homogeneous && has_float && has_int) { + ray_t* vec = ray_vec_new(RAY_F64, n); + if (RAY_IS_ERR(vec)) return vec; + double* d = (double*)ray_data(vec); + for (int64_t i = 0; i < n; i++) + d[i] = (args[i]->type == -RAY_F64) ? 
args[i]->f64 : (double)args[i]->i64; + vec->len = n; + for (int64_t i = 0; i < n; i++) { + if (RAY_ATOM_IS_NULL(args[i])) + ray_vec_set_null(vec, i, true); + } + return vec; + } + if (homogeneous && atype < 0) { + int8_t vtype = -atype; + ray_t* vec = ray_vec_new(vtype, n); + if (RAY_IS_ERR(vec)) return vec; + switch (vtype) { + case RAY_I64: case RAY_TIMESTAMP: { + int64_t* d = (int64_t*)ray_data(vec); + for (int64_t i = 0; i < n; i++) d[i] = args[i]->i64; + break; + } + case RAY_F64: { + double* d = (double*)ray_data(vec); + for (int64_t i = 0; i < n; i++) d[i] = args[i]->f64; + break; + } + case RAY_I32: case RAY_DATE: case RAY_TIME: { + int32_t* d = (int32_t*)ray_data(vec); + for (int64_t i = 0; i < n; i++) d[i] = args[i]->i32; + break; + } + case RAY_I16: { + int16_t* d = (int16_t*)ray_data(vec); + for (int64_t i = 0; i < n; i++) d[i] = args[i]->i16; + break; + } + case RAY_BOOL: { + bool* d = (bool*)ray_data(vec); + for (int64_t i = 0; i < n; i++) d[i] = args[i]->b8; + break; + } + case RAY_SYM: { + int64_t* d = (int64_t*)ray_data(vec); + for (int64_t i = 0; i < n; i++) d[i] = args[i]->i64; + break; + } + case RAY_U8: { + uint8_t* d = (uint8_t*)ray_data(vec); + for (int64_t i = 0; i < n; i++) d[i] = args[i]->u8; + break; + } + case RAY_STR: { + ray_t* svec = ray_vec_new(RAY_STR, n); + if (RAY_IS_ERR(svec)) { ray_free(vec); return svec; } + for (int64_t i = 0; i < n; i++) { + svec = ray_str_vec_append(svec, ray_str_ptr(args[i]), ray_str_len(args[i])); + if (RAY_IS_ERR(svec)) return svec; + } + ray_free(vec); + return svec; + } + case RAY_GUID: { + uint8_t* d = (uint8_t*)ray_data(vec); + for (int64_t i = 0; i < n; i++) { + const uint8_t* gd = args[i]->obj ? 
(const uint8_t*)ray_data(args[i]->obj) : (const uint8_t*)ray_data(args[i]); + memcpy(d + i * 16, gd, 16); + } + break; + } + default: goto as_list; + } + vec->len = n; + for (int64_t i = 0; i < n; i++) { + if (RAY_ATOM_IS_NULL(args[i])) + ray_vec_set_null(vec, i, true); + } + return vec; + } +as_list:; + /* Heterogeneous -> list */ + ray_t* lst = ray_list_new((int32_t)n); + if (RAY_IS_ERR(lst)) return lst; + for (int64_t i = 0; i < n; i++) { + ray_retain(args[i]); + lst = ray_list_append(lst, args[i]); + ray_release(args[i]); + if (RAY_IS_ERR(lst)) return lst; + } + return lst; +} + +/* (dict keys vals) -> dict. Wraps two parallel containers as a [keys, + * vals] block. When vals is shorter than keys, the tail is filled with + * typed null I64. Both inputs are copied (refs retained) — caller keeps + * ownership of the originals. */ +ray_t* ray_dict_fn(ray_t* keys, ray_t* vals) { + if (!ray_is_vec(keys)) return ray_error("type", NULL); + int64_t n = keys->len; + + /* Hold a fresh ref to keys so ownership is transferred into the dict. */ + ray_retain(keys); + + /* Materialize vals as RAY_LIST of length n. */ + ray_t* vlist = ray_list_new(n); + if (RAY_IS_ERR(vlist)) { ray_release(keys); return vlist; } + for (int64_t i = 0; i < n; i++) { + ray_t* v; + int alloc = 0; + if (vals->type == RAY_LIST) { + v = (i < vals->len) ? ((ray_t**)ray_data(vals))[i] : NULL; + } else if (ray_is_vec(vals)) { + v = collection_elem(vals, i, &alloc); + } else { + v = vals; + } + if (v && !RAY_IS_ERR(v)) { + vlist = ray_list_append(vlist, v); + if (alloc) ray_release(v); + } else { + ray_t* null_val = ray_typed_null(-RAY_I64); + vlist = ray_list_append(vlist, null_val); + ray_release(null_val); + } + if (RAY_IS_ERR(vlist)) { ray_release(keys); return vlist; } + } + return ray_dict_new(keys, vlist); +} + +/* (nil? 
x) -> true if x is null */
+ray_t* ray_nil_fn(ray_t* x) {
+    /* Short-circuit order matters: x is only dereferenced by the atom
+     * test once the NULL / null-object tests have failed. */
+    bool is_nil = !x || RAY_IS_NULL(x) || (ray_is_atom(x) && RAY_ATOM_IS_NULL(x));
+    return ray_bool(is_nil);
+}
+
+/* (where bool-vec) -> indices of true values.
+ * Requires a BOOL vector ("type" error otherwise).  The result is an
+ * I64 vector holding, in ascending order, every position whose flag is
+ * set. */
+ray_t* ray_where_fn(ray_t* x) {
+    if (!(ray_is_vec(x) && x->type == RAY_BOOL))
+        return ray_error("type", NULL);
+    const bool* flags = (const bool*)ray_data(x);
+    const int64_t total = x->len;
+
+    /* First pass sizes the result exactly. */
+    int64_t hits = 0;
+    for (int64_t k = 0; k < total; k++)
+        hits += flags[k] ? 1 : 0;
+
+    ray_t* indices = ray_vec_new(RAY_I64, hits);
+    if (RAY_IS_ERR(indices)) return indices;
+
+    /* Second pass records the positions. */
+    int64_t* dst = (int64_t*)ray_data(indices);
+    for (int64_t k = 0; k < total; k++) {
+        if (flags[k]) *dst++ = k;
+    }
+    indices->len = hits;
+    return indices;
+}
+
+/* (group vec) -> dict mapping each unique value to its indices */
+/* ---------------------------------------------------------------------------
+ * Open-address hash set for ray_group_fn's scalar / GUID fast paths.
+ *
+ * Each slot holds either GHT_EMPTY or an already-allocated group index.
+ * Lookups compare keys by calling back into the caller with the stored
+ * group index — the caller already knows whether the key shape is a
+ * plain int64 (scalar) or 16 bytes of guid material in the source
+ * column.  Load factor is capped at 0.5; grow on overflow.
+ *
+ * The table is ref-counted via ray_alloc so the main bookkeeping code
+ * can free it in one place on every exit path.
+ * ------------------------------------------------------------------------- */ + +#define GHT_EMPTY 0xFFFFFFFFu + +typedef struct group_ht_t { + ray_t* block; /* backing ray_alloc block */ + uint32_t* slots; /* cap entries */ + uint32_t cap; /* power of 2 */ + uint32_t mask; /* cap - 1 */ + uint32_t count; /* live entries */ +} group_ht_t; + +static bool group_ht_init(group_ht_t* h, uint32_t initial_cap) { + uint32_t cap = 16; + while (cap < initial_cap) cap *= 2; + h->block = ray_alloc((size_t)cap * sizeof(uint32_t)); + if (!h->block || RAY_IS_ERR(h->block)) { h->block = NULL; return false; } + h->slots = (uint32_t*)ray_data(h->block); + h->cap = cap; + h->mask = cap - 1; + h->count = 0; + for (uint32_t i = 0; i < cap; i++) h->slots[i] = GHT_EMPTY; + return true; +} + +static void group_ht_free(group_ht_t* h) { + if (h->block) ray_free(h->block); + h->block = NULL; + h->slots = NULL; + h->cap = h->mask = h->count = 0; +} + +/* Rehash callback: given the stored group index, return the hash for + * it. This lets us grow without recomputing raw keys — caller knows + * how to translate gi back to a key. 
*/ +typedef uint64_t (*group_ht_gi_hash_fn)(uint32_t gi, void* ctx); + +static bool group_ht_grow(group_ht_t* h, group_ht_gi_hash_fn hash_gi, void* ctx) { + uint32_t new_cap = h->cap * 2; + if (new_cap < h->cap) return false; /* overflow */ + ray_t* new_block = ray_alloc((size_t)new_cap * sizeof(uint32_t)); + if (!new_block || RAY_IS_ERR(new_block)) return false; + uint32_t* new_slots = (uint32_t*)ray_data(new_block); + uint32_t new_mask = new_cap - 1; + for (uint32_t i = 0; i < new_cap; i++) new_slots[i] = GHT_EMPTY; + for (uint32_t i = 0; i < h->cap; i++) { + uint32_t gi = h->slots[i]; + if (gi == GHT_EMPTY) continue; + uint64_t hh = hash_gi(gi, ctx); + uint32_t slot = (uint32_t)(hh & new_mask); + while (new_slots[slot] != GHT_EMPTY) slot = (slot + 1) & new_mask; + new_slots[slot] = gi; + } + ray_free(h->block); + h->block = new_block; + h->slots = new_slots; + h->cap = new_cap; + h->mask = new_mask; + return true; +} + +static inline uint64_t mix64(uint64_t h) { + /* Murmur3 fmix64 */ + h ^= h >> 33; h *= 0xFF51AFD7ED558CCDULL; + h ^= h >> 33; h *= 0xC4CEB9FE1A85EC53ULL; + h ^= h >> 33; + return h; +} + +static inline uint64_t hash_guid(const uint8_t* g) { + uint64_t a, b; + memcpy(&a, g, 8); + memcpy(&b, g + 8, 8); + return mix64(a ^ (b * 0x9E3779B97F4A7C15ULL)); +} + +static inline uint64_t hash_i64(int64_t v) { + return mix64((uint64_t)v); +} + +/* Context for GUID rehash: the 16-byte source base and, indirectly, + * gvals — which stores the row_idx of the first occurrence per group. 
*/ +typedef struct { + const uint8_t* base; + const int64_t* gvals; +} ght_guid_ctx_t; + +static uint64_t ght_guid_hash_gi(uint32_t gi, void* ctx) { + ght_guid_ctx_t* c = (ght_guid_ctx_t*)ctx; + return hash_guid(c->base + c->gvals[gi] * 16); +} + +typedef struct { const int64_t* gvals; } ght_i64_ctx_t; +static uint64_t ght_i64_hash_gi(uint32_t gi, void* ctx) { + ght_i64_ctx_t* c = (ght_i64_ctx_t*)ctx; + return hash_i64(c->gvals[gi]); +} + +/* Grow the per-group bookkeeping arrays used by ray_group_fn. + * Doubles capacity; copies existing entries; returns false on OOM. + * Caller is responsible for cleaning up and returning an error if this fails. */ +static bool group_grow(ray_t** val_block, ray_t** ivblock, + int64_t** gvals, ray_t*** idx_vecs, + int64_t cur_count, int64_t* max_groups) { + int64_t new_max = *max_groups * 2; + if (new_max <= *max_groups) return false; /* overflow */ + ray_t* new_val = ray_alloc((size_t)new_max * sizeof(int64_t)); + if (!new_val || RAY_IS_ERR(new_val)) return false; + ray_t* new_iv = ray_alloc((size_t)new_max * sizeof(ray_t*)); + if (!new_iv || RAY_IS_ERR(new_iv)) { ray_free(new_val); return false; } + memcpy(ray_data(new_val), *gvals, (size_t)cur_count * sizeof(int64_t)); + memcpy(ray_data(new_iv), *idx_vecs, (size_t)cur_count * sizeof(ray_t*)); + ray_free(*val_block); + ray_free(*ivblock); + *val_block = new_val; + *ivblock = new_iv; + *gvals = (int64_t*)ray_data(new_val); + *idx_vecs = (ray_t**)ray_data(new_iv); + *max_groups = new_max; + return true; +} + +ray_t* ray_group_fn(ray_t* x) { + if (!ray_is_vec(x) && x->type != RAY_LIST) + return ray_error("type", NULL); + int64_t n = x->len; + if (n == 0) { + ray_t* keys = ray_list_new(0); + if (RAY_IS_ERR(keys)) return keys; + ray_t* vals = ray_list_new(0); + if (RAY_IS_ERR(vals)) { ray_release(keys); return vals; } + return ray_dict_new(keys, vals); + } + + /* Collect unique values; the scalar and RAY_GUID paths grow these + * arrays on demand via group_grow(). 
The RAY_LIST and RAY_STR + * paths below still cap at this initial size (they have their own + * side buffers that aren't yet wired into group_grow); starting at + * 1024 preserves their prior behaviour. */ + int64_t max_groups = n < 1024 ? n : 1024; + ray_t* val_block = ray_alloc((size_t)(max_groups * sizeof(int64_t))); + if (RAY_IS_ERR(val_block)) return val_block; + int64_t* gvals = (int64_t*)ray_data(val_block); + + /* For each group, store indices in a separate i64 vector */ + ray_t** idx_vecs = NULL; + ray_t* ivblock = ray_alloc((size_t)(max_groups * sizeof(ray_t*))); + if (RAY_IS_ERR(ivblock)) { ray_free(val_block); return ivblock; } + idx_vecs = (ray_t**)ray_data(ivblock); + int64_t ngroups = 0; + + /* For LIST type, use atom_eq-based grouping with stored keys */ + if (x->type == RAY_LIST) { + ray_t** elems = (ray_t**)ray_data(x); + /* Store group keys as ray_t* pointers */ + ray_t* kblock = ray_alloc((size_t)(max_groups * sizeof(ray_t*))); + if (RAY_IS_ERR(kblock)) { ray_free(val_block); ray_free(ivblock); return kblock; } + ray_t** gkeys = (ray_t**)ray_data(kblock); + + for (int64_t i = 0; i < n; i++) { + ray_t* elem = elems[i]; + int64_t gi = -1; + for (int64_t g = 0; g < ngroups; g++) { + if (atom_eq(gkeys[g], elem)) { gi = g; break; } + } + if (gi < 0) { + if (ngroups >= max_groups) { + for (int64_t g = 0; g < ngroups; g++) ray_release(idx_vecs[g]); + ray_free(val_block); ray_free(ivblock); ray_free(kblock); + return ray_error("limit", NULL); + } + gi = ngroups++; + gkeys[gi] = elem; + idx_vecs[gi] = ray_vec_new(RAY_I64, 0); + } + idx_vecs[gi] = ray_vec_append(idx_vecs[gi], &i); + } + /* Build dict: keys as RAY_LIST (heterogeneous atoms), vals as + * RAY_LIST of I64 idx vectors. 
*/ + ray_t* keys_lst = ray_list_new(ngroups); + if (RAY_IS_ERR(keys_lst)) { ray_free(kblock); goto gfail; } + ray_t* vals_lst = ray_list_new(ngroups); + if (RAY_IS_ERR(vals_lst)) { ray_release(keys_lst); ray_free(kblock); goto gfail; } + for (int64_t g = 0; g < ngroups; g++) { + keys_lst = ray_list_append(keys_lst, gkeys[g]); + if (RAY_IS_ERR(keys_lst)) { ray_release(vals_lst); ray_free(kblock); goto gfail; } + vals_lst = ray_list_append(vals_lst, idx_vecs[g]); + ray_release(idx_vecs[g]); + idx_vecs[g] = NULL; + if (RAY_IS_ERR(vals_lst)) { ray_release(keys_lst); ray_free(kblock); goto gfail; } + } + ray_free(val_block); ray_free(ivblock); ray_free(kblock); + return ray_dict_new(keys_lst, vals_lst); + } + + /* RAY_GUID: 16-byte fixed-width grouping via open-address hash set + * keyed on the guid bytes. Previously this was an O(N²) linear + * scan against every existing group, which made (group guid_col) + * and (select ... by: OrderId) on a 10M row table effectively + * infinite. */ + if (x->type == RAY_GUID) { + const uint8_t* base = (const uint8_t*)ray_data(x); + group_ht_t ht; + uint32_t seed_cap = (uint32_t)(n < 64 ? 64 : (n < 1048576 ? 
(n * 2) : 2097152)); + if (!group_ht_init(&ht, seed_cap)) { + ray_free(val_block); ray_free(ivblock); + return ray_error("oom", NULL); + } + ght_guid_ctx_t gctx = { .base = base, .gvals = gvals }; + ray_progress_update("group", "guid-scan", 0, (uint64_t)n); + for (int64_t i = 0; i < n; i++) { + if (((i) & 65535) == 0) { + if (ray_interrupted()) { + for (int64_t g = 0; g < ngroups; g++) ray_release(idx_vecs[g]); + group_ht_free(&ht); + ray_free(val_block); ray_free(ivblock); + return ray_error("cancel", "interrupted"); + } + ray_progress_update(NULL, NULL, (uint64_t)i, (uint64_t)n); + } + const uint8_t* cur = base + i * 16; + uint64_t h = hash_guid(cur); + uint32_t slot = (uint32_t)(h & ht.mask); + uint32_t gi_found = GHT_EMPTY; + while (ht.slots[slot] != GHT_EMPTY) { + uint32_t gi = ht.slots[slot]; + if (memcmp(base + gvals[gi] * 16, cur, 16) == 0) { + gi_found = gi; + break; + } + slot = (slot + 1) & ht.mask; + } + int64_t gi; + if (gi_found != GHT_EMPTY) { + gi = gi_found; + } else { + if (ngroups >= max_groups) { + if (!group_grow(&val_block, &ivblock, &gvals, &idx_vecs, + ngroups, &max_groups)) { + for (int64_t g = 0; g < ngroups; g++) ray_release(idx_vecs[g]); + group_ht_free(&ht); + ray_free(val_block); ray_free(ivblock); + return ray_error("oom", NULL); + } + gctx.gvals = gvals; + } + gi = ngroups++; + gvals[gi] = i; /* store row index of first occurrence */ + idx_vecs[gi] = ray_vec_new(RAY_I64, 0); + ht.slots[slot] = (uint32_t)gi; + ht.count++; + /* Grow at load factor 0.5 */ + if (ht.count * 2 > ht.cap) { + if (!group_ht_grow(&ht, ght_guid_hash_gi, &gctx)) { + for (int64_t g = 0; g < ngroups; g++) ray_release(idx_vecs[g]); + group_ht_free(&ht); + ray_free(val_block); ray_free(ivblock); + return ray_error("oom", NULL); + } + } + } + idx_vecs[gi] = ray_vec_append(idx_vecs[gi], &i); + } + group_ht_free(&ht); + /* Keys: dense GUID vector built from collected gvals; vals: LIST of idx vecs. 
*/ + ray_t* keys_vec = ray_vec_new(RAY_GUID, ngroups); + if (RAY_IS_ERR(keys_vec)) goto gfail; + for (int64_t g = 0; g < ngroups; g++) + keys_vec = ray_vec_append(keys_vec, base + gvals[g] * 16); + if (RAY_IS_ERR(keys_vec)) goto gfail; + ray_t* vals_lst = ray_list_new(ngroups); + if (RAY_IS_ERR(vals_lst)) { ray_release(keys_vec); goto gfail; } + for (int64_t g = 0; g < ngroups; g++) { + vals_lst = ray_list_append(vals_lst, idx_vecs[g]); + ray_release(idx_vecs[g]); + idx_vecs[g] = NULL; + if (RAY_IS_ERR(vals_lst)) { ray_release(keys_vec); goto gfail; } + } + ray_free(val_block); ray_free(ivblock); + return ray_dict_new(keys_vec, vals_lst); + } + + /* RAY_STR: string-based grouping using ray_str_vec_get */ + if (x->type == RAY_STR) { + /* Store group keys as (ptr, len) pairs -- use a scratch block for strings */ + ray_t* skblock = ray_alloc((size_t)(max_groups * sizeof(ray_t*))); + if (RAY_IS_ERR(skblock)) { ray_free(val_block); ray_free(ivblock); return skblock; } + ray_t** str_keys = (ray_t**)ray_data(skblock); + + for (int64_t i = 0; i < n; i++) { + size_t slen = 0; + const char* sp = ray_str_vec_get(x, i, &slen); + + int64_t gi = -1; + for (int64_t g = 0; g < ngroups; g++) { + size_t gsl = ray_str_len(str_keys[g]); + const char* gsp = ray_str_ptr(str_keys[g]); + if (gsl == slen && (slen == 0 || memcmp(gsp, sp, slen) == 0)) { + gi = g; break; + } + } + if (gi < 0) { + if (ngroups >= max_groups) { + for (int64_t g = 0; g < ngroups; g++) { + ray_release(str_keys[g]); + ray_release(idx_vecs[g]); + } + ray_free(val_block); ray_free(ivblock); ray_free(skblock); + return ray_error("limit", NULL); + } + gi = ngroups++; + str_keys[gi] = ray_str(sp ? sp : "", slen); + idx_vecs[gi] = ray_vec_new(RAY_I64, 0); + } + idx_vecs[gi] = ray_vec_append(idx_vecs[gi], &i); + } + + /* Build dict: keys as RAY_STR vec from str_keys, vals as LIST of idx vecs. 
*/ + ray_t* keys_vec = ray_vec_new(RAY_STR, ngroups); + if (RAY_IS_ERR(keys_vec)) { + for (int64_t g = 0; g < ngroups; g++) { + ray_release(str_keys[g]); + ray_release(idx_vecs[g]); + } + ray_free(val_block); ray_free(ivblock); ray_free(skblock); + return ray_error("domain", NULL); + } + for (int64_t g = 0; g < ngroups; g++) { + keys_vec = ray_str_vec_append(keys_vec, ray_str_ptr(str_keys[g]), ray_str_len(str_keys[g])); + ray_release(str_keys[g]); + } + ray_t* vals_lst = ray_list_new(ngroups); + if (RAY_IS_ERR(vals_lst)) { + ray_release(keys_vec); ray_free(skblock); goto gfail; + } + for (int64_t g = 0; g < ngroups; g++) { + vals_lst = ray_list_append(vals_lst, idx_vecs[g]); + ray_release(idx_vecs[g]); + idx_vecs[g] = NULL; + if (RAY_IS_ERR(vals_lst)) { ray_release(keys_vec); ray_free(skblock); goto gfail; } + } + ray_free(val_block); ray_free(ivblock); ray_free(skblock); + return ray_dict_new(keys_vec, vals_lst); + } + + /* Scalar fast path: every primitive-typed vector packs its group + * key into an int64 (sym id, raw integer, date/time/timestamp, bool). + * Use an open-address hash set so high-cardinality group-by stays + * linear in n rather than the historical O(N²) per-row linear scan. */ + group_ht_t ht; + uint32_t seed_cap = (uint32_t)(n < 64 ? 64 : (n < 1048576 ? (n * 2) : 2097152)); + if (!group_ht_init(&ht, seed_cap)) { + ray_free(val_block); ray_free(ivblock); + return ray_error("oom", NULL); + } + ght_i64_ctx_t sctx = { .gvals = gvals }; + /* Null routing: null inputs share the same storage value as a legitimate + * zero/sentinel (e.g. NULL_I64's atom stores i64=0, NULL_I32 stores + * i32=0). Without a separate null bucket the hash table would conflate + * `0Nl` with a real `0`, silently merging two semantically distinct + * groups. Track a single `null_gi` and route every null row there; + * non-null rows continue to use the value-keyed hash table. 
*/ + int64_t null_gi = -1; + for (int64_t i = 0; i < n; i++) { + if (ray_vec_is_null(x, i)) { + if (null_gi < 0) { + if (ngroups >= max_groups) { + if (!group_grow(&val_block, &ivblock, &gvals, &idx_vecs, + ngroups, &max_groups)) { + for (int64_t g = 0; g < ngroups; g++) ray_release(idx_vecs[g]); + group_ht_free(&ht); + ray_free(val_block); ray_free(ivblock); + return ray_error("oom", NULL); + } + sctx.gvals = gvals; + } + null_gi = ngroups++; + gvals[null_gi] = 0; /* placeholder; key value set later */ + idx_vecs[null_gi] = ray_vec_new(RAY_I64, 0); + } + idx_vecs[null_gi] = ray_vec_append(idx_vecs[null_gi], &i); + continue; + } + int64_t v; + if (x->type == RAY_SYM || x->type == RAY_I64 || x->type == RAY_TIMESTAMP) + v = ((int64_t*)ray_data(x))[i]; + else if (x->type == RAY_I32 || x->type == RAY_DATE || x->type == RAY_TIME) + v = ((int32_t*)ray_data(x))[i]; + else if (x->type == RAY_I16) + v = ((int16_t*)ray_data(x))[i]; + else if (x->type == RAY_BOOL || x->type == RAY_U8) + v = ((uint8_t*)ray_data(x))[i]; + else if (x->type == RAY_F64 || x->type == RAY_F32) { + /* Hash by IEEE-754 bit pattern, not row index — the previous + * `v = i` fallback put every float row in its own group and + * the keys_vec build path then reinterpreted those row + * indices as floats. Two adjustments keep the bit-pattern + * approach consistent with atom_eq's IEEE semantics + * (`a->f64 == b->f64`): + * - +0.0 and -0.0 hash equal: canonicalise -0.0 to 0.0. + * - Each NaN is its own group (NaN != NaN under IEEE). + * Route NaN rows through the dedicated nan-group path + * below so the hash table never matches them. */ + double f = (x->type == RAY_F64) + ? ((double*)ray_data(x))[i] + : (double)((float*)ray_data(x))[i]; + if (f != f) { + /* NaN — own bucket per row, just like the null routing. 
*/ + if (ngroups >= max_groups) { + if (!group_grow(&val_block, &ivblock, &gvals, &idx_vecs, + ngroups, &max_groups)) { + for (int64_t g = 0; g < ngroups; g++) ray_release(idx_vecs[g]); + group_ht_free(&ht); + ray_free(val_block); ray_free(ivblock); + return ray_error("oom", NULL); + } + sctx.gvals = gvals; + } + int64_t gi_nan = ngroups++; + memcpy(&gvals[gi_nan], &f, sizeof(f)); + idx_vecs[gi_nan] = ray_vec_new(RAY_I64, 0); + idx_vecs[gi_nan] = ray_vec_append(idx_vecs[gi_nan], &i); + continue; + } + if (f == 0.0) f = 0.0; /* canonicalise -0.0 → +0.0 */ + memcpy(&v, &f, sizeof(v)); + } else + v = i; + + uint64_t h = hash_i64(v); + uint32_t slot = (uint32_t)(h & ht.mask); + uint32_t gi_found = GHT_EMPTY; + while (ht.slots[slot] != GHT_EMPTY) { + uint32_t gi = ht.slots[slot]; + if (gvals[gi] == v) { gi_found = gi; break; } + slot = (slot + 1) & ht.mask; + } + int64_t gi; + if (gi_found != GHT_EMPTY) { + gi = gi_found; + } else { + if (ngroups >= max_groups) { + if (!group_grow(&val_block, &ivblock, &gvals, &idx_vecs, + ngroups, &max_groups)) { + for (int64_t g = 0; g < ngroups; g++) ray_release(idx_vecs[g]); + group_ht_free(&ht); + ray_free(val_block); ray_free(ivblock); + return ray_error("oom", NULL); + } + sctx.gvals = gvals; + } + gi = ngroups++; + gvals[gi] = v; + idx_vecs[gi] = ray_vec_new(RAY_I64, 0); + ht.slots[slot] = (uint32_t)gi; + ht.count++; + if (ht.count * 2 > ht.cap) { + if (!group_ht_grow(&ht, ght_i64_hash_gi, &sctx)) { + for (int64_t g = 0; g < ngroups; g++) ray_release(idx_vecs[g]); + group_ht_free(&ht); + ray_free(val_block); ray_free(ivblock); + return ray_error("oom", NULL); + } + } + } + idx_vecs[gi] = ray_vec_append(idx_vecs[gi], &i); + } + group_ht_free(&ht); + + /* Build dict: keys vec mirrors x's element type; vals LIST of idx vecs. 
*/ + int8_t key_type = x->type; + ray_t* keys_vec; + if (key_type == RAY_SYM) keys_vec = ray_sym_vec_new(RAY_SYM_W64, ngroups); + else keys_vec = ray_vec_new(key_type, ngroups); + if (RAY_IS_ERR(keys_vec)) goto gfail; + + for (int64_t g = 0; g < ngroups; g++) { + switch (key_type) { + case RAY_SYM: + case RAY_I64: + case RAY_TIMESTAMP: { + int64_t v = gvals[g]; + keys_vec = ray_vec_append(keys_vec, &v); break; + } + case RAY_I32: + case RAY_DATE: + case RAY_TIME: { + int32_t v = (int32_t)gvals[g]; + keys_vec = ray_vec_append(keys_vec, &v); break; + } + case RAY_I16: { int16_t v = (int16_t)gvals[g]; keys_vec = ray_vec_append(keys_vec, &v); break; } + case RAY_BOOL: + case RAY_U8: { uint8_t v = (uint8_t)gvals[g]; keys_vec = ray_vec_append(keys_vec, &v); break; } + case RAY_F64: { + /* gvals[g] holds the IEEE-754 bit pattern packed by the + * row-loop above; reinterpret rather than int->double + * cast (which would produce 0.0/1.0/2.0… instead of the + * actual float values). */ + double v; + memcpy(&v, &gvals[g], sizeof(v)); + keys_vec = ray_vec_append(keys_vec, &v); + break; + } + case RAY_F32: { + double f; + memcpy(&f, &gvals[g], sizeof(f)); + float v = (float)f; + keys_vec = ray_vec_append(keys_vec, &v); + break; + } + default: keys_vec = ray_vec_append(keys_vec, &gvals[g]); break; + } + if (RAY_IS_ERR(keys_vec)) goto gfail; + /* If the source column had a null at any row in this group, mark + * the group's key as null so dict rendering / lookup can recover + * the null semantics (the integer-value key alone collides with a + * legitimate zero/sentinel value). All rows in a value-equality + * group share the same null-or-not status, so a single probe of + * the first row index suffices. 
*/ + if (idx_vecs[g] && idx_vecs[g]->len > 0) { + int64_t first_row = ((int64_t*)ray_data(idx_vecs[g]))[0]; + if (ray_vec_is_null(x, first_row)) + ray_vec_set_null(keys_vec, g, true); + } + } + + ray_t* vals_lst = ray_list_new(ngroups); + if (RAY_IS_ERR(vals_lst)) { ray_release(keys_vec); goto gfail; } + for (int64_t g = 0; g < ngroups; g++) { + vals_lst = ray_list_append(vals_lst, idx_vecs[g]); + ray_release(idx_vecs[g]); + idx_vecs[g] = NULL; + if (RAY_IS_ERR(vals_lst)) { ray_release(keys_vec); goto gfail; } + } + ray_free(val_block); + ray_free(ivblock); + return ray_dict_new(keys_vec, vals_lst); + +gfail: + for (int64_t g = 0; g < ngroups; g++) + if (idx_vecs[g]) ray_release(idx_vecs[g]); + ray_free(val_block); + ray_free(ivblock); + return ray_error("domain", NULL); +} + +/* (concat a b) -> concatenate vectors/strings/dicts/tables */ +ray_t* ray_concat_fn(ray_t* a, ray_t* b) { + /* Helper: get string content from atom (STR or CHAR), stripping trailing nulls */ + { + int a_is_str = ray_is_atom(a) && ((-a->type) == RAY_STR); + int b_is_str = ray_is_atom(b) && ((-b->type) == RAY_STR); + if (a_is_str && b_is_str) { + const char *ap, *bp; + size_t la, lb; + ap = ray_str_ptr(a); la = ray_str_len(a); + bp = ray_str_ptr(b); lb = ray_str_len(b); + /* Strip trailing null bytes */ + while (la > 0 && ap[la - 1] == '\0') la--; + while (lb > 0 && bp[lb - 1] == '\0') lb--; + char buf[8192]; + if (la + lb > sizeof(buf)) return ray_error("limit", NULL); + memcpy(buf, ap, la); + memcpy(buf + la, bp, lb); + return ray_str(buf, la + lb); + } + } + /* Vector concat: same type — delegate to ray_vec_concat which handles + * null bitmap propagation, SYM width promotion, and STR pool merging. 
*/ + if (ray_is_vec(a) && ray_is_vec(b) && a->type == b->type) + return ray_vec_concat(a, b); + /* Concat typed vec + boxed list or boxed list + typed vec -> boxed list */ + if ((ray_is_vec(a) && b->type == RAY_LIST) || (a->type == RAY_LIST && ray_is_vec(b))) { + ray_t* la = (a->type == RAY_LIST) ? a : NULL; + ray_t* lb = (b->type == RAY_LIST) ? b : NULL; + ray_t* va = ray_is_vec(a) ? a : NULL; + ray_t* vb = ray_is_vec(b) ? b : NULL; + int64_t na = a->len, nb = b->len; + ray_t* result = ray_alloc((na + nb) * sizeof(ray_t*)); + if (!result) return ray_error("oom", NULL); + result->type = RAY_LIST; + result->len = na + nb; + ray_t** out = (ray_t**)ray_data(result); + for (int64_t i = 0; i < na; i++) { + if (va) { + int alloc = 0; + out[i] = collection_elem(va, i, &alloc); + } else { + out[i] = ((ray_t**)ray_data(la))[i]; + ray_retain(out[i]); + } + } + for (int64_t i = 0; i < nb; i++) { + if (vb) { + int alloc = 0; + out[na + i] = collection_elem(vb, i, &alloc); + } else { + out[na + i] = ((ray_t**)ray_data(lb))[i]; + ray_retain(out[na + i]); + } + } + return result; + } + /* Boxed list concat */ + if (a->type == RAY_LIST && b->type == RAY_LIST) { + int64_t na = a->len, nb = b->len; + ray_t* result = ray_alloc((na + nb) * sizeof(ray_t*)); + if (!result) return ray_error("oom", NULL); + result->type = RAY_LIST; + result->len = na + nb; + ray_t** out = (ray_t**)ray_data(result); + ray_t** ae = (ray_t**)ray_data(a); + ray_t** be = (ray_t**)ray_data(b); + for (int64_t i = 0; i < na; i++) { ray_retain(ae[i]); out[i] = ae[i]; } + for (int64_t i = 0; i < nb; i++) { ray_retain(be[i]); out[na + i] = be[i]; } + return result; + } + /* Vector concat: mixed types -> boxed list (preserves original element types) */ + if (ray_is_vec(a) && ray_is_vec(b) && a->type != b->type) { + int64_t na = a->len, nb = b->len; + ray_t* result = ray_alloc((na + nb) * sizeof(ray_t*)); + if (!result) return ray_error("oom", NULL); + result->type = RAY_LIST; + result->len = na + nb; + ray_t** out = 
(ray_t**)ray_data(result); + for (int64_t i = 0; i < na; i++) { + int alloc = 0; + out[i] = collection_elem(a, i, &alloc); + /* collection_elem always allocates for typed vecs, so ownership transfers */ + } + for (int64_t i = 0; i < nb; i++) { + int alloc = 0; + out[na + i] = collection_elem(b, i, &alloc); + } + return result; + } + /* Atom + vector or vector + atom -> append */ + if (ray_is_atom(a) && ray_is_vec(b) && (-a->type) == b->type) { + int64_t nb = b->len; + int esz = ray_elem_size(b->type); + ray_t* result = ray_vec_new(b->type, 1 + nb); + if (RAY_IS_ERR(result)) return result; + /* Copy atom value as first element */ + switch (b->type) { + case RAY_I64: case RAY_TIMESTAMP: case RAY_SYM: + ((int64_t*)ray_data(result))[0] = a->i64; break; + case RAY_F64: + ((double*)ray_data(result))[0] = a->f64; break; + case RAY_I32: case RAY_DATE: case RAY_TIME: + ((int32_t*)ray_data(result))[0] = a->i32; break; + case RAY_I16: + ((int16_t*)ray_data(result))[0] = a->i16; break; + case RAY_BOOL: + ((bool*)ray_data(result))[0] = a->b8; break; + case RAY_U8: + ((uint8_t*)ray_data(result))[0] = a->u8; break; + case RAY_GUID: { + const uint8_t* gd = a->obj ? 
(const uint8_t*)ray_data(a->obj) : (const uint8_t*)ray_data((ray_t*)a); + memcpy(ray_data(result), gd, 16); break; + } + default: ray_free(result); return ray_error("type", NULL); + } + memcpy((char*)ray_data(result) + esz, ray_data(b), (size_t)(nb * esz)); + result->len = 1 + nb; + return result; + } + if (ray_is_vec(a) && ray_is_atom(b) && a->type == (-b->type)) { + int64_t na = a->len; + int esz = ray_elem_size(a->type); + ray_t* result = ray_vec_new(a->type, na + 1); + if (RAY_IS_ERR(result)) return result; + memcpy(ray_data(result), ray_data(a), (size_t)(na * esz)); + switch (a->type) { + case RAY_I64: case RAY_TIMESTAMP: case RAY_SYM: + ((int64_t*)ray_data(result))[na] = b->i64; break; + case RAY_F64: + ((double*)ray_data(result))[na] = b->f64; break; + case RAY_I32: case RAY_DATE: case RAY_TIME: + ((int32_t*)ray_data(result))[na] = b->i32; break; + case RAY_I16: + ((int16_t*)ray_data(result))[na] = b->i16; break; + case RAY_BOOL: + ((bool*)ray_data(result))[na] = b->b8; break; + case RAY_U8: + ((uint8_t*)ray_data(result))[na] = b->u8; break; + case RAY_GUID: { + const uint8_t* gd = b->obj ? 
(const uint8_t*)ray_data(b->obj) : (const uint8_t*)ray_data((ray_t*)b); + memcpy((uint8_t*)ray_data(result) + na * 16, gd, 16); break; + } + default: ray_free(result); return ray_error("type", NULL); + } + result->len = na + 1; + return result; + } + /* Atom + atom of same type -> 2-element vector */ + if (ray_is_atom(a) && ray_is_atom(b) && a->type == b->type && a->type != -RAY_STR) { + int8_t vtype = -(a->type); + ray_t* result = ray_vec_new(vtype, 2); + if (RAY_IS_ERR(result)) return result; + result->len = 2; + switch (vtype) { + case RAY_I64: case RAY_TIMESTAMP: case RAY_SYM: + ((int64_t*)ray_data(result))[0] = a->i64; + ((int64_t*)ray_data(result))[1] = b->i64; + break; + case RAY_F64: + ((double*)ray_data(result))[0] = a->f64; + ((double*)ray_data(result))[1] = b->f64; + break; + case RAY_I32: case RAY_DATE: case RAY_TIME: + ((int32_t*)ray_data(result))[0] = a->i32; + ((int32_t*)ray_data(result))[1] = b->i32; + break; + case RAY_I16: + ((int16_t*)ray_data(result))[0] = a->i16; + ((int16_t*)ray_data(result))[1] = b->i16; + break; + case RAY_BOOL: + ((bool*)ray_data(result))[0] = a->b8; + ((bool*)ray_data(result))[1] = b->b8; + break; + case RAY_U8: + ((uint8_t*)ray_data(result))[0] = a->u8; + ((uint8_t*)ray_data(result))[1] = b->u8; + break; + case RAY_GUID: { + const uint8_t* ga = a->obj ? (const uint8_t*)ray_data(a->obj) : (const uint8_t*)ray_data((ray_t*)a); + const uint8_t* gb = b->obj ? (const uint8_t*)ray_data(b->obj) : (const uint8_t*)ray_data((ray_t*)b); + memcpy(ray_data(result), ga, 16); + memcpy((uint8_t*)ray_data(result) + 16, gb, 16); + break; + } + default: ray_free(result); return ray_error("type", NULL); + } + return result; + } + /* Dict concat: merge — keys/vals from b overwrite a's. 
*/ + if (a->type == RAY_DICT && b->type == RAY_DICT) { + ray_retain(a); + ray_t* out = a; + ray_t* bk = ray_dict_keys(b); + ray_t* bv = ray_dict_vals(b); + if (!bk || !bv) return out; + int64_t bn = bk->len; + for (int64_t i = 0; i < bn; i++) { + /* Synthesize a key atom view from bk and the value pointer from bv. */ + ray_t k_storage; memset(&k_storage, 0, sizeof(k_storage)); + ray_t* k = NULL; + if (bk->type == RAY_LIST) { + k = ((ray_t**)ray_data(bk))[i]; + } else if (bk->type == RAY_SYM) { + k_storage.type = -RAY_SYM; + k_storage.i64 = ray_read_sym(ray_data(bk), i, RAY_SYM, bk->attrs); + k = &k_storage; + } else if (bk->type == RAY_I64 || bk->type == RAY_TIMESTAMP) { + k_storage.type = -bk->type; + k_storage.i64 = ((int64_t*)ray_data(bk))[i]; + k = &k_storage; + } else { + /* Heterogeneous element types fall back to boxing via collection_elem. */ + int alloc = 0; + k = collection_elem(bk, i, &alloc); + ray_t* v; + if (bv->type == RAY_LIST) v = ((ray_t**)ray_data(bv))[i]; + else { int va = 0; v = collection_elem(bv, i, &va); (void)va; } + out = ray_dict_upsert(out, k, v); + if (alloc) ray_release(k); + if (!out || RAY_IS_ERR(out)) return out; + continue; + } + ray_t* v; + if (bv->type == RAY_LIST) v = ((ray_t**)ray_data(bv))[i]; + else { int va = 0; v = collection_elem(bv, i, &va); (void)va; } + out = ray_dict_upsert(out, k, v); + if (!out || RAY_IS_ERR(out)) return out; + } + return out; + } + /* Table concat: append rows */ + if (a->type == RAY_TABLE && b->type == RAY_TABLE) { + int64_t ncols_a = ray_table_ncols(a); + int64_t ncols_b = ray_table_ncols(b); + /* Match columns of a in b by name */ + ray_t* result = ray_table_new((int32_t)ncols_a); + if (RAY_IS_ERR(result)) return result; + for (int64_t c = 0; c < ncols_a; c++) { + int64_t col_name_a = ray_table_col_name(a, c); + ray_t* acol = ray_table_get_col_idx(a, c); + /* Find matching column in b by name */ + ray_t* bcol = NULL; + for (int64_t j = 0; j < ncols_b; j++) { + if (ray_table_col_name(b, j) == 
col_name_a) { + bcol = ray_table_get_col_idx(b, j); + break; + } + } + if (!bcol) { + /* Column not present in b — schema mismatch is a "value" + * error (the table values have incompatible columns), not + * a "domain" error (which semantically means out-of-range). */ + ray_release(result); + return ray_error("value", NULL); + } + /* Type check: columns must have the same type */ + if (acol->type != bcol->type) { + ray_release(result); + return ray_error("type", NULL); + } + ray_t* col = ray_concat_fn(acol, bcol); + if (RAY_IS_ERR(col)) { ray_release(result); return col; } + result = ray_table_add_col(result, col_name_a, col); + ray_release(col); + if (RAY_IS_ERR(result)) return result; + } + return result; + } + /* Atom + boxed list -> prepend atom to list */ + if (ray_is_atom(a) && b->type == RAY_LIST && b->type != RAY_DICT) { + int64_t nb = b->len; + ray_t* result = ray_alloc((1 + nb) * sizeof(ray_t*)); + if (!result) return ray_error("oom", NULL); + result->type = RAY_LIST; + result->len = 1 + nb; + ray_t** out = (ray_t**)ray_data(result); + ray_retain(a); + out[0] = a; + ray_t** be = (ray_t**)ray_data(b); + for (int64_t i = 0; i < nb; i++) { ray_retain(be[i]); out[1 + i] = be[i]; } + return result; + } + /* Boxed list + atom -> append atom to list */ + if (a->type == RAY_LIST && a->type != RAY_DICT && ray_is_atom(b)) { + int64_t na = a->len; + ray_t* result = ray_alloc((na + 1) * sizeof(ray_t*)); + if (!result) return ray_error("oom", NULL); + result->type = RAY_LIST; + result->len = na + 1; + ray_t** out = (ray_t**)ray_data(result); + ray_t** ae = (ray_t**)ray_data(a); + for (int64_t i = 0; i < na; i++) { ray_retain(ae[i]); out[i] = ae[i]; } + ray_retain(b); + out[na] = b; + return result; + } + /* Atom + atom of different types -> 2-element boxed list */ + if (ray_is_atom(a) && ray_is_atom(b) && a->type != b->type) { + ray_t* result = ray_alloc(2 * sizeof(ray_t*)); + if (!result) return ray_error("oom", NULL); + result->type = RAY_LIST; + result->len = 2; + 
ray_t** out = (ray_t**)ray_data(result); + ray_retain(a); out[0] = a; + ray_retain(b); out[1] = b; + return result; + } + return ray_error("type", NULL); +} + +/* (raze list-of-vecs) -> flattened vector */ +ray_t* ray_raze_fn(ray_t* x) { + /* Scalar passthrough */ + if (ray_is_atom(x)) { ray_retain(x); return x; } + /* Typed vector passthrough */ + if (ray_is_vec(x)) { ray_retain(x); return x; } + if (x->type != RAY_LIST) + return ray_error("type", NULL); + int64_t n = x->len; + if (n == 0) return ray_list_new(0); + ray_t** items = (ray_t**)ray_data(x); + /* Try to concat all items */ + ray_t* result = items[0]; + ray_retain(result); + for (int64_t i = 1; i < n; i++) { + ray_t* next = ray_concat_fn(result, items[i]); + ray_release(result); + if (RAY_IS_ERR(next)) return next; + result = next; + } + return result; +} + +/* (within vals [lo hi]) -> bool vector, true where lo <= val <= hi */ +ray_t* ray_within_fn(ray_t* vals, ray_t* range) { + if (!ray_is_vec(vals) || !ray_is_vec(range) || range->len != 2) + return ray_error("type", NULL); + int64_t n = vals->len; + ray_t* result = ray_vec_new(RAY_BOOL, n); + if (RAY_IS_ERR(result)) return result; + bool* out = (bool*)ray_data(result); + + if (vals->type == RAY_I64) { + int64_t* d = (int64_t*)ray_data(vals); + int64_t* r = (int64_t*)ray_data(range); + int64_t lo = r[0], hi = r[1]; + for (int64_t i = 0; i < n; i++) out[i] = (d[i] >= lo && d[i] <= hi); + } else if (vals->type == RAY_F64) { + double* d = (double*)ray_data(vals); + double* r = (double*)ray_data(range); + double lo = r[0], hi = r[1]; + for (int64_t i = 0; i < n; i++) out[i] = (d[i] >= lo && d[i] <= hi); + } else if (vals->type == RAY_I32 || vals->type == RAY_DATE || vals->type == RAY_TIME) { + int32_t* d = (int32_t*)ray_data(vals); + int32_t* r = (int32_t*)ray_data(range); + int32_t lo = r[0], hi = r[1]; + for (int64_t i = 0; i < n; i++) out[i] = (d[i] >= lo && d[i] <= hi); + } else { + ray_free(result); + return ray_error("type", NULL); + } + result->len 
= n; + return result; +} + +/* (div a b) -> float division (always returns f64) */ +ray_t* ray_fdiv_fn(ray_t* a, ray_t* b) { + if (!ray_is_atom(a) || !ray_is_atom(b)) return ray_error("type", NULL); + if (!is_numeric(a) || !is_numeric(b)) return ray_error("type", NULL); + /* Null propagation */ + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) return ray_typed_null(-RAY_F64); + double fa = as_f64(a), fb = as_f64(b); + if (fb == 0.0) return ray_typed_null(-RAY_F64); + return make_f64(fa / fb); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/cmp.c b/crates/rayforce-sys/vendor/rayforce/src/ops/cmp.c new file mode 100644 index 0000000..f0beae6 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/cmp.c @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "lang/internal.h" +#include "ops/ops.h" /* RAY_LAZY, ray_is_lazy, ray_lazy_materialize */ + +#include + +/* Helper: compare char atom vs string atom. + * Returns: -1 if no char/string pair, else memcmp-like result via *out. */ +int char_str_cmp(ray_t* a, ray_t* b, int *out) { + const char *ap, *bp; + size_t al, bl; + int a_cs = (a->type == -RAY_STR); + int b_cs = (b->type == -RAY_STR); + if (!a_cs || !b_cs) return -1; + ap = ray_str_ptr(a); al = ray_str_len(a); + bp = ray_str_ptr(b); bl = ray_str_len(b); + size_t mn = al < bl ? al : bl; + int c = memcmp(ap, bp, mn); + if (c != 0) { *out = c; return 0; } + *out = (al > bl) ? 1 : (al < bl) ? -1 : 0; + return 0; +} + +/* Lexicographic compare of two SYM atoms. Fast path: equal interned + * ids ⇒ identical text ⇒ 0, no global-table lookup. Slow path: pull + * the backing STR via ray_sym_str and delegate to ray_str_cmp, which + * uses the 12-byte SSO inline path for short symbols. + * + * Invariant: any valid SYM atom resolves to its interned string. A + * NULL from ray_sym_str means corruption (uninitialised intern table, + * out-of-range id, or evicted slot) — no defensible total order exists + * in that state. We assert and let the process abort rather than + * fabricate an answer (returning 0 silently collapses distinct symbols; + * returning ±1 by raw id invents a non-lexicographic ordering that + * still lies about the contract). Matches v1 behaviour, which also + * trusts the invariant (and would SIGSEGV via strcmp(NULL,...) if it + * broke). 
*/ +int sym_atom_cmp(ray_t* a, ray_t* b) { + if (a->i64 == b->i64) return 0; + ray_t* sa = ray_sym_str(a->i64); + ray_t* sb = ray_sym_str(b->i64); + assert(sa && sb && "sym_atom_cmp: corrupted intern table — " + "valid SYM atom must resolve to interned string"); + int r = ray_str_cmp(sa, sb); + ray_release(sa); + ray_release(sb); + return r; +} + +/* Comparison */ +ray_t* ray_gt_fn(ray_t* a, ray_t* b) { + { int c; if (char_str_cmp(a, b, &c) == 0) return make_bool(c > 0 ? 1 : 0); } + if (a->type == -RAY_SYM && b->type == -RAY_SYM) + return make_bool(sym_atom_cmp(a, b) > 0 ? 1 : 0); + if (a->type == -RAY_GUID && b->type == -RAY_GUID) + return make_bool(memcmp(ray_data(a->obj), ray_data(b->obj), 16) > 0 ? 1 : 0); + /* Temporal comparison (same or cross-temporal via nanosecond conversion) */ + if (is_temporal(a) && is_temporal(b)) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) + return make_bool(RAY_ATOM_IS_NULL(b) && !RAY_ATOM_IS_NULL(a) ? 1 : 0); + return make_bool(temporal_as_ns(a) > temporal_as_ns(b) ? 1 : 0); + } + if (!is_numeric(a) || !is_numeric(b)) + return ray_error("type", "cannot compare %s and %s", + ray_type_name(a->type), ray_type_name(b->type)); + int na = RAY_ATOM_IS_NULL(a), nb = RAY_ATOM_IS_NULL(b); + if (na && nb) return make_bool(0); /* null == null → not > */ + if (na) return make_bool(0); /* null > X → false */ + if (nb) return make_bool(1); /* X > null → true */ + return make_bool(as_f64(a) > as_f64(b) ? 1 : 0); +} + +ray_t* ray_lt_fn(ray_t* a, ray_t* b) { + { int c; if (char_str_cmp(a, b, &c) == 0) return make_bool(c < 0 ? 1 : 0); } + if (a->type == -RAY_SYM && b->type == -RAY_SYM) + return make_bool(sym_atom_cmp(a, b) < 0 ? 1 : 0); + if (a->type == -RAY_GUID && b->type == -RAY_GUID) + return make_bool(memcmp(ray_data(a->obj), ray_data(b->obj), 16) < 0 ? 1 : 0); + if (is_temporal(a) && is_temporal(b)) { + if (RAY_ATOM_IS_NULL(a) || RAY_ATOM_IS_NULL(b)) + return make_bool(RAY_ATOM_IS_NULL(a) && !RAY_ATOM_IS_NULL(b) ? 
1 : 0); + return make_bool(temporal_as_ns(a) < temporal_as_ns(b) ? 1 : 0); + } + if (!is_numeric(a) || !is_numeric(b)) + return ray_error("type", "cannot compare %s and %s", + ray_type_name(a->type), ray_type_name(b->type)); + int na = RAY_ATOM_IS_NULL(a), nb = RAY_ATOM_IS_NULL(b); + if (na && nb) return make_bool(0); /* null == null → not < */ + if (na) return make_bool(1); /* null < X → true */ + if (nb) return make_bool(0); /* X < null → false */ + return make_bool(as_f64(a) < as_f64(b) ? 1 : 0); +} + +ray_t* ray_gte_fn(ray_t* a, ray_t* b) { + { int c; if (char_str_cmp(a, b, &c) == 0) return make_bool(c >= 0 ? 1 : 0); } + if (a->type == -RAY_SYM && b->type == -RAY_SYM) + return make_bool(sym_atom_cmp(a, b) >= 0 ? 1 : 0); + if (a->type == -RAY_GUID && b->type == -RAY_GUID) + return make_bool(memcmp(ray_data(a->obj), ray_data(b->obj), 16) >= 0 ? 1 : 0); + if (is_temporal(a) && is_temporal(b)) { + if (RAY_ATOM_IS_NULL(a) && RAY_ATOM_IS_NULL(b)) return make_bool(1); + if (RAY_ATOM_IS_NULL(a)) return make_bool(0); + if (RAY_ATOM_IS_NULL(b)) return make_bool(1); + return make_bool(temporal_as_ns(a) >= temporal_as_ns(b) ? 1 : 0); + } + if (!is_numeric(a) || !is_numeric(b)) + return ray_error("type", "cannot compare %s and %s", + ray_type_name(a->type), ray_type_name(b->type)); + int na = RAY_ATOM_IS_NULL(a), nb = RAY_ATOM_IS_NULL(b); + if (na && nb) return make_bool(1); /* null == null → >= true */ + if (na) return make_bool(0); /* null >= X → false */ + if (nb) return make_bool(1); /* X >= null → true */ + return make_bool(as_f64(a) >= as_f64(b) ? 1 : 0); +} + +ray_t* ray_lte_fn(ray_t* a, ray_t* b) { + { int c; if (char_str_cmp(a, b, &c) == 0) return make_bool(c <= 0 ? 1 : 0); } + if (a->type == -RAY_SYM && b->type == -RAY_SYM) + return make_bool(sym_atom_cmp(a, b) <= 0 ? 1 : 0); + if (a->type == -RAY_GUID && b->type == -RAY_GUID) + return make_bool(memcmp(ray_data(a->obj), ray_data(b->obj), 16) <= 0 ? 
1 : 0); + if (is_temporal(a) && is_temporal(b)) { + if (RAY_ATOM_IS_NULL(a) && RAY_ATOM_IS_NULL(b)) return make_bool(1); + if (RAY_ATOM_IS_NULL(a)) return make_bool(1); + if (RAY_ATOM_IS_NULL(b)) return make_bool(0); + return make_bool(temporal_as_ns(a) <= temporal_as_ns(b) ? 1 : 0); + } + if (!is_numeric(a) || !is_numeric(b)) + return ray_error("type", "cannot compare %s and %s", + ray_type_name(a->type), ray_type_name(b->type)); + int na = RAY_ATOM_IS_NULL(a), nb = RAY_ATOM_IS_NULL(b); + if (na && nb) return make_bool(1); /* null == null → <= true */ + if (na) return make_bool(1); /* null <= X → true */ + if (nb) return make_bool(0); /* X <= null → false */ + return make_bool(as_f64(a) <= as_f64(b) ? 1 : 0); +} + +/* Check if comparable (numeric or temporal) */ +int is_comparable(ray_t* x) { + return is_numeric(x) || is_temporal(x); +} + +ray_t* ray_eq_fn(ray_t* a, ray_t* b) { + /* Handle all null forms (C NULL, RAY_NULL_OBJ, typed null atoms) */ + int na = (!a || RAY_ATOM_IS_NULL(a)), nb = (!b || RAY_ATOM_IS_NULL(b)); + if (na && nb) return make_bool(1); + if (na || nb) return make_bool(0); + { int c; if (char_str_cmp(a, b, &c) == 0) return make_bool(c == 0 ? 1 : 0); } + if (a->type == -RAY_BOOL && b->type == -RAY_BOOL) + return make_bool(a->b8 == b->b8 ? 1 : 0); + if (a->type == -RAY_SYM && b->type == -RAY_SYM) + return make_bool(a->i64 == b->i64 ? 1 : 0); + if (a->type == -RAY_GUID && b->type == -RAY_GUID) + return make_bool(memcmp(ray_data(a->obj), ray_data(b->obj), 16) == 0 ? 1 : 0); + /* Temporal comparison (same or cross-temporal via nanosecond conversion) */ + if (is_temporal(a) && is_temporal(b)) + return make_bool(temporal_as_ns(a) == temporal_as_ns(b) ? 1 : 0); + if (!is_numeric(a) || !is_numeric(b)) return ray_error("type", NULL); + if (is_float_op(a, b)) + return make_bool(as_f64(a) == as_f64(b) ? 1 : 0); + return make_bool(as_i64(a) == as_i64(b) ? 
1 : 0); +} + +ray_t* ray_neq_fn(ray_t* a, ray_t* b) { + /* Handle all null forms (C NULL, RAY_NULL_OBJ, typed null atoms) */ + int na = (!a || RAY_ATOM_IS_NULL(a)), nb = (!b || RAY_ATOM_IS_NULL(b)); + if (na && nb) return make_bool(0); + if (na || nb) return make_bool(1); + { int c; if (char_str_cmp(a, b, &c) == 0) return make_bool(c != 0 ? 1 : 0); } + if (a->type == -RAY_BOOL && b->type == -RAY_BOOL) + return make_bool(a->b8 != b->b8 ? 1 : 0); + if (a->type == -RAY_SYM && b->type == -RAY_SYM) + return make_bool(a->i64 != b->i64 ? 1 : 0); + if (a->type == -RAY_GUID && b->type == -RAY_GUID) + return make_bool(memcmp(ray_data(a->obj), ray_data(b->obj), 16) != 0 ? 1 : 0); + /* Temporal comparison (same or cross-temporal via nanosecond conversion) */ + if (is_temporal(a) && is_temporal(b)) + return make_bool(temporal_as_ns(a) != temporal_as_ns(b) ? 1 : 0); + if (!is_numeric(a) || !is_numeric(b)) return ray_error("type", NULL); + if (is_float_op(a, b)) + return make_bool(as_f64(a) != as_f64(b) ? 1 : 0); + return make_bool(as_i64(a) != as_i64(b) ? 1 : 0); +} + +/* Bool vector element-wise helpers to reduce duplication in and/or/not. */ +#define BOOL_VEC_BINOP(a, b, op) do { \ + int64_t n = a->len < b->len ? 
a->len : b->len; \ + ray_t* r = ray_vec_new(RAY_BOOL, n); \ + if (RAY_IS_ERR(r)) return r; \ + bool* da = (bool*)ray_data(a); \ + bool* db = (bool*)ray_data(b); \ + bool* dr = (bool*)ray_data(r); \ + for (int64_t i = 0; i < n; i++) dr[i] = da[i] op db[i]; \ + r->len = n; \ + return r; \ +} while(0) + +#define BOOL_VEC_SCALAR_L(vec, sv, op) do { \ + int64_t n = vec->len; \ + ray_t* r = ray_vec_new(RAY_BOOL, n); \ + if (RAY_IS_ERR(r)) return r; \ + bool* dv = (bool*)ray_data(vec); \ + bool* dr = (bool*)ray_data(r); \ + for (int64_t i = 0; i < n; i++) dr[i] = dv[i] op sv; \ + r->len = n; \ + return r; \ +} while(0) + +ray_t* ray_and_fn(ray_t* a, ray_t* b) { + /* Element-wise for bool vectors */ + if (ray_is_vec(a) && a->type == RAY_BOOL && ray_is_vec(b) && b->type == RAY_BOOL) + BOOL_VEC_BINOP(a, b, &&); + /* Scalar broadcast: vec and scalar */ + if (ray_is_vec(a) && a->type == RAY_BOOL && ray_is_atom(b)) + BOOL_VEC_SCALAR_L(a, is_truthy(b), &&); + if (ray_is_atom(a) && ray_is_vec(b) && b->type == RAY_BOOL) + BOOL_VEC_SCALAR_L(b, is_truthy(a), &&); + return make_bool((is_truthy(a) && is_truthy(b)) ? 1 : 0); +} + +ray_t* ray_or_fn(ray_t* a, ray_t* b) { + /* Element-wise for bool vectors */ + if (ray_is_vec(a) && a->type == RAY_BOOL && ray_is_vec(b) && b->type == RAY_BOOL) + BOOL_VEC_BINOP(a, b, ||); + /* Scalar broadcast */ + if (ray_is_vec(a) && a->type == RAY_BOOL && ray_is_atom(b)) + BOOL_VEC_SCALAR_L(a, is_truthy(b), ||); + if (ray_is_atom(a) && ray_is_vec(b) && b->type == RAY_BOOL) + BOOL_VEC_SCALAR_L(b, is_truthy(a), ||); + return make_bool((is_truthy(a) || is_truthy(b)) ? 1 : 0); +} + +/* Special-form variadic AND/OR with short-circuit (matches v1). + * + * `args` are UNEVALUATED AST nodes — registered with RAY_FN_SPECIAL_FORM + * so the evaluator hands us raw forms rather than computed values. We + * call ray_eval per arg ourselves and stop as soon as the result is + * determined: AND on first scalar falsy, OR on first scalar truthy. 
+ * + * Mixed scalar+vector: when the running accumulator becomes a *scalar* + * with the determining truth value, we return it immediately — same + * shape as Lisp/Clojure where short-circuit yields the determinant. + * If the accumulator is a vector we cannot short-circuit (subsequent + * args may be vectors that still need element-wise combination), so we + * fall through to ray_and_fn / ray_or_fn for that step. */ +static ray_t* eval_and_short(ray_t* arg) { + ray_t* v = ray_eval(arg); + if (!v || RAY_IS_ERR(v)) return v; + if (ray_is_lazy(v)) v = ray_lazy_materialize(v); + return v; +} + +ray_t* ray_and_vary_fn(ray_t** args, int64_t n) { + if (n < 2) return ray_error("arity", "expected at least 2 args, got %lld", (long long)n); + ray_t* acc = eval_and_short(args[0]); + if (!acc || RAY_IS_ERR(acc)) return acc; + /* Short-circuit only when the running result is a *scalar* falsy. + * If acc is a vector, subsequent args still need element-wise + * combination (so `(and vec false)` broadcasts to all-false vector + * of acc's shape rather than a bare scalar). */ + if (ray_is_atom(acc) && !is_truthy(acc)) return acc; + for (int64_t i = 1; i < n; i++) { + ray_t* v = eval_and_short(args[i]); + if (!v || RAY_IS_ERR(v)) { ray_release(acc); return v; } + ray_t* next = ray_and_fn(acc, v); + ray_release(acc); + ray_release(v); + if (!next || RAY_IS_ERR(next)) return next; + acc = next; + if (ray_is_atom(acc) && !is_truthy(acc)) return acc; + } + return acc; +} + +ray_t* ray_or_vary_fn(ray_t** args, int64_t n) { + if (n < 2) return ray_error("arity", "expected at least 2 args, got %lld", (long long)n); + ray_t* acc = eval_and_short(args[0]); + if (!acc || RAY_IS_ERR(acc)) return acc; + /* Short-circuit only on scalar truthy accumulator (see AND comment). 
*/ + if (ray_is_atom(acc) && is_truthy(acc)) return acc; + for (int64_t i = 1; i < n; i++) { + ray_t* v = eval_and_short(args[i]); + if (!v || RAY_IS_ERR(v)) { ray_release(acc); return v; } + ray_t* next = ray_or_fn(acc, v); + ray_release(acc); + ray_release(v); + if (!next || RAY_IS_ERR(next)) return next; + acc = next; + if (ray_is_atom(acc) && is_truthy(acc)) return acc; + } + return acc; +} + +/* Unary */ +ray_t* ray_not_fn(ray_t* x) { + /* Element-wise for bool vectors */ + if (ray_is_vec(x) && x->type == RAY_BOOL) { + int64_t n = x->len; + ray_t* r = ray_vec_new(RAY_BOOL, n); + if (RAY_IS_ERR(r)) return r; + bool* src = (bool*)ray_data(x); + bool* dr = (bool*)ray_data(r); + for (int64_t i = 0; i < n; i++) dr[i] = !src[i]; + r->len = n; + return r; + } + return make_bool(is_truthy(x) ? 0 : 1); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/collection.c b/crates/rayforce-sys/vendor/rayforce/src/ops/collection.c new file mode 100644 index 0000000..75783f8 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/collection.c @@ -0,0 +1,2040 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/* Open-addressing hash set over the rows of one "build-side" collection.
 * The table stores row indices rather than values; hashing and equality
 * re-read the value from src each time.  Nulls are not stored in slots:
 * the first null row is remembered in null_seen / null_idx instead. */
typedef struct hashset_t {
    int64_t* slots;       /* cap entries; -1 (HS_EMPTY) = empty, else row index into src */
    ray_t* block;         /* backing alloc for slots; released by hashset_destroy */
    int64_t cap;          /* number of slots; power of 2 */
    int64_t mask;         /* cap - 1, used to wrap probe positions */
    int64_t count;        /* live entries (excl. null bucket) */
    int null_seen;        /* 1 if any null has been recorded */
    int64_t null_idx;     /* row index of first null encountered; HS_EMPTY until then */
    /* Cached typed-vec data pointers to avoid re-derefs in hot loops. */
    ray_t* src;           /* source vec or list the stored row indices refer to */
    int8_t src_type;      /* ray_t.type of src (0 when src is NULL) */
    bool src_has_nulls;   /* cached RAY_ATTR_HAS_NULLS bit of src->attrs */
    void* src_data;       /* pointer to typed data (or RAY_LIST elements) */
} hashset_t;
/* Hash a single row at index i in src.
 *
 *   src  : collection the row belongs to (consulted for SYM and STR reads)
 *   i    : row index
 *   t    : element type tag selecting the branch below
 *   data : typed element buffer of src (or the ray_t* array for RAY_LIST)
 *
 * Typed integer-like vectors (I64/I32/I16/U8/BOOL/DATE/TIME/TIMESTAMP) are
 * widened to int64 and hashed with ray_hash_i64; F64 uses ray_hash_f64;
 * SYM hashes the symbol id; GUID and STR hash their bytes.  For RAY_LIST
 * the boxed atom decides: null elements hash to 0, numeric atoms are
 * hashed through f64 so that an I64 atom and an F64 atom holding the same
 * value collide, and the remaining atom kinds mirror the typed branches.
 *
 * NOTE(review): a typed integer vec hashes via ray_hash_i64 while a boxed
 * numeric atom (and a typed F64 vec) hashes via ray_hash_f64.  If callers
 * ever probe a set built from one representation with rows from the other,
 * equal values would presumably land in different slots unless the two
 * hash functions agree — confirm against the callers. */
static uint64_t hs_hash_row(ray_t* src, int64_t i, int8_t t, void* data) {
    switch (t) {
    case RAY_I64: return ray_hash_i64(((const int64_t*)data)[i]);
    case RAY_I32: return ray_hash_i64((int64_t)((const int32_t*)data)[i]);
    case RAY_I16: return ray_hash_i64((int64_t)((const int16_t*)data)[i]);
    case RAY_U8: return ray_hash_i64((int64_t)((const uint8_t*)data)[i]);
    case RAY_BOOL: return ray_hash_i64((int64_t)((const bool*)data)[i]);
    case RAY_F64: return ray_hash_f64(((const double*)data)[i]);
    case RAY_DATE: return ray_hash_i64((int64_t)((const int32_t*)data)[i]);
    case RAY_TIME: return ray_hash_i64((int64_t)((const int32_t*)data)[i]);
    case RAY_TIMESTAMP: return ray_hash_i64(((const int64_t*)data)[i]);
    case RAY_SYM: {
        /* Symbol ids are read through ray_read_sym, which takes the
         * vector's type and attrs alongside the raw buffer. */
        uint64_t s = ray_read_sym(data, i, src->type, src->attrs);
        return ray_hash_i64((int64_t)s);
    }
    case RAY_GUID:
        /* GUIDs are stored inline, 16 bytes per row. */
        return ray_hash_bytes((const uint8_t*)data + i * 16, 16);
    case RAY_STR: {
        size_t l = 0;
        const char* p = ray_str_vec_get(src, i, &l);
        /* A NULL string pointer hashes to the fixed value 0. */
        return p ? ray_hash_bytes(p, l) : 0;
    }
    case RAY_LIST: {
        ray_t** elems = (ray_t**)data;
        ray_t* e = elems[i];
        /* Missing or null elements all share hash 0. */
        if (!e || RAY_ATOM_IS_NULL(e)) return 0;
        /* Numeric coercion: hash as f64 so distinct numeric types
         * holding the same value collide (atom_eq does the same). */
        if (is_numeric(e)) return ray_hash_f64(as_f64(e));
        switch (e->type) {
        case -RAY_SYM: return ray_hash_i64(e->i64);
        case -RAY_DATE:
        case -RAY_TIME: return ray_hash_i64((int64_t)e->i32);
        case -RAY_TIMESTAMP: return ray_hash_i64(e->i64);
        case -RAY_GUID: {
            /* The 16 bytes are taken from e->obj when it is set,
             * otherwise from the atom's own data area. */
            const uint8_t* g = e->obj
                ? (const uint8_t*)ray_data(e->obj)
                : (const uint8_t*)ray_data((ray_t*)e);
            return ray_hash_bytes(g, 16);
        }
        case -RAY_STR:
            return ray_hash_bytes(ray_str_ptr(e), ray_str_len(e));
        default:
            /* Unknown / unsupported atom kind: hash by type tag. */
            return ray_hash_i64((int64_t)e->type);
        }
    }
    default:
        /* Unrecognised element type: the hash is derived from the row
         * index alone, not from the row's value. */
        return ray_hash_i64(i);
    }
}
collection_elem allocates a temporary atom for + * typed vecs; the cost is paid only on collisions / mixed types. */ + int alloc_a = 0, alloc_b = 0; + ray_t* a = collection_elem(a_src, ai, &alloc_a); + ray_t* b = collection_elem(b_src, bi, &alloc_b); + int eq = (a && b) ? atom_eq(a, b) : 0; + if (alloc_a && a) ray_release(a); + if (alloc_b && b) ray_release(b); + return eq; +} + +/* True if the row at index i in src is null. */ +static inline int hs_row_is_null(ray_t* src, int64_t i, void* data) { + if (src->type == RAY_LIST) { + ray_t* e = ((ray_t**)data)[i]; + return !e || RAY_ATOM_IS_NULL(e); + } + return (src->attrs & RAY_ATTR_HAS_NULLS) && ray_vec_is_null(src, i); +} + +static bool hashset_init(hashset_t* hs, ray_t* src, int64_t hint) { + int64_t cap = 16; + /* Cap target: 2× hint to keep load factor under 0.5. */ + while (cap < (hint > 0 ? hint * 2 : 16)) cap *= 2; + hs->block = ray_alloc((size_t)cap * sizeof(int64_t)); + if (!hs->block || RAY_IS_ERR(hs->block)) { hs->block = NULL; return false; } + hs->slots = (int64_t*)ray_data(hs->block); + for (int64_t i = 0; i < cap; i++) hs->slots[i] = HS_EMPTY; + hs->cap = cap; + hs->mask = cap - 1; + hs->count = 0; + hs->null_seen = 0; + hs->null_idx = HS_EMPTY; + hs->src = src; + hs->src_type = src ? src->type : 0; + hs->src_has_nulls = src ? ((src->attrs & RAY_ATTR_HAS_NULLS) != 0) : false; + hs->src_data = src ? 
ray_data(src) : NULL; + return true; +} + +static void hashset_destroy(hashset_t* hs) { + if (hs->block) { ray_release(hs->block); hs->block = NULL; } + hs->slots = NULL; +} + +static bool hashset_grow(hashset_t* hs) { + int64_t old_cap = hs->cap; + int64_t* old_slots = hs->slots; + int64_t new_cap = old_cap * 2; + if (new_cap < old_cap) return false; + ray_t* nb = ray_alloc((size_t)new_cap * sizeof(int64_t)); + if (!nb || RAY_IS_ERR(nb)) return false; + int64_t* ns = (int64_t*)ray_data(nb); + for (int64_t i = 0; i < new_cap; i++) ns[i] = HS_EMPTY; + int64_t mask = new_cap - 1; + for (int64_t i = 0; i < old_cap; i++) { + int64_t ridx = old_slots[i]; + if (ridx == HS_EMPTY) continue; + uint64_t h = hs_hash_row(hs->src, ridx, hs->src_type, hs->src_data); + int64_t s = (int64_t)(h & (uint64_t)mask); + while (ns[s] != HS_EMPTY) s = (s + 1) & mask; + ns[s] = ridx; + } + ray_release(hs->block); + hs->block = nb; + hs->slots = ns; + hs->cap = new_cap; + hs->mask = mask; + return true; +} + +/* Probe the set for the row (probe_src, probe_i). Returns the stored + * row index from the build-side vec on hit, HS_EMPTY on miss. */ +static int64_t hashset_find_xrow(hashset_t* hs, ray_t* probe_src, int64_t probe_i, + int8_t probe_type, void* probe_data) { + if (hs_row_is_null(probe_src, probe_i, probe_data)) + return hs->null_seen ? hs->null_idx : HS_EMPTY; + uint64_t h = hs_hash_row(probe_src, probe_i, probe_type, probe_data); + int64_t s = (int64_t)(h & (uint64_t)hs->mask); + while (hs->slots[s] != HS_EMPTY) { + int64_t stored = hs->slots[s]; + if (hs_eq_rows(probe_src, probe_i, probe_type, probe_data, + hs->src, stored, hs->src_type, hs->src_data)) + return stored; + s = (s + 1) & hs->mask; + } + return HS_EMPTY; +} + +/* qsort comparator state for distinct_sort_indices: thread-local so + * the standard qsort entry point can pull it without a context arg. + * Single-threaded VM-eval is the only caller, so TLS is fine. 
*/ +static _Thread_local ray_t* g_dsort_src; +static _Thread_local int8_t g_dsort_type; +static _Thread_local const void* g_dsort_data; + +static int distinct_sort_cmp(const void* a, const void* b) { + int64_t ia = *(const int64_t*)a; + int64_t ib = *(const int64_t*)b; + switch (g_dsort_type) { + case RAY_I64: case RAY_TIMESTAMP: { + int64_t va = ((const int64_t*)g_dsort_data)[ia]; + int64_t vb = ((const int64_t*)g_dsort_data)[ib]; + return (va > vb) - (va < vb); + } + case RAY_I32: case RAY_DATE: case RAY_TIME: { + int32_t va = ((const int32_t*)g_dsort_data)[ia]; + int32_t vb = ((const int32_t*)g_dsort_data)[ib]; + return (va > vb) - (va < vb); + } + case RAY_I16: { + int16_t va = ((const int16_t*)g_dsort_data)[ia]; + int16_t vb = ((const int16_t*)g_dsort_data)[ib]; + return (va > vb) - (va < vb); + } + case RAY_U8: case RAY_BOOL: { + uint8_t va = ((const uint8_t*)g_dsort_data)[ia]; + uint8_t vb = ((const uint8_t*)g_dsort_data)[ib]; + return (va > vb) - (va < vb); + } + case RAY_F64: { + double va = ((const double*)g_dsort_data)[ia]; + double vb = ((const double*)g_dsort_data)[ib]; + return (va > vb) - (va < vb); + } + default: { + /* Fall back to boxed compare for less-common element kinds. */ + int alloc_a = 0, alloc_b = 0; + ray_t* a_e = collection_elem(g_dsort_src, ia, &alloc_a); + ray_t* b_e = collection_elem(g_dsort_src, ib, &alloc_b); + double va = a_e ? as_f64(a_e) : 0.0; + double vb = b_e ? as_f64(b_e) : 0.0; + if (alloc_a && a_e) ray_release(a_e); + if (alloc_b && b_e) ray_release(b_e); + return (va > vb) - (va < vb); + } + } +} + +/* Sort `count` indices by their numeric value in `src`. Preserves + * the existing `distinct` semantic of returning numeric output sorted. 
*/ +static void distinct_sort_indices(ray_t* src, int64_t* idx, int64_t count) { + g_dsort_src = src; + g_dsort_type = src->type; + g_dsort_data = ray_data(src); + qsort(idx, (size_t)count, sizeof(int64_t), distinct_sort_cmp); +} + +/* Insert row i (from the set's build-side src) if absent. + * Returns true if newly inserted, false if duplicate. On grow OOM + * the set silently keeps the previous capacity (caller proceeds). */ +static bool hashset_insert(hashset_t* hs, int64_t i) { + if (hs_row_is_null(hs->src, i, hs->src_data)) { + if (hs->null_seen) return false; + hs->null_seen = 1; + hs->null_idx = i; + return true; + } + if (hs->count * 2 >= hs->cap) { + if (!hashset_grow(hs)) { /* fall through, may degrade */ } + } + uint64_t h = hs_hash_row(hs->src, i, hs->src_type, hs->src_data); + int64_t s = (int64_t)(h & (uint64_t)hs->mask); + while (hs->slots[s] != HS_EMPTY) { + int64_t stored = hs->slots[s]; + if (hs_eq_rows(hs->src, i, hs->src_type, hs->src_data, + hs->src, stored, hs->src_type, hs->src_data)) + return false; + s = (s + 1) & hs->mask; + } + hs->slots[s] = i; + hs->count++; + return true; +} + +/* ══════════════════════════════════════════ + * Higher-order functions + * ══════════════════════════════════════════ */ + +/* (map fn val vec) — apply binary fn(val, elem) to each element of vec. + * Also supports (map fn vec) for unary mapping. 
*/ +ray_t* ray_map_fn(ray_t** args, int64_t n) { + if (n < 2) return ray_error("domain", NULL); + for (int64_t i = 0; i < n; i++) + if (ray_is_lazy(args[i])) args[i] = ray_lazy_materialize(args[i]); + + ray_t* fn = args[0]; + ray_t* _bx = NULL; + + if (n == 2) { + /* Unary map: (map fn vec) */ + ray_t* vec = unbox_vec_arg(args[1], &_bx); + if (RAY_IS_ERR(vec)) return vec; + if (!is_list(vec)) { if (_bx) ray_release(_bx); return ray_error("type", NULL); } + int64_t len = ray_len(vec); + ray_t* result = ray_alloc(len * sizeof(ray_t*)); + if (!result) { if (_bx) ray_release(_bx); return ray_error("oom", NULL); } + result->type = RAY_LIST; + result->len = len; + ray_t** out = (ray_t**)ray_data(result); + ray_t** elems = (ray_t**)ray_data(vec); + for (int64_t i = 0; i < len; i++) { + out[i] = call_fn1(fn, elems[i]); + if (RAY_IS_ERR(out[i])) { + for (int64_t j = 0; j < i; j++) ray_release(out[j]); + result->len = 0; ray_release(result); if (_bx) ray_release(_bx); + return out[i]; + } + } + if (_bx) ray_release(_bx); + return result; + } + + /* Binary map: (map fn val vec) — apply fn(val, elem) */ + ray_t* val = args[1]; + ray_t* vec = unbox_vec_arg(args[2], &_bx); + if (RAY_IS_ERR(vec)) return vec; + /* If vec is scalar, just call fn(val, vec) once */ + if (!is_list(vec)) { + if (_bx) ray_release(_bx); + return call_fn2(fn, val, args[2]); + } + int64_t len = ray_len(vec); + ray_t* result = ray_alloc(len * sizeof(ray_t*)); + if (!result) { if (_bx) ray_release(_bx); return ray_error("oom", NULL); } + result->type = RAY_LIST; + result->len = len; + ray_t** out = (ray_t**)ray_data(result); + ray_t** elems = (ray_t**)ray_data(vec); + for (int64_t i = 0; i < len; i++) { + out[i] = call_fn2(fn, val, elems[i]); + if (RAY_IS_ERR(out[i])) { + for (int64_t j = 0; j < i; j++) ray_release(out[j]); + result->len = 0; ray_release(result); if (_bx) ray_release(_bx); + return out[i]; + } + } + if (_bx) ray_release(_bx); + return result; +} + +/* (pmap fn val vec) — same as map, 
parallel not implemented yet (sequential fallback) */
+ray_t* ray_pmap_fn(ray_t** args, int64_t n) {
+    /* Delegates unchanged to the sequential map implementation. */
+    return ray_map_fn(args, n);
+}
+
+/* (fold fn vec) or (fold fn init vec) — reduce with binary fn.
+ *
+ * args[0] is the function; with two args the first list element seeds the
+ * accumulator (an empty list is a "domain" error), with three args args[1]
+ * is the seed and args[2] the list.  Lazy arguments are materialized in
+ * place.  Returns the final accumulator (owned by the caller) or the first
+ * error produced by fn. */
+ray_t* ray_fold_fn(ray_t** args, int64_t n) {
+    if (n < 2) return ray_error("domain", NULL);
+    for (int64_t i = 0; i < n; i++)
+        if (ray_is_lazy(args[i])) args[i] = ray_lazy_materialize(args[i]);
+
+    ray_t* fn = args[0];
+    ray_t* vec;
+    ray_t* acc;
+    ray_t* _bx = NULL;   /* temporary produced by unbox_vec_arg, if any */
+    if (n == 2) {
+        /* (fold fn vec) — use first element as initial value */
+        vec = unbox_vec_arg(args[1], &_bx);
+        if (RAY_IS_ERR(vec)) return vec;
+        if (!is_list(vec)) { if (_bx) ray_release(_bx); return ray_error("type", NULL); }
+        int64_t len = ray_len(vec);
+        if (len == 0) { if (_bx) ray_release(_bx); return ray_error("domain", NULL); }
+        ray_t** elems = (ray_t**)ray_data(vec);
+        ray_retain(elems[0]);
+        acc = elems[0];
+        for (int64_t i = 1; i < len; i++) {
+            ray_t* next = call_fn2(fn, acc, elems[i]);
+            ray_release(acc);
+            if (RAY_IS_ERR(next)) { if (_bx) ray_release(_bx); return next; }
+            acc = next;
+        }
+        if (_bx) ray_release(_bx);
+        return acc;
+    }
+
+    /* (fold fn init vec) */
+    ray_retain(args[1]);
+    acc = args[1];
+    vec = unbox_vec_arg(args[2], &_bx);
+    if (RAY_IS_ERR(vec)) { ray_release(acc); return vec; }
+    if (!is_list(vec)) { ray_release(acc); if (_bx) ray_release(_bx); return ray_error("type", NULL); }
+    int64_t len = ray_len(vec);
+    ray_t** elems = (ray_t**)ray_data(vec);
+    for (int64_t i = 0; i < len; i++) {
+        ray_t* next = call_fn2(fn, acc, elems[i]);
+        ray_release(acc);
+        if (RAY_IS_ERR(next)) { if (_bx) ray_release(_bx); return next; }
+        acc = next;
+    }
+    if (_bx) ray_release(_bx);
+    return acc;
+}
+
+/* (scan fn vec) — running fold, returns vector of partial results.
+ *
+ * out[0] is the first element; out[i] = fn(out[i-1], vec[i]).  An empty
+ * input yields an empty list.  On the first error from fn, every partial
+ * result is released and that error is returned. */
+ray_t* ray_scan_fn(ray_t** args, int64_t n) {
+    if (n < 2) return ray_error("domain", NULL);
+    for (int64_t i = 0; i < n; i++)
+        if (ray_is_lazy(args[i])) args[i] = ray_lazy_materialize(args[i]);
+
+    ray_t* fn = args[0];
+    ray_t* _bx = NULL;
+    ray_t* vec = unbox_vec_arg(args[1], &_bx);
+    if (RAY_IS_ERR(vec)) return vec;
+    if (!is_list(vec)) { if (_bx) ray_release(_bx); return ray_error("type", NULL); }
+    int64_t len = ray_len(vec);
+    if (len == 0) {
+        if (_bx) ray_release(_bx);
+        ray_t* result = ray_alloc(0);
+        if (!result) return ray_error("oom", NULL);
+        result->type = RAY_LIST;
+        result->len = 0;
+        return result;
+    }
+
+    ray_t* result = ray_alloc(len * sizeof(ray_t*));
+    if (!result) { if (_bx) ray_release(_bx); return ray_error("oom", NULL); }
+    result->type = RAY_LIST;
+    result->len = len;
+    ray_t** out = (ray_t**)ray_data(result);
+    ray_t** elems = (ray_t**)ray_data(vec);
+
+    ray_retain(elems[0]);
+    out[0] = elems[0];
+    for (int64_t i = 1; i < len; i++) {
+        out[i] = call_fn2(fn, out[i - 1], elems[i]);
+        if (RAY_IS_ERR(out[i])) {
+            /* Copy the error out first: `out` points into `result`, which
+             * is released below and must not be read afterwards. */
+            ray_t* err = out[i];
+            for (int64_t j = 0; j < i; j++) ray_release(out[j]);
+            /* len = 0 so releasing the list does not touch the slots again */
+            result->len = 0; ray_release(result); if (_bx) ray_release(_bx);
+            return err;
+        }
+    }
+    if (_bx) ray_release(_bx);
+    return result;
+}
+
+/* (filter vec mask) — filter vector by boolean mask.
+ *
+ * Dispatch order: table (mask applied to every column), string atom
+ * (characters selected, at most 8192 kept), typed vector with typed bool
+ * mask (element copy plus null propagation), then the boxed-list fallback.
+ * A mask whose length differs from the input is a "length" error. */
+ray_t* ray_filter_fn(ray_t* vec, ray_t* mask) {
+    if (ray_is_lazy(vec)) vec = ray_lazy_materialize(vec);
+    if (ray_is_lazy(mask)) mask = ray_lazy_materialize(mask);
+
+    /* Table filter: apply mask to each column */
+    if (vec->type == RAY_TABLE && ray_is_vec(mask) && mask->type == RAY_BOOL) {
+        int64_t ncols = ray_table_ncols(vec);
+        int64_t nrows = ray_table_nrows(vec);
+        if (nrows != mask->len) return ray_error("length", NULL);
+        ray_t* result = ray_table_new(ncols);
+        if (RAY_IS_ERR(result)) return result;
+        for (int64_t c = 0; c < ncols; c++) {
+            int64_t cn = ray_table_col_name(vec, c);
+            ray_t* src_col = ray_table_get_col_idx(vec, c);
+            ray_t* filtered = ray_filter_fn(src_col, mask);
+            if (RAY_IS_ERR(filtered)) { ray_release(result); return filtered; }
+            result = ray_table_add_col(result, cn, filtered);
+            ray_release(filtered);
+            if (RAY_IS_ERR(result)) return result;
+        }
+        return result;
+    }
+
+    /* String filter: STR atom + bool mask → filter characters */
+    if (ray_is_atom(vec) && (-vec->type) == RAY_STR && ray_is_vec(mask) && mask->type == RAY_BOOL) {
+        const char* sp = ray_str_ptr(vec);
+        size_t slen = ray_str_len(vec);
+        int64_t mlen = mask->len;
+        if ((int64_t)slen != mlen) return ray_error("length", NULL);
+        bool* mb = (bool*)ray_data(mask);
+        int64_t count = 0;
+        for (int64_t i = 0; i < mlen; i++) if (mb[i]) count++;
+        char buf[8192];
+        if ((size_t)count > sizeof(buf)) return ray_error("limit", NULL);
+        int64_t j = 0;
+        for (int64_t i = 0; i < mlen; i++) {
+            if (mb[i]) buf[j++] = sp[i];
+        }
+        return ray_str(buf, (size_t)count);
+    }
+
+    /* Fast path: typed vector + typed bool mask */
+    if (ray_is_vec(vec) && ray_is_vec(mask) && mask->type == RAY_BOOL) {
+        int64_t len = vec->len;
+        int64_t mlen = mask->len;
+        if (len != mlen) return ray_error("length", NULL);
+        bool* mb = (bool*)ray_data(mask);
+
+        /* Count true values */
+        int64_t count = 0;
+        for (int64_t i = 0; i < len; i++) if (mb[i]) count++;
+
+        int8_t vtype = vec->type;
+        int esz = ray_elem_size(vtype);
+        ray_t* result = ray_vec_new(vtype, count);
+        if (RAY_IS_ERR(result)) return result;
+        result->len = count;
+        char* src = (char*)ray_data(vec);
+        char* dst = (char*)ray_data(result);
+        int64_t j = 0;
+        for (int64_t i = 0; i < len; i++) {
+            if (mb[i]) {
+                memcpy(dst + j * esz, src + i * esz, esz);
+                if (ray_vec_is_null(vec, i))
+                    ray_vec_set_null(result, j, true);
+                j++;
+            }
+        }
+        return result;
+    }
+
+    /* Fallback: boxed list path */
+    ray_t *_bx1 = NULL, *_bx2 = NULL;
+    vec = unbox_vec_arg(vec, &_bx1);
+    if (RAY_IS_ERR(vec)) return vec;
+    mask = unbox_vec_arg(mask, &_bx2);
+    if (RAY_IS_ERR(mask)) { if (_bx1) ray_release(_bx1); return mask; }
+    if (!is_list(vec) || !is_list(mask)) { if (_bx1) ray_release(_bx1); if (_bx2) ray_release(_bx2); return ray_error("type", NULL); }
+    int64_t len = ray_len(vec);
+    int64_t mlen = ray_len(mask);
+    /* Release the unbox temporaries on this exit too, like every other
+     * error return in the fallback path. */
+    if (len != mlen) { if (_bx1) ray_release(_bx1); if (_bx2) ray_release(_bx2); return ray_error("length", NULL); }
+
+    ray_t** velems = (ray_t**)ray_data(vec);
+    ray_t** melems = (ray_t**)ray_data(mask);
+
+    /* Validate mask is all booleans */
+    for (int64_t i = 0; i < len; i++) {
+        if (melems[i]->type != -RAY_BOOL) { if (_bx1) ray_release(_bx1); if (_bx2) ray_release(_bx2); return ray_error("type", NULL); }
+    }
+    /* Count true values */
+    int64_t count = 0;
+    for (int64_t i = 0; i < len; i++) {
+        if (melems[i]->b8) count++;
+    }
+
+    ray_t* result = ray_alloc(count * sizeof(ray_t*));
+    if (!result) { if (_bx1) ray_release(_bx1); if (_bx2) ray_release(_bx2); return ray_error("oom", NULL); }
+    result->type = RAY_LIST;
+    result->len = count;
+    ray_t** out = (ray_t**)ray_data(result);
+    int64_t j = 0;
+    for (int64_t i = 0; i < len; i++) {
+        if (melems[i]->b8) {
+            ray_retain(velems[i]);
+            out[j++] = velems[i];
+        }
+    }
+    if (_bx1) ray_release(_bx1);
+    if (_bx2) ray_release(_bx2);
+    return result;
+}
+
+/* (apply fn vec1 vec2) — zip-apply fn element-wise over two vectors.
+ *
+ * Two atoms are passed to fn once.  Otherwise the result is a list whose
+ * length is the shorter of the two inputs.  On the first error from fn the
+ * partial results are released and that error is returned. */
+ray_t* ray_apply_fn(ray_t** args, int64_t n) {
+    if (n < 3) return ray_error("domain", NULL);
+    for (int64_t i = 0; i < n; i++)
+        if (ray_is_lazy(args[i])) args[i] = ray_lazy_materialize(args[i]);
+
+    ray_t* fn = args[0];
+
+    /* If both args are scalars, just call fn(a, b) once */
+    if (ray_is_atom(args[1]) && ray_is_atom(args[2]))
+        return call_fn2(fn, args[1], args[2]);
+
+    ray_t *_bx1 = NULL, *_bx2 = NULL;
+    ray_t* vec1 = unbox_vec_arg(args[1], &_bx1);
+    if (RAY_IS_ERR(vec1)) return vec1;
+    ray_t* vec2 = unbox_vec_arg(args[2], &_bx2);
+    if (RAY_IS_ERR(vec2)) { if (_bx1) ray_release(_bx1); return vec2; }
+    if (!is_list(vec1) || !is_list(vec2)) { if (_bx1) ray_release(_bx1); if (_bx2) ray_release(_bx2); return ray_error("type", NULL); }
+    int64_t len1 = ray_len(vec1);
+    int64_t len2 = ray_len(vec2);
+    int64_t len = len1 < len2 ? len1 : len2;
+
+    ray_t* result = ray_alloc(len * sizeof(ray_t*));
+    if (!result) { if (_bx1) ray_release(_bx1); if (_bx2) ray_release(_bx2); return ray_error("oom", NULL); }
+    result->type = RAY_LIST;
+    result->len = len;
+    ray_t** out = (ray_t**)ray_data(result);
+    ray_t** e1 = (ray_t**)ray_data(vec1);
+    ray_t** e2 = (ray_t**)ray_data(vec2);
+
+    for (int64_t i = 0; i < len; i++) {
+        out[i] = call_fn2(fn, e1[i], e2[i]);
+        if (RAY_IS_ERR(out[i])) {
+            /* Copy the error out first: `out` points into `result`, which
+             * is released below and must not be read afterwards. */
+            ray_t* err = out[i];
+            for (int64_t j = 0; j < i; j++) ray_release(out[j]);
+            result->len = 0; ray_release(result); if (_bx1) ray_release(_bx1); if (_bx2) ray_release(_bx2);
+            return err;
+        }
+    }
+    if (_bx1) ray_release(_bx1);
+    if (_bx2) ray_release(_bx2);
+    return result;
+}
+
+/* ══════════════════════════════════════════
+ * Collection operations
+ * ══════════════════════════════════════════ */
+
+/* Helper: compare two atoms for equality (value-based) */
+int atom_eq(ray_t* a, ray_t* b) {
+    int a_null = RAY_ATOM_IS_NULL(a);
+    int b_null = RAY_ATOM_IS_NULL(b);
+    if (a_null && b_null) return 1;
+    if (a_null || b_null) return 0;
+    if (a->type != b->type) {
+        if (is_numeric(a) && is_numeric(b))
+            return as_f64(a) == as_f64(b);
+        return 0;
+    }
+    switch (a->type) {
+    case -RAY_I64: return a->i64 == b->i64;
+    case -RAY_I32: return a->i32 == b->i32;
+    case -RAY_I16: return a->i16 == b->i16;
+    case -RAY_U8: return a->u8 == b->u8;
+    case -RAY_F64: return a->f64 == b->f64;
+    case -RAY_BOOL: return a->b8 == b->b8;
+    case -RAY_SYM: return a->i64 == b->i64;
+    case -RAY_DATE: case -RAY_TIME:
+        return a->i32 == b->i32;
+    case -RAY_TIMESTAMP:
+        return a->i64 == b->i64;
+    case -RAY_GUID: {
+        const uint8_t* ga = a->obj ? (const uint8_t*)ray_data(a->obj) : (const uint8_t*)ray_data((ray_t*)a);
+        const uint8_t* gb = b->obj ?
(const uint8_t*)ray_data(b->obj) : (const uint8_t*)ray_data((ray_t*)b);
+        return memcmp(ga, gb, 16) == 0;
+    }
+    case -RAY_STR:
+        return ray_str_len(a) == ray_str_len(b) &&
+               memcmp(ray_str_ptr(a), ray_str_ptr(b), ray_str_len(a)) == 0;
+    default:
+        /* Vector equality: same type and length, element-wise comparison */
+        if (a->type > 0 && a->type == b->type && a->len == b->len) {
+            int esz = ray_elem_size(a->type);
+            return memcmp(ray_data(a), ray_data(b), (size_t)(a->len * esz)) == 0;
+        }
+        return 0;
+    }
+}
+
+/* Forward declaration */
+ray_t* list_to_typed_vec(ray_t* list, int8_t orig_vec_type);
+
+/* (distinct x) — remove duplicates. Dispatches on type:
+ *   table  → deduplicate rows (via DAG GROUP with zero aggs)
+ *   vector → remove duplicate elements, preserving first occurrence
+ *   string → unique chars, sorted
+ * Typed vectors other than SYM/GUID/STR are additionally sorted by value.
+ * Boxed lists are deduplicated with atom_eq and atoms are moved ahead of
+ * non-atoms.  Returns a new reference owned by the caller. */
+ray_t* ray_distinct_fn(ray_t* x) {
+    if (ray_is_lazy(x)) x = ray_lazy_materialize(x);
+
+    /* Table distinct: dispatch to table-specific implementation */
+    if (x->type == RAY_TABLE)
+        return ray_table_distinct_fn(x);
+
+    /* String distinct: unique chars, sorted */
+    if (ray_is_atom(x) && (-x->type) == RAY_STR) {
+        const char* sp = ray_str_ptr(x);
+        size_t slen = ray_str_len(x);
+        if (slen == 0) { ray_retain(x); return x; }
+        char uniq[256];   /* one slot per possible byte value */
+        int nu = 0;
+        for (size_t i = 0; i < slen; i++) {
+            int dup = 0;
+            for (int j = 0; j < nu; j++) { if (uniq[j] == sp[i]) { dup = 1; break; } }
+            if (!dup && nu < 256) uniq[nu++] = sp[i];
+        }
+        /* Sort */
+        for (int i = 0; i < nu - 1; i++)
+            for (int j = i + 1; j < nu; j++)
+                if ((unsigned char)uniq[i] > (unsigned char)uniq[j]) { char t = uniq[i]; uniq[i] = uniq[j]; uniq[j] = t; }
+        return ray_str(uniq, (size_t)nu);
+    }
+
+    /* Typed vector path: deduplicate via hash set in O(n).
+     * The previous nested-loop scan was O(n^2); for a 100k vec it ran
+     * for ~3 minutes. */
+    if (ray_is_vec(x)) {
+        int64_t len = ray_len(x);
+        if (len == 0) { ray_retain(x); return x; }
+
+        /* Small inputs keep the index scratch on the stack. */
+        int64_t idx_stack[256];
+        int64_t* idx = (len <= 256) ? idx_stack : (int64_t*)ray_sys_alloc((size_t)len * sizeof(int64_t));
+        if (!idx) return ray_error("oom", NULL);
+
+        hashset_t hs;
+        if (!hashset_init(&hs, x, len)) {
+            if (idx != idx_stack) ray_sys_free(idx);
+            return ray_error("oom", NULL);
+        }
+        int64_t count = 0;
+        for (int64_t i = 0; i < len; i++) {
+            if (hashset_insert(&hs, i)) idx[count++] = i;
+        }
+        hashset_destroy(&hs);
+
+        /* Sort unique indices by value for numeric/temporal types — preserves
+         * pre-existing distinct semantics.  qsort-based; was O(count^2). */
+        if (x->type != RAY_SYM && x->type != RAY_GUID && x->type != RAY_STR && count > 1) {
+            distinct_sort_indices(x, idx, count);
+        }
+
+        ray_t* result = gather_by_idx(x, idx, count);
+        if (idx != idx_stack) ray_sys_free(idx);
+        return result;
+    }
+
+    ray_t* _bx = NULL;
+    x = unbox_vec_arg(x, &_bx);
+    if (RAY_IS_ERR(x)) return x;
+    if (!is_list(x)) { if (_bx) ray_release(_bx); return ray_error("type", NULL); }
+    int64_t len = ray_len(x);
+    /* Retain before dropping the temporary: if x is the unbox temporary
+     * itself, releasing first could free it before the retain. */
+    if (len == 0) { ray_retain(x); if (_bx) ray_release(_bx); return x; }
+    ray_t** elems = (ray_t**)ray_data(x);
+
+    ray_t* result = ray_alloc(len * sizeof(ray_t*));
+    if (!result) { if (_bx) ray_release(_bx); return ray_error("oom", NULL); }
+    result->type = RAY_LIST;
+    ray_t** out = (ray_t**)ray_data(result);
+    int64_t count = 0;
+
+    for (int64_t i = 0; i < len; i++) {
+        int dup = 0;
+        for (int64_t j = 0; j < count; j++) {
+            if (atom_eq(out[j], elems[i])) { dup = 1; break; }
+        }
+        if (!dup) {
+            ray_retain(elems[i]);
+            out[count++] = elems[i];
+        }
+    }
+    result->len = count;
+    /* Sort: atoms before vectors (scalars have negative type) */
+    for (int64_t i = 0; i < count - 1; i++) {
+        for (int64_t j = i + 1; j < count; j++) {
+            int ai = ray_is_atom(out[i]);
+            int aj = ray_is_atom(out[j]);
+            if (!ai && aj) {
+                ray_t* tmp = out[i]; out[i] = out[j]; out[j] = tmp;
+            }
+        }
+    }
+    if (_bx) ray_release(_bx);
+    return result;
+}
+
+/* (in val vec) — check membership.
+ *
+ * Atom val: returns a bool atom.  String val against a string or list:
+ * returns a bool vector with one entry per character.  Collection val:
+ * returns one result per element (bool vector when every result is a bool
+ * atom, otherwise a list); an empty collection yields an empty list. */
+ray_t* ray_in_fn(ray_t* val, ray_t* vec) {
+    if (ray_is_lazy(val)) val = ray_lazy_materialize(val);
+    if (ray_is_lazy(vec)) vec = ray_lazy_materialize(vec);
+    /* STR in STR: for each char of val, check membership in vec string */
+    if (ray_is_atom(val) && (-val->type) == RAY_STR && ray_is_atom(vec) && (-vec->type) == RAY_STR) {
+        const char* vp = ray_str_ptr(val);
+        size_t vlen = ray_str_len(val);
+        const char* sp = ray_str_ptr(vec);
+        size_t slen = ray_str_len(vec);
+        ray_t* result = ray_vec_new(RAY_BOOL, (int64_t)vlen);
+        if (RAY_IS_ERR(result)) return result;
+        result->len = (int64_t)vlen;
+        bool* out = (bool*)ray_data(result);
+        for (size_t i = 0; i < vlen; i++) {
+            out[i] = false;
+            for (size_t j = 0; j < slen; j++) {
+                if (vp[i] == sp[j]) { out[i] = true; break; }
+            }
+        }
+        return result;
+    }
+    /* Scalar in scalar: equality check */
+    if (ray_is_atom(val) && ray_is_atom(vec))
+        return make_bool(atom_eq(val, vec) ? 1 : 0);
+    /* STR in LIST: for each char of val, check membership in list elements */
+    if (ray_is_atom(val) && (-val->type) == RAY_STR && (vec->type == RAY_LIST)) {
+        const char* vp = ray_str_ptr(val);
+        size_t vlen = ray_str_len(val);
+        ray_t* result = ray_vec_new(RAY_BOOL, (int64_t)vlen);
+        if (RAY_IS_ERR(result)) return result;
+        result->len = (int64_t)vlen;
+        bool* out_b = (bool*)ray_data(result);
+        ray_t** list_elems = (ray_t**)ray_data(vec);
+        int64_t list_len = ray_len(vec);
+        for (size_t i = 0; i < vlen; i++) {
+            out_b[i] = false;
+            ray_t* ch = ray_str(&vp[i], 1);
+            /* Do not hand a failed allocation to atom_eq. */
+            if (!ch || RAY_IS_ERR(ch)) { ray_release(result); return ch ? ch : ray_error("oom", NULL); }
+            for (int64_t j = 0; j < list_len; j++) {
+                if (atom_eq(ch, list_elems[j])) { out_b[i] = true; break; }
+            }
+            ray_release(ch);
+        }
+        return result;
+    }
+    /* Vector val: map in over each element */
+    if (is_collection(val) && !ray_is_atom(val)) {
+        int64_t vlen = ray_len(val);
+        if (vlen == 0) {
+            /* Empty collection → return empty list */
+            return ray_list_new(0);
+        }
+        /* Hash-based fast path: both sides are typed vecs of compatible
+         * shape and the result is a row-aligned bool vec.  Build a
+         * hashset over `vec` once, probe per element of `val`.  Was
+         * O(len(val)×len(vec)); now O(len(val)+len(vec)). */
+        if (ray_is_vec(val) && ray_is_vec(vec)) {
+            ray_t* result = ray_vec_new(RAY_BOOL, vlen);
+            if (RAY_IS_ERR(result)) return result;
+            result->len = vlen;
+            bool* out = (bool*)ray_data(result);
+            hashset_t hs;
+            if (!hashset_init(&hs, vec, vec->len)) {
+                ray_release(result);
+                return ray_error("oom", NULL);
+            }
+            for (int64_t j = 0; j < vec->len; j++) hashset_insert(&hs, j);
+            int8_t val_type = val->type;
+            void* val_data = ray_data(val);
+            for (int64_t i = 0; i < vlen; i++) {
+                out[i] = hashset_find_xrow(&hs, val, i, val_type, val_data) != HS_EMPTY;
+            }
+            hashset_destroy(&hs);
+            return result;
+        }
+        /* Probe first element to check if result is scalar or vector */
+        int alloc0 = 0;
+        ray_t* e0 = collection_elem(val, 0, &alloc0);
+        if (RAY_IS_ERR(e0)) return e0;
+        ray_t* r0 = ray_in_fn(e0, vec);
+        if (alloc0) ray_release(e0);
+        if (RAY_IS_ERR(r0)) return r0;
+        if (ray_is_atom(r0) && r0->type == -RAY_BOOL) {
+            /* All results are scalar bools — use typed bool vector */
+            ray_t* result = ray_vec_new(RAY_BOOL, vlen);
+            if (RAY_IS_ERR(result)) { ray_release(r0); return result; }
+            result->len = vlen;
+            bool* out = (bool*)ray_data(result);
+            out[0] = r0->b8;
+            ray_release(r0);
+            for (int64_t i = 1; i < vlen; i++) {
+                int alloc = 0;
+                ray_t* elem = collection_elem(val, i, &alloc);
+                if (RAY_IS_ERR(elem)) { ray_release(result); return elem; }
+                ray_t* r = ray_in_fn(elem, vec);
+                if (alloc) ray_release(elem);
+                if (RAY_IS_ERR(r)) { ray_release(result); return r; }
+                out[i] = r->b8;
+                ray_release(r);
+            }
+            return result;
+        } else {
+            /* Results are non-scalar — collect as list */
+            ray_t* result = ray_list_new(vlen);
+            if (RAY_IS_ERR(result)) { ray_release(r0); return result; }
+            result = ray_list_append(result, r0);
+            ray_release(r0);
+            if (RAY_IS_ERR(result)) return result;
+            for (int64_t i = 1; i < vlen; i++) {
+                int alloc = 0;
+                ray_t* elem = collection_elem(val, i, &alloc);
+                if (RAY_IS_ERR(elem)) { ray_release(result); return elem; }
+                ray_t* r = ray_in_fn(elem, vec);
+                if (alloc) ray_release(elem);
+                if (RAY_IS_ERR(r)) { ray_release(result); return r; }
+                result = ray_list_append(result, r);
+                ray_release(r);
+                if (RAY_IS_ERR(result)) return result;
+            }
+            return result;
+        }
+    }
+    /* Typed vector: search without boxing */
+    if (ray_is_vec(vec) && ray_is_atom(val)) {
+        int64_t len = vec->len;
+        bool has_nulls = (vec->attrs & RAY_ATTR_HAS_NULLS) != 0;
+        for (int64_t i = 0; i < len; i++) {
+            if (has_nulls && ray_vec_is_null(vec, i)) {
+                if (RAY_ATOM_IS_NULL(val)) return make_bool(1);
+                continue;
+            }
+            int alloc = 0;
+            ray_t* elem = collection_elem(vec, i, &alloc);
+            int eq = atom_eq(val, elem);
+            if (alloc) ray_release(elem);
+            if (eq) return make_bool(1);
+        }
+        return make_bool(0);
+    }
+    ray_t* _bx = NULL;
+    vec = unbox_vec_arg(vec, &_bx);
+    if (RAY_IS_ERR(vec)) return vec;
+    if (!is_list(vec)) { if (_bx) ray_release(_bx); return ray_error("type", NULL); }
+    int64_t len = ray_len(vec);
+    ray_t** elems = (ray_t**)ray_data(vec);
+    for (int64_t i = 0; i < len; i++) {
+        if (atom_eq(val, elems[i])) { if (_bx) ray_release(_bx); return make_bool(1); }
+    }
+    if (_bx) ray_release(_bx);
+    return make_bool(0);
+}
+
+/* Helper: convert a boxed list result back to a typed vector if the original was typed */
+ray_t* list_to_typed_vec(ray_t* list, int8_t orig_vec_type) {
+    if (!list || RAY_IS_ERR(list) || list->type != RAY_LIST) return list;
+    int64_t count = list->len;
+    /* For SYM and STR types, only convert when empty (to get [] instead of ()) */
+    if (orig_vec_type == RAY_SYM || orig_vec_type == RAY_STR) {
+        if (count == 0) {
+            ray_release(list);
+            return ray_vec_new(orig_vec_type, 0);
+        }
+        return list; /* Keep as boxed list for non-empty SYM/STR */
+    }
+    ray_t* vec = ray_vec_new(orig_vec_type, count);
+    if (RAY_IS_ERR(vec)) return vec;
+    vec->len = count;
+    ray_t** elems = (ray_t**)ray_data(list);
+    for (int64_t i = 0; i < count; i++)
+        store_typed_elem(vec, i, elems[i]);
+    /* Release the list (ray_release_owned_refs handles child
elements) */
+    ray_release(list);
+    return vec;
+}
+
+/* (vec_elem_in helper removed — replaced by hashset-based lookups in
+ * except/union/sect/in/distinct.) */
+
+/* (except vec1 vec2) — elements in vec1 not in vec2.
+ *
+ * vec2 may be a collection or a single atom.  Typed vec1 with a typed or
+ * atom vec2 keeps vec1's type and order; everything else goes through the
+ * boxed-list fallback, which returns a list (or an empty typed vector when
+ * vec1 was typed and nothing survives). */
+ray_t* ray_except_fn(ray_t* vec1, ray_t* vec2) {
+    if (ray_is_lazy(vec1)) vec1 = ray_lazy_materialize(vec1);
+    if (ray_is_lazy(vec2)) vec2 = ray_lazy_materialize(vec2);
+
+    /* Typed vector path: hash-based.  Was O(len1×len2); now O(len1+len2). */
+    if (ray_is_vec(vec1) && (ray_is_vec(vec2) || ray_is_atom(vec2))) {
+        int64_t len1 = vec1->len;
+        int64_t idx_stack[256];
+        int64_t* idx = (len1 <= 256) ? idx_stack : (int64_t*)ray_sys_alloc((size_t)len1 * sizeof(int64_t));
+        if (!idx) return ray_error("oom", NULL);
+        int64_t count = 0;
+        if (ray_is_atom(vec2)) {
+            /* Scalar: filter out matching elements (single compare per row). */
+            for (int64_t i = 0; i < len1; i++) {
+                int alloc = 0;
+                ray_t* elem = collection_elem(vec1, i, &alloc);
+                int eq = atom_eq(elem, vec2);
+                if (alloc) ray_release(elem);
+                if (!eq) idx[count++] = i;
+            }
+        } else {
+            hashset_t hs;
+            if (!hashset_init(&hs, vec2, vec2->len)) {
+                if (idx != idx_stack) ray_sys_free(idx);
+                return ray_error("oom", NULL);
+            }
+            for (int64_t j = 0; j < vec2->len; j++) hashset_insert(&hs, j);
+            int8_t v1_type = vec1->type;
+            void* v1_data = ray_data(vec1);
+            for (int64_t i = 0; i < len1; i++) {
+                if (hashset_find_xrow(&hs, vec1, i, v1_type, v1_data) == HS_EMPTY)
+                    idx[count++] = i;
+            }
+            hashset_destroy(&hs);
+        }
+        ray_t* result = gather_by_idx(vec1, idx, count);
+        if (idx != idx_stack) ray_sys_free(idx);
+        return result;
+    }
+
+    /* Boxed list fallback */
+    int8_t orig_type = ray_is_vec(vec1) ? vec1->type : -1;
+    ray_t *_bx1 = NULL, *_bx2 = NULL;
+    vec1 = unbox_vec_arg(vec1, &_bx1);
+    if (RAY_IS_ERR(vec1)) return vec1;
+    vec2 = unbox_vec_arg(vec2, &_bx2);
+    if (RAY_IS_ERR(vec2)) { if (_bx1) ray_release(_bx1); return vec2; }
+    /* vec2 is read either as an atom or as an array of element pointers;
+     * reject anything else, as union and sect do. */
+    if (!is_list(vec1) || (!ray_is_atom(vec2) && !is_list(vec2))) { if (_bx1) ray_release(_bx1); if (_bx2) ray_release(_bx2); return ray_error("type", NULL); }
+    int64_t len1 = ray_len(vec1);
+    ray_t** e1 = (ray_t**)ray_data(vec1);
+
+    ray_t* result = ray_alloc(len1 * sizeof(ray_t*));
+    if (!result) { if (_bx1) ray_release(_bx1); if (_bx2) ray_release(_bx2); return ray_error("oom", NULL); }
+    result->type = RAY_LIST;
+    ray_t** out = (ray_t**)ray_data(result);
+    int64_t count = 0;
+
+    if (ray_is_atom(vec2)) {
+        for (int64_t i = 0; i < len1; i++) {
+            if (!atom_eq(e1[i], vec2)) { ray_retain(e1[i]); out[count++] = e1[i]; }
+        }
+    } else {
+        int64_t len2 = ray_len(vec2);
+        ray_t** e2 = (ray_t**)ray_data(vec2);
+        for (int64_t i = 0; i < len1; i++) {
+            int found = 0;
+            for (int64_t j = 0; j < len2; j++) {
+                if (atom_eq(e1[i], e2[j])) { found = 1; break; }
+            }
+            if (!found) { ray_retain(e1[i]); out[count++] = e1[i]; }
+        }
+    }
+    result->len = count;
+    if (_bx1) ray_release(_bx1);
+    if (_bx2) ray_release(_bx2);
+    if (orig_type >= 0 && count == 0) { ray_release(result); return ray_vec_new(orig_type, 0); }
+    return result;
+}
+
+/* (union vec1 vec2) — elements in vec1 + elements in vec2 not already in vec1.
+ *
+ * Typed inputs: vec1 is concatenated with the rows of vec2 that are absent
+ * from vec1.  Boxed fallback: same rule using atom_eq, which also drops
+ * repeats within vec2 because each candidate is compared against the
+ * output built so far. */
+ray_t* ray_union_fn(ray_t* vec1, ray_t* vec2) {
+    if (ray_is_lazy(vec1)) vec1 = ray_lazy_materialize(vec1);
+    if (ray_is_lazy(vec2)) vec2 = ray_lazy_materialize(vec2);
+
+    /* Typed vector path: hash-based.  Was O(len1×len2); now O(len1+len2). */
+    if (ray_is_vec(vec1) && ray_is_vec(vec2)) {
+        int64_t len2 = vec2->len;
+        int64_t idx_stack[256];
+        int64_t* idx = (len2 <= 256) ? idx_stack : (int64_t*)ray_sys_alloc((size_t)len2 * sizeof(int64_t));
+        if (!idx) return ray_error("oom", NULL);
+        hashset_t hs;
+        if (!hashset_init(&hs, vec1, vec1->len)) {
+            if (idx != idx_stack) ray_sys_free(idx);
+            return ray_error("oom", NULL);
+        }
+        for (int64_t j = 0; j < vec1->len; j++) hashset_insert(&hs, j);
+        int8_t v2_type = vec2->type;
+        void* v2_data = ray_data(vec2);
+        int64_t extra = 0;
+        for (int64_t i = 0; i < len2; i++) {
+            if (hashset_find_xrow(&hs, vec2, i, v2_type, v2_data) == HS_EMPTY)
+                idx[extra++] = i;
+        }
+        hashset_destroy(&hs);
+        ray_t* part2 = gather_by_idx(vec2, idx, extra);
+        if (idx != idx_stack) ray_sys_free(idx);
+        if (RAY_IS_ERR(part2)) return part2;
+        ray_t* result = ray_concat_fn(vec1, part2);
+        ray_release(part2);
+        return result;
+    }
+
+    /* Boxed list fallback */
+    ray_t *_bx1 = NULL, *_bx2 = NULL;
+    vec1 = unbox_vec_arg(vec1, &_bx1);
+    if (RAY_IS_ERR(vec1)) return vec1;
+    vec2 = unbox_vec_arg(vec2, &_bx2);
+    if (RAY_IS_ERR(vec2)) { if (_bx1) ray_release(_bx1); return vec2; }
+    if (!is_list(vec1) || !is_list(vec2)) { if (_bx1) ray_release(_bx1); if (_bx2) ray_release(_bx2); return ray_error("type", NULL); }
+    int64_t len1 = ray_len(vec1), len2 = ray_len(vec2);
+    ray_t** e1 = (ray_t**)ray_data(vec1);
+    ray_t** e2 = (ray_t**)ray_data(vec2);
+
+    ray_t* result = ray_alloc((len1 + len2) * sizeof(ray_t*));
+    if (!result) { if (_bx1) ray_release(_bx1); if (_bx2) ray_release(_bx2); return ray_error("oom", NULL); }
+    result->type = RAY_LIST;
+    ray_t** out = (ray_t**)ray_data(result);
+    int64_t count = 0;
+    for (int64_t i = 0; i < len1; i++) { ray_retain(e1[i]); out[count++] = e1[i]; }
+    for (int64_t i = 0; i < len2; i++) {
+        int found = 0;
+        for (int64_t j = 0; j < count; j++)
+            if (atom_eq(out[j], e2[i])) { found = 1; break; }
+        if (!found) { ray_retain(e2[i]); out[count++] = e2[i]; }
+    }
+    result->len = count;
+    if (_bx1) ray_release(_bx1);
+    if (_bx2) ray_release(_bx2);
+    return result;
+}
+
+/* (sect vec1 vec2) — intersection:
elements in both */
+ray_t* ray_sect_fn(ray_t* vec1, ray_t* vec2) {
+    if (ray_is_lazy(vec1)) vec1 = ray_lazy_materialize(vec1);
+    if (ray_is_lazy(vec2)) vec2 = ray_lazy_materialize(vec2);
+
+    /* Typed vector path: hash-based.  Was O(len1×len2); now O(len1+len2). */
+    if (ray_is_vec(vec1) && ray_is_vec(vec2)) {
+        int64_t len1 = vec1->len;
+        int64_t idx_stack[256];
+        int64_t* idx = (len1 <= 256) ? idx_stack : (int64_t*)ray_sys_alloc((size_t)len1 * sizeof(int64_t));
+        if (!idx) return ray_error("oom", NULL);
+        hashset_t hs;
+        if (!hashset_init(&hs, vec2, vec2->len)) {
+            if (idx != idx_stack) ray_sys_free(idx);
+            return ray_error("oom", NULL);
+        }
+        for (int64_t j = 0; j < vec2->len; j++) hashset_insert(&hs, j);
+        int8_t v1_type = vec1->type;
+        void* v1_data = ray_data(vec1);
+        int64_t count = 0;
+        /* Keep the rows of vec1 (in vec1 order) that are present in vec2. */
+        for (int64_t i = 0; i < len1; i++) {
+            if (hashset_find_xrow(&hs, vec1, i, v1_type, v1_data) != HS_EMPTY)
+                idx[count++] = i;
+        }
+        hashset_destroy(&hs);
+        ray_t* result = gather_by_idx(vec1, idx, count);
+        if (idx != idx_stack) ray_sys_free(idx);
+        return result;
+    }
+
+    /* Boxed list fallback */
+    ray_t *_bx1 = NULL, *_bx2 = NULL;
+    vec1 = unbox_vec_arg(vec1, &_bx1);
+    if (RAY_IS_ERR(vec1)) return vec1;
+    vec2 = unbox_vec_arg(vec2, &_bx2);
+    if (RAY_IS_ERR(vec2)) { if (_bx1) ray_release(_bx1); return vec2; }
+    if (!is_list(vec1) || !is_list(vec2)) { if (_bx1) ray_release(_bx1); if (_bx2) ray_release(_bx2); return ray_error("type", NULL); }
+    int64_t len1 = ray_len(vec1);
+    ray_t** e1 = (ray_t**)ray_data(vec1);
+    ray_t** e2 = (ray_t**)ray_data(vec2);
+    int64_t len2 = ray_len(vec2);
+
+    ray_t* result = ray_alloc(len1 * sizeof(ray_t*));
+    if (!result) { if (_bx1) ray_release(_bx1); if (_bx2) ray_release(_bx2); return ray_error("oom", NULL); }
+    result->type = RAY_LIST;
+    ray_t** out = (ray_t**)ray_data(result);
+    int64_t count = 0;
+    for (int64_t i = 0; i < len1; i++) {
+        for (int64_t j = 0; j < len2; j++) {
+            if (atom_eq(e1[i], e2[j])) { ray_retain(e1[i]); out[count++] = e1[i]; break; }
+        }
+    }
+    result->len = count;
+    if (_bx1) ray_release(_bx1);
+    if (_bx2) ray_release(_bx2);
+    return result;
+}
+
+/* (take vec n) — first n elements (positive) or last |n| elements (negative).
+ *
+ * n_obj forms:
+ *   integer atom        — take |n| elements, wrapping around when |n|
+ *                         exceeds the length; a non-string atom is repeated
+ *                         |n| times; strings are limited to 8192 bytes
+ *   2-element I64 vec   — range take [start amount]; negative start counts
+ *                         from the end, negative amount is a "length" error
+ *   F64 atom            — rejected with a "type" error
+ * Tables and dicts apply the take to each column / to keys and values. */
+ray_t* ray_take_fn(ray_t* vec, ray_t* n_obj) {
+    if (ray_is_lazy(vec)) vec = ray_lazy_materialize(vec);
+    /* N must be an integer (or 2-elem i64 vector for range-take).  Reject
+     * floats up front: as_i64(f64) reads the bit pattern and would cause
+     * e.g. (take 1.0 2.0) to attempt a 4.6-quintillion-element allocation
+     * and surface as "oom" — misleading for what is really a type error. */
+    if (ray_is_atom(n_obj) && n_obj->type == -RAY_F64)
+        return ray_error("type", NULL);
+    /* Range take: (take collection [start amount]) — slice from start for amount elements */
+    if (ray_is_vec(n_obj) && n_obj->type == RAY_I64 && ray_len(n_obj) == 2) {
+        int64_t* idx = (int64_t*)ray_data(n_obj);
+        int64_t start = idx[0];
+        int64_t amount = idx[1];
+        if (amount < 0) return ray_error("length", NULL);
+
+        /* Table range take */
+        if (vec->type == RAY_TABLE) {
+            int64_t ncols = ray_table_ncols(vec);
+            ray_t* result = ray_table_new(ncols);
+            if (RAY_IS_ERR(result)) return result;
+            for (int64_t i = 0; i < ncols; i++) {
+                ray_t* col = ray_table_get_col_idx(vec, i);
+                int64_t name_id = ray_table_col_name(vec, i);
+                ray_t* taken = ray_take_fn(col, n_obj);
+                if (RAY_IS_ERR(taken)) { ray_release(result); return taken; }
+                result = ray_table_add_col(result, name_id, taken);
+                /* Drop our reference whether or not add_col succeeded, as
+                 * ray_filter_fn does for its columns.
+                 * NOTE(review): assumes ray_table_add_col retains the
+                 * column — confirm against its definition. */
+                ray_release(taken);
+                if (RAY_IS_ERR(result)) return result;
+            }
+            return result;
+        }
+
+        /* String range take */
+        if (ray_is_atom(vec) && (-vec->type) == RAY_STR) {
+            const char* s = ray_str_ptr(vec);
+            int64_t slen = (int64_t)ray_str_len(vec);
+            if (start < 0) start = slen + start;
+            if (start < 0) start = 0;
+            if (start >= slen) return ray_str("", 0);
+            int64_t end = start + amount;
+            if (end > slen) end = slen;
+            return ray_str(s + start, (size_t)(end - start));
+        }
+
+        /* Typed vector range take */
+        if (ray_is_vec(vec)) {
+            int64_t len = ray_len(vec);
+            if (start < 0) start = len + start;
+            if (start < 0) start = 0;
+            if (start >= len) return ray_vec_new(vec->type, 0);
+            int64_t end = start + amount;
+            if (end > len) end = len;
+            int64_t count = end - start;
+            int8_t vtype = vec->type;
+            int esz = ray_elem_size(vtype);
+            ray_t* result = ray_vec_new(vtype, count);
+            if (RAY_IS_ERR(result)) return result;
+            result->len = count;
+            memcpy(ray_data(result), (char*)ray_data(vec) + start * esz, (size_t)(count * esz));
+            /* Propagate null bitmap — check parent's flag for slices */
+            bool has_nulls = (vec->attrs & RAY_ATTR_HAS_NULLS) ||
+                             ((vec->attrs & RAY_ATTR_SLICE) && vec->slice_parent &&
+                              (vec->slice_parent->attrs & RAY_ATTR_HAS_NULLS));
+            if (has_nulls) {
+                for (int64_t i = 0; i < count; i++)
+                    if (ray_vec_is_null(vec, start + i))
+                        ray_vec_set_null(result, i, true);
+            }
+            return result;
+        }
+
+        /* Dict range take — slice both keys and vals in parallel. */
+        if (vec->type == RAY_DICT) {
+            ray_t* keys = ray_dict_keys(vec);
+            ray_t* vals = ray_dict_vals(vec);
+            int64_t len = keys ? keys->len : 0;
+            if (start < 0) start = len + start;
+            if (start < 0) start = 0;
+            int64_t end = start + amount;
+            if (end > len) end = len;
+            if (end < start) end = start;
+            int64_t count = end - start;
+
+            ray_t* nk = ray_vec_slice(keys, start, count);
+            if (!nk || RAY_IS_ERR(nk)) return nk ? nk : ray_error("oom", NULL);
+
+            ray_t* nv;
+            if (vals && vals->type == RAY_LIST) {
+                nv = ray_alloc(count * sizeof(ray_t*));
+                if (!nv) { ray_release(nk); return ray_error("oom", NULL); }
+                nv->type = RAY_LIST;
+                nv->len = count;
+                ray_t** vsrc = (ray_t**)ray_data(vals);
+                ray_t** vdst = (ray_t**)ray_data(nv);
+                for (int64_t i = 0; i < count; i++) {
+                    vdst[i] = vsrc[start + i];
+                    if (vdst[i]) ray_retain(vdst[i]);
+                }
+            } else {
+                nv = ray_vec_slice(vals, start, count);
+                if (!nv || RAY_IS_ERR(nv)) { ray_release(nk); return nv ? nv : ray_error("oom", NULL); }
+            }
+            return ray_dict_new(nk, nv);
+        }
+
+        /* Boxed list range take */
+        if (vec->type == RAY_LIST) {
+            int64_t len = ray_len(vec);
+            if (start < 0) start = len + start;
+            if (start < 0) start = 0;
+            if (start >= len) {
+                ray_t* result = ray_alloc(0);
+                /* Check before writing the header, as the non-empty path
+                 * below does. */
+                if (!result) return ray_error("oom", NULL);
+                result->type = RAY_LIST;
+                result->len = 0;
+                return result;
+            }
+            int64_t end = start + amount;
+            if (end > len) end = len;
+            int64_t count = end - start;
+            ray_t** elems = (ray_t**)ray_data(vec);
+            ray_t* result = ray_alloc(count * sizeof(ray_t*));
+            if (!result) return ray_error("oom", NULL);
+            result->type = RAY_LIST;
+            result->len = count;
+            ray_t** out = (ray_t**)ray_data(result);
+            for (int64_t i = 0; i < count; i++) {
+                ray_retain(elems[start + i]);
+                out[i] = elems[start + i];
+            }
+            return result;
+        }
+
+        return ray_error("type", NULL);
+    }
+    /* Char take: (take 'a' n) → string of n copies of char */
+    if (ray_is_atom(vec) && vec->type == -RAY_STR && ray_str_len(vec) == 1 && ray_is_atom(n_obj) && is_numeric(n_obj)) {
+        int64_t n = as_i64(n_obj);
+        int64_t count = n < 0 ? -n : n;
+        char buf[8192];
+        if (count > (int64_t)sizeof(buf)) return ray_error("limit", NULL);
+        for (int64_t i = 0; i < count; i++) buf[i] = vec->sdata[0];
+        return ray_str(buf, (size_t)count);
+    }
+    /* Scalar take: (take value n) → repeat value n times */
+    if (ray_is_atom(vec) && (-vec->type) != RAY_STR && ray_is_atom(n_obj) && is_numeric(n_obj)) {
+        int64_t n = as_i64(n_obj);
+        int64_t count = n < 0 ? -n : n;
+        int8_t vtype = -(vec->type);
+        ray_t* result = ray_vec_new(vtype, count);
+        if (RAY_IS_ERR(result)) return result;
+        result->len = count;
+        for (int64_t i = 0; i < count; i++)
+            store_typed_elem(result, i, vec);
+        return result;
+    }
+    /* String take: (take "hello" 3) → "hel", with wrapping extension */
+    if (ray_is_atom(vec) && (-vec->type) == RAY_STR && ray_is_atom(n_obj) && is_numeric(n_obj)) {
+        const char* s = ray_str_ptr(vec);
+        int64_t slen = (int64_t)ray_str_len(vec);
+        int64_t n = as_i64(n_obj);
+        int64_t abs_n = n < 0 ? -n : n;
+        char buf[8192];
+        if (abs_n > (int64_t)sizeof(buf)) return ray_error("limit", NULL);
+        if (slen == 0) {
+            return ray_str("", 0);
+        }
+        if (n >= 0) {
+            for (int64_t i = 0; i < abs_n; i++) buf[i] = s[i % slen];
+        } else {
+            /* Negative: fill so the result ends on the last character,
+             * wrapping backwards through the source. */
+            for (int64_t i = 0; i < abs_n; i++) {
+                int64_t si = slen - (abs_n - i) % slen;
+                if (si == slen) si = 0;
+                buf[i] = s[si];
+            }
+        }
+        return ray_str(buf, (size_t)abs_n);
+    }
+    /* Table take: apply take to each column */
+    if (vec->type == RAY_TABLE && is_numeric(n_obj)) {
+        int64_t ncols = ray_table_ncols(vec);
+        ray_t* result = ray_table_new(ncols);
+        if (RAY_IS_ERR(result)) return result;
+        for (int64_t i = 0; i < ncols; i++) {
+            ray_t* col = ray_table_get_col_idx(vec, i);
+            int64_t name_id = ray_table_col_name(vec, i);
+            ray_t* taken = ray_take_fn(col, n_obj);
+            if (RAY_IS_ERR(taken)) { ray_release(result); return taken; }
+            result = ray_table_add_col(result, name_id, taken);
+            /* Same ownership rule as the table range take above.
+             * NOTE(review): assumes ray_table_add_col retains — confirm. */
+            ray_release(taken);
+            if (RAY_IS_ERR(result)) return result;
+        }
+        return result;
+    }
+    /* Dict take: apply take to keys and vals in parallel.  Wrapping for
+     * |n| > pair count works the same as for typed vectors. */
+    if (vec->type == RAY_DICT && is_numeric(n_obj)) {
+        ray_t* keys = ray_dict_keys(vec);
+        ray_t* vals = ray_dict_vals(vec);
+        if (!keys) return ray_error("type", NULL);
+        ray_t* nk = ray_take_fn(keys, n_obj);
+        if (RAY_IS_ERR(nk)) return nk;
+        ray_t* nv = vals ? ray_take_fn(vals, n_obj) : ray_list_new(0);
+        if (!nv || RAY_IS_ERR(nv)) { ray_release(nk); return nv ? nv : ray_error("oom", NULL); }
+        return ray_dict_new(nk, nv);
+    }
+    /* Typed vector take with extension */
+    if (ray_is_vec(vec) && is_numeric(n_obj)) {
+        int64_t len = ray_len(vec);
+        int64_t n = as_i64(n_obj);
+        int64_t abs_n = n < 0 ? -n : n;
+        int8_t vtype = vec->type;
+        int esz = ray_elem_size(vtype);
+        ray_t* result = ray_vec_new(vtype, abs_n);
+        if (RAY_IS_ERR(result)) return result;
+        result->len = abs_n;
+        char* src = (char*)ray_data(vec);
+        char* dst = (char*)ray_data(result);
+        if (len == 0) {
+            /* Nothing to tile from: zero-fill the requested length. */
+            memset(dst, 0, (size_t)(abs_n * esz));
+        } else if (n >= 0 && abs_n > 0) {
+            /* Doubling tile-copy: O(log(abs_n/len)) memcpys instead of
+             * abs_n calls of esz bytes each.  Invariant: after every
+             * memcpy `copied` is a multiple of `len`, so dst[0..copied)
+             * holds a perfect tile and we can keep doubling from dst[0].
+             * The final partial copy is < copied so it stays within the
+             * already-tiled prefix. */
+            int64_t to_copy = abs_n < len ? abs_n : len;
+            memcpy(dst, src, (size_t)(to_copy * esz));
+            int64_t copied = to_copy;
+            while (copied + copied <= abs_n) {
+                memcpy(dst + copied * esz, dst, (size_t)(copied * esz));
+                copied *= 2;
+            }
+            int64_t remaining = abs_n - copied;
+            if (remaining > 0)
+                memcpy(dst + copied * esz, dst, (size_t)(remaining * esz));
+        } else if (n < 0) {
+            /* Negative: take from end with wrap */
+            for (int64_t i = 0; i < abs_n; i++) {
+                int64_t si = len - (abs_n - i) % len;
+                if (si == len) si = 0;
+                memcpy(dst + i * esz, src + si * esz, esz);
+            }
+        }
+        /* Propagate null bitmap — check parent's flag for slices */
+        bool has_nulls = len > 0 &&
+                         ((vec->attrs & RAY_ATTR_HAS_NULLS) ||
+                          ((vec->attrs & RAY_ATTR_SLICE) && vec->slice_parent &&
+                           (vec->slice_parent->attrs & RAY_ATTR_HAS_NULLS)));
+        if (has_nulls) {
+            if (n >= 0) {
+                for (int64_t i = 0; i < abs_n; i++)
+                    if (ray_vec_is_null(vec, i % len))
+                        ray_vec_set_null(result, i, true);
+            } else {
+                for (int64_t i = 0; i < abs_n; i++) {
+                    int64_t si = len - (abs_n - i) % len;
+                    if (si == len) si = 0;
+                    if (ray_vec_is_null(vec, si))
+                        ray_vec_set_null(result, i, true);
+                }
+            }
+        }
+        return result;
+    }
+    /* Boxed list take with the same wrapping rules */
+    ray_t* _bx = NULL;
+    vec = unbox_vec_arg(vec, &_bx);
+    if (RAY_IS_ERR(vec)) return vec;
+    if (!is_list(vec) || !is_numeric(n_obj))
+        { if (_bx) ray_release(_bx); return ray_error("type", NULL); }
+    int64_t len = ray_len(vec);
+    int64_t n = as_i64(n_obj);
+    ray_t** elems = (ray_t**)ray_data(vec);
+
+    int64_t abs_n = n < 0 ? -n : n;
+    int64_t elem_count = abs_n;
+    ray_t* result = ray_alloc(elem_count * sizeof(ray_t*));
+    if (!result) { if (_bx) ray_release(_bx); return ray_error("oom", NULL); }
+    result->type = RAY_LIST;
+    result->len = elem_count;
+    ray_t** out = (ray_t**)ray_data(result);
+    if (len == 0) {
+        /* Nothing to repeat: an empty source yields an empty list. */
+        result->len = 0;
+    } else if (n >= 0) {
+        for (int64_t i = 0; i < elem_count; i++) {
+            ray_retain(elems[i % len]);
+            out[i] = elems[i % len];
+        }
+    } else {
+        for (int64_t i = 0; i < elem_count; i++) {
+            int64_t si = len - (elem_count - i) % len;
+            if (si == len) si = 0;
+            ray_retain(elems[si]);
+            out[i] = elems[si];
+        }
+    }
+    if (_bx) ray_release(_bx);
+    return result;
+}
+
+/* (at vec idx) or (at table 'col) — index into vector or table */
+ray_t* ray_at_fn(ray_t* vec, ray_t* idx) {
+    if (ray_is_lazy(vec)) vec = ray_lazy_materialize(vec);
+    /* Table column access by symbol key — return the typed vector directly */
+    if (vec->type == RAY_TABLE && idx->type == -RAY_SYM) {
+        ray_t* col = ray_table_get_col(vec, idx->i64);
+        if (!col) return ray_error("domain", NULL);
+        ray_retain(col);
+        return col;
+    }
+
+    /* Table row access by integer index: (at table 0) → {col1: val1, col2: val2} */
+    if (vec->type == RAY_TABLE && ray_is_atom(idx) &&
+        (idx->type == -RAY_I64 || idx->type == -RAY_I32 ||
+         idx->type == -RAY_I16 || idx->type == -RAY_U8)) {
+        int64_t row = as_i64(idx);
+        int64_t nrows = ray_table_nrows(vec);
+        if (row < 0 || row >= nrows) return ray_error("domain", NULL);
+        int64_t ncols = ray_table_ncols(vec);
+        /* Build a dict: keys SYM vec + vals LIST */
+        ray_t* keys = ray_sym_vec_new(RAY_SYM_W64, ncols);
+        if (RAY_IS_ERR(keys)) return keys;
+        ray_t* vals = ray_list_new(ncols);
+        if (RAY_IS_ERR(vals)) { ray_release(keys); return vals; }
+        for (int64_t c = 0; c < ncols; c++) {
+            int64_t key_id = ray_table_col_name(vec, c);
+            keys = ray_vec_append(keys, &key_id);
+            if (RAY_IS_ERR(keys)) { ray_release(vals); return keys; }
+            ray_t* col = ray_table_get_col_idx(vec, c);
+            int alloc
= 0;
+            ray_t* val = collection_elem(col, row, &alloc);
+            if (RAY_IS_ERR(val)) { ray_release(keys); ray_release(vals); return val; }
+            vals = ray_list_append(vals, val);
+            if (alloc) ray_release(val);
+            if (RAY_IS_ERR(vals)) { ray_release(keys); return vals; }
+        }
+        return ray_dict_new(keys, vals);
+    }
+
+    /* Dict key access: (at dict key) → value or 0Nl if missing */
+    if (vec->type == RAY_DICT) {
+        ray_t* v = ray_dict_get(vec, idx);
+        if (v) return v;
+        return ray_typed_null(-RAY_I64);  /* 0Nl for missing key */
+    }
+
+    /* String indexing: (at "hello" 1) → 'e', (at "hello" [0 4]) → "ho" */
+    if (ray_is_atom(vec) && (-vec->type) == RAY_STR) {
+        const char* s = ray_str_ptr(vec);
+        size_t slen = ray_str_len(vec);
+        if (is_collection(idx)) {
+            /* Multiple indices → build string from chars.
+             * At most sizeof(buf) indices are accepted ("limit" error otherwise). */
+            int64_t idxlen = ray_len(idx);
+            char buf[8192];
+            if ((size_t)idxlen > sizeof(buf)) return ray_error("limit", NULL);
+            for (int64_t j = 0; j < idxlen; j++) {
+                int alloc = 0;
+                ray_t* ie = collection_elem(idx, j, &alloc);
+                /* A failed element fetch must be propagated, not decoded as
+                 * an index (the other collection_elem call sites in this
+                 * function check for an error the same way). */
+                if (RAY_IS_ERR(ie)) return ie;
+                int64_t k = as_i64(ie);
+                if (alloc) ray_release(ie);
+                if (k < 0 || (size_t)k >= slen) return ray_error("domain", NULL);
+                buf[j] = s[k];
+            }
+            return ray_str(buf, (size_t)idxlen);
+        }
+        int64_t i = as_i64(idx);
+        if (i < 0 || (size_t)i >= slen) return ray_error("domain", NULL);
+        /* Return 1-char string atom */
+        return ray_str(&s[i], 1);
+    }
+
+    /* Vector index: (at vec [i j k]) → vector of values */
+    if (is_collection(idx) && idx->type != -RAY_SYM) {
+        int64_t idxlen = ray_len(idx);
+        ray_t* result = ray_alloc(idxlen * sizeof(ray_t*));
+        if (!result) return ray_error("oom", NULL);
+        result->type = RAY_LIST;
+        result->len = idxlen;
+        ray_t** out = (ray_t**)ray_data(result);
+        for (int64_t j = 0; j < idxlen; j++) {
+            int alloc = 0;
+            ray_t* idx_elem = collection_elem(idx, j, &alloc);
+            if (RAY_IS_ERR(idx_elem)) {
+                for (int64_t k = 0; k < j; k++) ray_release(out[k]);
+                /* Slots [0, j) were just released and slots [j, idxlen) were
+                 * never written. Zero the length before dropping the list so
+                 * the release cannot revisit any of them, whatever
+                 * ray_release does with list children (same idiom as
+                 * ray_scan_right_fn). */
+                result->len = 0;
+                ray_release(result);
+                return idx_elem;
+            }
+            ray_t* sub_idx = idx_elem;
+            ray_t* val =
ray_at_fn(vec, sub_idx);
+            if (alloc) ray_release(idx_elem);
+            if (RAY_IS_ERR(val)) {
+                for (int64_t k = 0; k < j; k++) ray_release(out[k]);
+                /* Slots [0, j) were just released and slots [j, idxlen) were
+                 * never written. Zero the length before dropping the list so
+                 * the release cannot revisit any of them (same idiom as
+                 * ray_scan_right_fn). */
+                result->len = 0;
+                ray_release(result);
+                return val;
+            }
+            out[j] = val;
+        }
+        return result;
+    }
+
+    if (idx->type != -RAY_I64 && idx->type != -RAY_I32 &&
+        idx->type != -RAY_I16 && idx->type != -RAY_U8)
+        return ray_error("type", NULL);
+    int64_t i = as_i64(idx);
+
+    /* Typed vector: extract element directly */
+    if (ray_is_vec(vec)) {
+        int64_t len = ray_len(vec);
+        if (i < 0 || i >= len) return ray_typed_null(-vec->type);  /* out of bounds → typed null */
+        int alloc = 0;
+        ray_t* elem = collection_elem(vec, i, &alloc);
+        /* collection_elem is expected to allocate for typed vecs, so elem is
+         * normally owned. If it ever hands back a borrowed element
+         * (alloc == 0), take a reference so the caller still receives an
+         * owned value, as on the list path below. */
+        if (!alloc && elem && !RAY_IS_ERR(elem)) ray_retain(elem);
+        return elem;
+    }
+
+    if (!is_list(vec)) return ray_error("type", NULL);
+    int64_t len = ray_len(vec);
+    if (i < 0 || i >= len) return ray_typed_null(-RAY_I64);  /* out of bounds → 0Nl */
+    ray_t* elem = ((ray_t**)ray_data(vec))[i];
+    ray_retain(elem);
+    return elem;
+}
+
+/* (find vec val) — index of first occurrence, or 0Nl (typed I64 null) when
+ * val is not found. A collection val is searched element by element and
+ * yields a list with one result per element. */
+ray_t* ray_find_fn(ray_t* vec, ray_t* val) {
+    if (ray_is_lazy(vec)) vec = ray_lazy_materialize(vec);
+    if (ray_is_lazy(val)) val = ray_lazy_materialize(val);
+    /* String find: (find "hello" 'l') → index of char in string */
+    if (ray_is_atom(vec) && (-vec->type) == RAY_STR && ray_is_atom(val) && val->type == -RAY_STR && ray_str_len(val) == 1) {
+        const char* s = ray_str_ptr(vec);
+        size_t slen = ray_str_len(vec);
+        char c = ray_str_ptr(val)[0];
+        for (size_t i = 0; i < slen; i++) {
+            if (s[i] == c) return make_i64((int64_t)i);
+        }
+        return ray_typed_null(-RAY_I64);
+    }
+    /* Vector val: (find vec [v1 v2]) → [idx1 idx2] */
+    if (is_collection(val)) {
+        /* If vec is empty, return empty vector */
+        if (is_collection(vec) && ray_len(vec) == 0)
+            return ray_vec_new(RAY_I64, 0);
+        int64_t vlen = ray_len(val);
+        ray_t* result = ray_alloc(vlen * sizeof(ray_t*));
+        if (!result) return ray_error("oom", NULL);
+        result->type = RAY_LIST;
+ result->len = vlen; + ray_t** out = (ray_t**)ray_data(result); + for (int64_t j = 0; j < vlen; j++) { + int alloc = 0; + ray_t* ve = collection_elem(val, j, &alloc); + out[j] = ray_find_fn(vec, ve); + if (alloc) ray_release(ve); + if (RAY_IS_ERR(out[j])) { + for (int64_t k = 0; k < j; k++) ray_release(out[k]); + ray_release(result); + return out[j]; + } + } + return result; + } + /* Typed vector: search without boxing */ + if (ray_is_vec(vec)) { + int64_t len = vec->len; + bool has_nulls = (vec->attrs & RAY_ATTR_HAS_NULLS) != 0; + bool val_null = RAY_ATOM_IS_NULL(val); + for (int64_t i = 0; i < len; i++) { + if (has_nulls && ray_vec_is_null(vec, i)) { + if (val_null) return make_i64(i); + continue; + } + if (val_null) continue; + int alloc = 0; + ray_t* elem = collection_elem(vec, i, &alloc); + int eq = atom_eq(elem, val); + if (alloc) ray_release(elem); + if (eq) return make_i64(i); + } + return ray_typed_null(-RAY_I64); + } + ray_t* _bx = NULL; + vec = unbox_vec_arg(vec, &_bx); + if (RAY_IS_ERR(vec)) return vec; + if (!is_list(vec)) { if (_bx) ray_release(_bx); return ray_error("type", NULL); } + int64_t len = ray_len(vec); + ray_t** elems = (ray_t**)ray_data(vec); + for (int64_t i = 0; i < len; i++) { + if (atom_eq(elems[i], val)) { if (_bx) ray_release(_bx); return make_i64(i); } + } + if (_bx) ray_release(_bx); + return ray_typed_null(-RAY_I64); /* 0Nl = not found */ +} + +/* (til n) — generate integer sequence [0, 1, ..., n-1] */ +static void til_fill(void* ctx, uint32_t worker_id, int64_t start, int64_t end) { + (void)worker_id; + int64_t* out = (int64_t*)ctx; + for (int64_t i = start; i < end; i++) + out[i] = i; +} + +ray_t* ray_til_fn(ray_t* x) { + if (!ray_is_atom(x) || x->type != -RAY_I64) return ray_error("type", NULL); + int64_t n = x->i64; + if (n < 0) return ray_error("domain", NULL); + if (n == 0) return ray_vec_new(RAY_I64, 0); + + ray_t* vec = ray_vec_new(RAY_I64, n); + if (!vec || RAY_IS_ERR(vec)) return vec; + vec->len = n; + int64_t* out = 
(int64_t*)ray_data(vec); + ray_pool_dispatch(ray_pool_get(), til_fill, out, n); + return vec; +} + +/* (reverse vec) — reverse a vector */ +ray_t* ray_reverse_fn(ray_t* x) { + if (ray_is_lazy(x)) x = ray_lazy_materialize(x); + + /* Typed vector: reverse directly without boxing */ + if (ray_is_vec(x)) { + int64_t len = x->len; + if (len <= 1) { ray_retain(x); return x; } + int8_t vtype = x->type; + if (vtype == RAY_STR) { + ray_t* result = ray_vec_new(RAY_STR, len); + if (RAY_IS_ERR(result)) return result; + bool has_nulls = (x->attrs & RAY_ATTR_HAS_NULLS) != 0; + for (int64_t i = 0; i < len; i++) { + if (has_nulls && ray_vec_is_null(x, len - 1 - i)) { + result = ray_str_vec_append(result, "", 0); + if (!RAY_IS_ERR(result)) + ray_vec_set_null(result, result->len - 1, true); + } else { + size_t slen; + const char* sp = ray_str_vec_get(x, len - 1 - i, &slen); + result = ray_str_vec_append(result, sp ? sp : "", sp ? slen : 0); + } + if (RAY_IS_ERR(result)) return result; + } + return result; + } + ray_t* result = (vtype == RAY_SYM) + ? ray_sym_vec_new(x->attrs & RAY_SYM_W_MASK, len) + : ray_vec_new(vtype, len); + if (!result || RAY_IS_ERR(result)) return result ? 
result : ray_error("oom", NULL); + result->len = len; + int esz = ray_elem_size(vtype); + if (vtype == RAY_SYM) esz = ray_sym_elem_size(vtype, x->attrs); + char* src = (char*)ray_data(x); + char* dst = (char*)ray_data(result); + bool has_nulls = (x->attrs & RAY_ATTR_HAS_NULLS) != 0; + for (int64_t i = 0; i < len; i++) { + memcpy(dst + i * esz, src + (len - 1 - i) * esz, esz); + if (has_nulls && ray_vec_is_null(x, len - 1 - i)) + ray_vec_set_null(result, i, true); + } + return result; + } + + /* Boxed list path */ + ray_t* _bx = NULL; + x = unbox_vec_arg(x, &_bx); + if (RAY_IS_ERR(x)) return x; + if (!is_list(x)) { if (_bx) ray_release(_bx); return ray_error("type", NULL); } + int64_t len = ray_len(x); + ray_t** elems = (ray_t**)ray_data(x); + + ray_t* result = ray_alloc(len * sizeof(ray_t*)); + if (!result) { if (_bx) ray_release(_bx); return ray_error("oom", NULL); } + result->type = RAY_LIST; + result->len = len; + ray_t** out = (ray_t**)ray_data(result); + for (int64_t i = 0; i < len; i++) { + ray_retain(elems[len - 1 - i]); + out[i] = elems[len - 1 - i]; + } + if (_bx) ray_release(_bx); + return result; +} + +/* ══════════════════════════════════════════ + * Binary search + * ══════════════════════════════════════════ */ + +/* (rand n max) → vector of n random i64 in [0, max) */ +ray_t* ray_rand_fn(ray_t* a, ray_t* b) { + if (!ray_is_atom(a) || !ray_is_atom(b)) return ray_error("type", NULL); + int64_t n, mx; + if (a->type == -RAY_I64) n = a->i64; + else if (a->type == -RAY_I32) n = a->i32; + else return ray_error("type", NULL); + if (b->type == -RAY_I64) mx = b->i64; + else if (b->type == -RAY_I32) mx = b->i32; + else return ray_error("type", NULL); + if (n < 0) return ray_error("domain", NULL); + if (mx <= 0) return ray_error("domain", NULL); + if (n == 0) return ray_vec_new(RAY_I64, 0); + ray_t* vec = ray_vec_new(RAY_I64, n); + if (RAY_IS_ERR(vec)) return vec; + int64_t* d = (int64_t*)ray_data(vec); + for (int64_t i = 0; i < n; i++) d[i] = (int64_t)(rand() % 
mx);
+    vec->len = n;
+    return vec;
+}
+
+/* (bin sorted-vec val) → rightmost index where sorted[i] <= val, -1 if none.
+ * sorted must be an I64 vector (its ordering is assumed, not checked).
+ * val is either an I64/I32 atom (result: I64 atom) or an I64 vector
+ * (result: I64 vector with one index per element); anything else is a
+ * "type" error. */
+ray_t* ray_bin_fn(ray_t* sorted, ray_t* val) {
+    if (!ray_is_vec(sorted) || sorted->type != RAY_I64)
+        return ray_error("type", NULL);
+    int64_t* d = (int64_t*)ray_data(sorted);
+    int64_t n = sorted->len;
+
+    if (ray_is_atom(val) && (val->type == -RAY_I64 || val->type == -RAY_I32)) {
+        /* Read the atom through the member that matches its type, as
+         * ray_rand_fn does: an I32 atom must not be read through ->i64. */
+        int64_t v = (val->type == -RAY_I32) ? (int64_t)val->i32 : val->i64;
+        int64_t lo = 0, hi = n - 1, result = -1;
+        while (lo <= hi) {
+            int64_t mid = lo + (hi - lo) / 2;  /* overflow-safe midpoint */
+            if (d[mid] <= v) { result = mid; lo = mid + 1; }
+            else hi = mid - 1;
+        }
+        return ray_i64(result);
+    }
+    if (ray_is_vec(val) && val->type == RAY_I64) {
+        int64_t* vals = (int64_t*)ray_data(val);
+        int64_t vn = val->len;
+        ray_t* rvec = ray_vec_new(RAY_I64, vn);
+        if (RAY_IS_ERR(rvec)) return rvec;
+        int64_t* out = (int64_t*)ray_data(rvec);
+        for (int64_t i = 0; i < vn; i++) {
+            int64_t v = vals[i];
+            int64_t lo = 0, hi = n - 1, r = -1;
+            while (lo <= hi) {
+                int64_t mid = lo + (hi - lo) / 2;
+                if (d[mid] <= v) { r = mid; lo = mid + 1; }
+                else hi = mid - 1;
+            }
+            out[i] = r;
+        }
+        rvec->len = vn;
+        return rvec;
+    }
+    return ray_error("type", NULL);
+}
+
+/* (binr sorted-vec val) → leftmost index where sorted[i] >= val.
+ * When no element qualifies the result is clamped to n - 1 (so -1 for an
+ * empty vector). Accepted argument types match ray_bin_fn. */
+ray_t* ray_binr_fn(ray_t* sorted, ray_t* val) {
+    if (!ray_is_vec(sorted) || sorted->type != RAY_I64)
+        return ray_error("type", NULL);
+    int64_t* d = (int64_t*)ray_data(sorted);
+    int64_t n = sorted->len;
+
+    if (ray_is_atom(val) && (val->type == -RAY_I64 || val->type == -RAY_I32)) {
+        /* Read the atom through the member that matches its type, as
+         * ray_rand_fn does: an I32 atom must not be read through ->i64. */
+        int64_t v = (val->type == -RAY_I32) ? (int64_t)val->i32 : val->i64;
+        int64_t lo = 0, hi = n - 1, result = n;
+        while (lo <= hi) {
+            int64_t mid = lo + (hi - lo) / 2;  /* overflow-safe midpoint */
+            if (d[mid] >= v) { result = mid; hi = mid - 1; }
+            else lo = mid + 1;
+        }
+        return ray_i64(result >= n ?
n - 1 : result); + } + if (ray_is_vec(val) && val->type == RAY_I64) { + int64_t* vals = (int64_t*)ray_data(val); + int64_t vn = val->len; + ray_t* rvec = ray_vec_new(RAY_I64, vn); + if (RAY_IS_ERR(rvec)) return rvec; + int64_t* out = (int64_t*)ray_data(rvec); + for (int64_t i = 0; i < vn; i++) { + int64_t v = vals[i]; + int64_t lo = 0, hi = n - 1, r = n; + while (lo <= hi) { + int64_t mid = lo + (hi - lo) / 2; + if (d[mid] >= v) { r = mid; hi = mid - 1; } + else lo = mid + 1; + } + out[i] = r >= n ? n - 1 : r; + } + rvec->len = vn; + return rvec; + } + return ray_error("type", NULL); +} + +/* ══════════════════════════════════════════ + * Map variants + * ══════════════════════════════════════════ */ + +/* (map-left fn fixed vec) → apply fn(fixed, elem) for each elem in vec */ +/* Helper for map-left/map-right: iterate over vec calling fn with two args */ +static ray_t* map_iterate(ray_t* fn, ray_t* fixed, ray_t* vec, int fixed_is_left) { + /* If both are scalars, just call once */ + if (!ray_is_vec(vec) && vec->type != RAY_LIST) { + if (fixed_is_left) + return call_fn2(fn, fixed, vec); + else + return call_fn2(fn, vec, fixed); + } + + int64_t vn = vec->len; + ray_t* stack_results[4096]; + ray_t** results = stack_results; + if (vn > 4096) { + results = (ray_t**)ray_sys_alloc((size_t)vn * sizeof(ray_t*)); + if (!results) return ray_error("oom", NULL); + } + + for (int64_t i = 0; i < vn; i++) { + int alloc = 0; + ray_t* elem = collection_elem(vec, i, &alloc); + if (fixed_is_left) + results[i] = call_fn2(fn, fixed, elem); + else + results[i] = call_fn2(fn, elem, fixed); + if (alloc) ray_release(elem); + if (RAY_IS_ERR(results[i])) { + ray_t* err = results[i]; + for (int64_t j = 0; j < i; j++) ray_release(results[j]); + if (results != stack_results) ray_sys_free(results); + return err; + } + } + ray_t* out = ray_enlist_fn(results, vn); + for (int64_t i = 0; i < vn; i++) ray_release(results[i]); + if (results != stack_results) ray_sys_free(results); + return out; +} + 
+/* (map-left fn fixed vec) → apply fn(fixed, elem) for each elem in vec.
+ * If vec is scalar but fixed is a vector, auto-swap (iterate over fixed).
+ * args must be exactly [fn, fixed, vec]; any other count is a "domain" error. */
+ray_t* ray_map_left_fn(ray_t** args, int64_t n) {
+    if (n != 3) return ray_error("domain", NULL);
+    ray_t* fn = args[0];
+    ray_t* fixed = args[1];
+    ray_t* vec = args[2];
+
+    /* Auto-detect: if vec is scalar but fixed is a vector, swap roles */
+    if (!ray_is_vec(vec) && vec->type != RAY_LIST &&
+        (ray_is_vec(fixed) || fixed->type == RAY_LIST)) {
+        return map_iterate(fn, vec, fixed, 0);  /* iterates over fixed and calls fn(elem_of_fixed, vec), i.e. the argument order as written is kept. NOTE(review): the earlier note here read "but we want fn(fixed=scalar, elem)" — confirm which order is intended. */
+    }
+
+    return map_iterate(fn, fixed, vec, 1);  /* fn(fixed, elem) */
+}
+
+/* (map-right fn vec fixed) → apply fn(elem, fixed) for each elem in vec.
+ * If vec is scalar but fixed is a vector, auto-swap (iterate over fixed).
+ * args must be exactly [fn, vec, fixed]; any other count is a "domain" error. */
+ray_t* ray_map_right_fn(ray_t** args, int64_t n) {
+    if (n != 3) return ray_error("domain", NULL);
+    ray_t* fn = args[0];
+    ray_t* vec = args[1];
+    ray_t* fixed = args[2];
+
+    /* Auto-detect: if vec is scalar but fixed is a vector, swap roles */
+    if (!ray_is_vec(vec) && vec->type != RAY_LIST &&
+        (ray_is_vec(fixed) || fixed->type == RAY_LIST)) {
+        return map_iterate(fn, vec, fixed, 1);  /* fn(vec_scalar, elem_of_fixed) */
+    }
+
+    return map_iterate(fn, fixed, vec, 0);  /* fn(elem, fixed) */
+}
+
+/* ══════════════════════════════════════════
+ * Fold/scan variants
+ * ══════════════════════════════════════════ */
+
+/* (fold-left fn init coll) — left fold with explicit initial value */
+ray_t* ray_fold_left_fn(ray_t** args, int64_t n) {
+    /* Same as (fold fn init coll) — fold already goes left-to-right */
+    return ray_fold_fn(args, n);
+}
+
+/* (fold-right fn init coll) — right fold.
+ * With two args, (fold-right fn coll), the last element seeds the
+ * accumulator; each step calls fn(elem, acc) walking from the end. */
+ray_t* ray_fold_right_fn(ray_t** args, int64_t n) {
+    if (n < 2) return ray_error("domain", NULL);
+    for (int64_t i = 0; i < n; i++)
+        if (ray_is_lazy(args[i])) args[i] = ray_lazy_materialize(args[i]);
+
+    ray_t* fn = args[0];
+    ray_t* _bx = NULL;
+    ray_t* acc;
+ ray_t* vec; + + if (n == 2) { + /* (fold-right fn vec) — use last element as initial value */ + vec = unbox_vec_arg(args[1], &_bx); + if (RAY_IS_ERR(vec)) return vec; + if (!is_list(vec)) { if (_bx) ray_release(_bx); return ray_error("type", NULL); } + int64_t len = ray_len(vec); + if (len == 0) { if (_bx) ray_release(_bx); return ray_error("domain", NULL); } + ray_t** elems = (ray_t**)ray_data(vec); + ray_retain(elems[len - 1]); + acc = elems[len - 1]; + for (int64_t i = len - 2; i >= 0; i--) { + ray_t* next = call_fn2(fn, elems[i], acc); + ray_release(acc); + if (RAY_IS_ERR(next)) { if (_bx) ray_release(_bx); return next; } + acc = next; + } + if (_bx) ray_release(_bx); + return acc; + } + + /* (fold-right fn init coll) */ + ray_retain(args[1]); + acc = args[1]; + vec = unbox_vec_arg(args[2], &_bx); + if (RAY_IS_ERR(vec)) { ray_release(acc); return vec; } + if (!is_list(vec)) { ray_release(acc); if (_bx) ray_release(_bx); return ray_error("type", NULL); } + int64_t len = ray_len(vec); + ray_t** elems = (ray_t**)ray_data(vec); + for (int64_t i = len - 1; i >= 0; i--) { + ray_t* next = call_fn2(fn, elems[i], acc); + ray_release(acc); + if (RAY_IS_ERR(next)) { if (_bx) ray_release(_bx); return next; } + acc = next; + } + if (_bx) ray_release(_bx); + return acc; +} + +/* (scan-left fn vec) — running left fold (same as scan) */ +ray_t* ray_scan_left_fn(ray_t** args, int64_t n) { + return ray_scan_fn(args, n); +} + +/* (scan-right fn vec) — running right fold, returns vector of partial results */ +ray_t* ray_scan_right_fn(ray_t** args, int64_t n) { + if (n < 2) return ray_error("domain", NULL); + for (int64_t i = 0; i < n; i++) + if (ray_is_lazy(args[i])) args[i] = ray_lazy_materialize(args[i]); + + ray_t* fn = args[0]; + ray_t* _bx = NULL; + ray_t* vec = unbox_vec_arg(args[1], &_bx); + if (RAY_IS_ERR(vec)) return vec; + if (!is_list(vec)) { if (_bx) ray_release(_bx); return ray_error("type", NULL); } + int64_t len = ray_len(vec); + if (len == 0) { + if (_bx) 
ray_release(_bx); + ray_t* result = ray_alloc(0); + if (!result) return ray_error("oom", NULL); + result->type = RAY_LIST; + result->len = 0; + return result; + } + + ray_t* result = ray_alloc(len * sizeof(ray_t*)); + if (!result) { if (_bx) ray_release(_bx); return ray_error("oom", NULL); } + result->type = RAY_LIST; + result->len = len; + ray_t** out = (ray_t**)ray_data(result); + ray_t** elems = (ray_t**)ray_data(vec); + + ray_retain(elems[len - 1]); + out[len - 1] = elems[len - 1]; + for (int64_t i = len - 2; i >= 0; i--) { + out[i] = call_fn2(fn, elems[i], out[i + 1]); + if (RAY_IS_ERR(out[i])) { + for (int64_t j = i + 1; j < len; j++) ray_release(out[j]); + result->len = 0; ray_release(result); if (_bx) ray_release(_bx); + return out[i]; + } + } + if (_bx) ray_release(_bx); + return result; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/datalog.c b/crates/rayforce-sys/vendor/rayforce/src/ops/datalog.c new file mode 100644 index 0000000..a354412 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/datalog.c @@ -0,0 +1,4325 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +/* + * datalog.c — Datalog evaluation engine for Rayforce + * + * Compiles Datalog rules into ray_graph_t operation DAGs and evaluates + * them to fixpoint using semi-naive evaluation with stratified negation. + */ +#include "ops/datalog.h" +#include "lang/internal.h" +#include "lang/env.h" +#include "table/sym.h" +#include "ops/ops.h" +#include "ops/hash.h" /* ray_hash_i64, ray_hash_combine */ +#include "ops/internal.h" /* col_propagate_str_pool */ +#include "mem/sys.h" /* ray_sys_alloc / ray_sys_free */ +#include +#include + +/* ======================================================================== + * Program lifecycle + * ======================================================================== */ + +dl_program_t* dl_program_new(void) { + /* Allocate via ray_alloc and use the data region for the program struct. + * This avoids alignment issues since ray_alloc returns a ray_t* header. */ + ray_t* block = ray_alloc(sizeof(dl_program_t)); + if (!block) return NULL; + dl_program_t* prog = (dl_program_t*)ray_data(block); + memset(prog, 0, sizeof(dl_program_t)); + return prog; +} + +/* Recover the ray_t header from a dl_program_t pointer for ray_free. 
*/ +static inline ray_t* dl_prog_block(dl_program_t* prog) { + return (ray_t*)((char*)prog - 32); /* ray_data is at offset 32 */ +} + +void dl_program_free(dl_program_t* prog) { + if (!prog) return; + for (int i = 0; i < prog->n_rels; i++) { + if (prog->rels[i].table && !RAY_IS_ERR(prog->rels[i].table)) + ray_release(prog->rels[i].table); + if (prog->rels[i].prov_col && !RAY_IS_ERR(prog->rels[i].prov_col)) + ray_release(prog->rels[i].prov_col); + if (prog->rels[i].prov_src_offsets && !RAY_IS_ERR(prog->rels[i].prov_src_offsets)) + ray_release(prog->rels[i].prov_src_offsets); + if (prog->rels[i].prov_src_data && !RAY_IS_ERR(prog->rels[i].prov_src_data)) + ray_release(prog->rels[i].prov_src_data); + } + ray_free(dl_prog_block(prog)); +} + +/* ======================================================================== + * Relation management + * ======================================================================== */ + +int dl_find_rel(dl_program_t* prog, const char* name) { + for (int i = 0; i < prog->n_rels; i++) { + if (strcmp(prog->rels[i].name, name) == 0) + return i; + } + return -1; +} + +/* Generate a unique column name for a relation: "{relname}__c{idx}" */ +static int64_t dl_col_sym(const char* rel_name, int col_idx) { + char buf[80]; + snprintf(buf, sizeof(buf), "%s__c%d", rel_name, col_idx); + return ray_sym_intern(buf, strlen(buf)); +} + +int dl_add_edb(dl_program_t* prog, const char* name, ray_t* table, int arity) { + if (!prog || !name || !table || prog->n_rels >= DL_MAX_RELS) + return -1; + + int idx = prog->n_rels++; + dl_rel_t* rel = &prog->rels[idx]; + memset(rel, 0, sizeof(dl_rel_t)); + + size_t name_len = strlen(name); + if (name_len >= sizeof(rel->name)) name_len = sizeof(rel->name) - 1; + memcpy(rel->name, name, name_len); + rel->name[name_len] = '\0'; + + rel->arity = arity; + rel->is_idb = false; + + /* Build a new table with relation-prefixed column names to avoid + * collisions when multiple tables participate in a join. 
*/ + for (int c = 0; c < arity && c < DL_MAX_ARITY; c++) + rel->col_names[c] = dl_col_sym(name, c); + + ray_t* new_tbl = ray_table_new(arity); + for (int c = 0; c < arity; c++) { + ray_t* col = ray_table_get_col_idx(table, c); + if (!col) { ray_release(new_tbl); return -1; } + new_tbl = ray_table_add_col(new_tbl, rel->col_names[c], col); + if (RAY_IS_ERR(new_tbl)) return -1; + } + rel->table = new_tbl; + + return idx; +} + +int dl_ensure_idb(dl_program_t* prog, const char* name, int arity) { + int idx = dl_find_rel(prog, name); + if (idx >= 0) return idx; + + if (prog->n_rels >= DL_MAX_RELS) return -1; + idx = prog->n_rels++; + dl_rel_t* rel = &prog->rels[idx]; + memset(rel, 0, sizeof(dl_rel_t)); + + size_t name_len = strlen(name); + if (name_len >= sizeof(rel->name)) name_len = sizeof(rel->name) - 1; + memcpy(rel->name, name, name_len); + rel->name[name_len] = '\0'; + + /* Create empty table with arity columns */ + rel->table = ray_table_new(arity); + if (!rel->table || RAY_IS_ERR(rel->table)) return -1; + + rel->arity = arity; + rel->is_idb = true; + + for (int c = 0; c < arity && c < DL_MAX_ARITY; c++) { + rel->col_names[c] = dl_col_sym(name, c); + ray_t* empty_col = ray_vec_new(RAY_I64, 0); + if (empty_col && !RAY_IS_ERR(empty_col)) { + rel->table = ray_table_add_col(rel->table, rel->col_names[c], empty_col); + ray_release(empty_col); + } + } + + return idx; +} + +/* ======================================================================== + * Rule management + * ======================================================================== */ + +/* When a rule has a typed head constant at slot c, the IDB relation's + * column c must be of that type so ray_vec_concat (used by table_union) + * doesn't reject the merge. Rebuilds matching columns on an *empty* IDB + * table in-place. Safe because schema is established before evaluation. 
*/ +static void dl_idb_align_head_const_types(dl_program_t* prog, const dl_rule_t* rule) { + int rel_idx = dl_find_rel(prog, rule->head_pred); + if (rel_idx < 0) return; + dl_rel_t* rel = &prog->rels[rel_idx]; + if (!rel->is_idb) return; + if (!rel->table || RAY_IS_ERR(rel->table)) return; + if (ray_table_nrows(rel->table) != 0) return; /* types already committed */ + + int ncols = (int)ray_table_ncols(rel->table); + if (ncols != rel->arity) return; + + bool any_change = false; + int8_t desired[DL_MAX_ARITY]; + for (int c = 0; c < rel->arity; c++) { + ray_t* col = ray_table_get_col_idx(rel->table, c); + int8_t cur = col ? col->type : RAY_I64; + int8_t want = rule->head_const_types[c]; + if (want == 0) { + desired[c] = cur; + } else if (cur != RAY_I64 && cur != want) { + /* First-non-zero-wins policy: once a slot is committed to a + * non-default type by a prior rule, any later rule that + * disagrees is a program-level conflict. Mark the program + * so dl_eval (which reads eval_err after evaluation) reports + * failure — no stderr write from a non-debug code path. */ + prog->eval_err = true; + return; + } else { + desired[c] = want; + if (want != cur) any_change = true; + } + } + if (!any_change) return; + + /* Rebuild the table with typed empty columns. Alignment is required + * for later evaluation to produce type-matching table_union inputs, + * so any failure here must also set prog->eval_err = true — silently + * returning would leave the IDB schema unaligned and dl_eval would + * later hit a ray_vec_concat type mismatch without any error signal. 
*/ + ray_t* fresh = ray_table_new(rel->arity); + if (!fresh) { prog->eval_err = true; return; } + if (RAY_IS_ERR(fresh)) { prog->eval_err = true; ray_error_free(fresh); return; } + for (int c = 0; c < rel->arity; c++) { + ray_t* empty_col = ray_vec_new(desired[c], 0); + if (!empty_col) { prog->eval_err = true; ray_release(fresh); return; } + if (RAY_IS_ERR(empty_col)) { + prog->eval_err = true; + ray_error_free(empty_col); + ray_release(fresh); + return; + } + ray_t* prev = fresh; + fresh = ray_table_add_col(fresh, rel->col_names[c], empty_col); + ray_release(empty_col); + if (!fresh) { prog->eval_err = true; ray_release(prev); return; } + if (RAY_IS_ERR(fresh)) { + prog->eval_err = true; + ray_release(prev); + ray_error_free(fresh); + return; + } + } + ray_release(rel->table); + rel->table = fresh; +} + +int dl_add_rule(dl_program_t* prog, const dl_rule_t* rule) { + if (!prog || !rule || prog->n_rules >= DL_MAX_RULES) + return -1; + int idx = prog->n_rules++; + memcpy(&prog->rules[idx], rule, sizeof(dl_rule_t)); + prog->rules[idx].stratum = -1; + + /* Ensure IDB relation exists for the head predicate */ + dl_ensure_idb(prog, rule->head_pred, rule->head_arity); + + /* Align IDB column types to any typed head constants in this rule. + * Must run before evaluation so table_union/concat see matching types. 
*/ + dl_idb_align_head_const_types(prog, rule); + + return idx; +} + +/* ======================================================================== + * Rule builder helpers + * ======================================================================== */ + +void dl_rule_init(dl_rule_t* rule, const char* head_pred, int head_arity) { + memset(rule, 0, sizeof(dl_rule_t)); + size_t len = strlen(head_pred); + if (len >= sizeof(rule->head_pred)) len = sizeof(rule->head_pred) - 1; + memcpy(rule->head_pred, head_pred, len); + rule->head_pred[len] = '\0'; + rule->head_arity = head_arity; + rule->n_body = 0; + rule->n_vars = 0; + rule->stratum = -1; + for (int i = 0; i < DL_MAX_ARITY; i++) + rule->head_vars[i] = DL_CONST; +} + +void dl_rule_head_var(dl_rule_t* rule, int pos, int var_idx) { + if (pos < 0 || pos >= rule->head_arity) return; + rule->head_vars[pos] = var_idx; + rule->head_const_types[pos] = 0; + if (var_idx + 1 > rule->n_vars) rule->n_vars = var_idx + 1; +} + +void dl_rule_head_const_typed(dl_rule_t* rule, int pos, int64_t val, int8_t type) { + if (pos < 0 || pos >= rule->head_arity) return; + /* Default to RAY_I64 if an unrecognized type sneaks through; keeps + * old-callers-with-no-type compat when writing to the slot. */ + if (type != RAY_I64 && type != RAY_SYM && type != RAY_F64) + type = RAY_I64; + rule->head_vars[pos] = DL_CONST; + rule->head_consts[pos] = val; + rule->head_const_types[pos] = type; +} + +/* Backward-compatible I64 wrapper. Pre-aggregates-PR external callers + * used this 3-arg form; it now forwards to the typed variant with + * RAY_I64. 
*/
+void dl_rule_head_const(dl_rule_t* rule, int pos, int64_t val) {
+    dl_rule_head_const_typed(rule, pos, val, RAY_I64);
+}
+
+/* Put an F64 constant in head slot `pos`; the double's bit pattern is
+ * carried in the int64 constant slot. */
+void dl_rule_head_const_f64(dl_rule_t* rule, int pos, double val) {
+    int64_t bits;
+    memcpy(&bits, &val, sizeof(bits));
+    dl_rule_head_const_typed(rule, pos, bits, RAY_F64);
+}
+
+/* Append a positive body atom for predicate `pred` (name truncated to fit)
+ * with every argument slot initialised to DL_CONST.
+ * Returns the body index, or -1 when the rule already has DL_MAX_BODY
+ * literals. */
+int dl_rule_add_atom(dl_rule_t* rule, const char* pred, int arity) {
+    if (rule->n_body >= DL_MAX_BODY) return -1;
+    int idx = rule->n_body++;
+    dl_body_t* b = &rule->body[idx];
+    memset(b, 0, sizeof(dl_body_t));
+    b->type = DL_POS;
+    size_t len = strlen(pred);
+    if (len >= sizeof(b->pred)) len = sizeof(b->pred) - 1;
+    memcpy(b->pred, pred, len);
+    b->pred[len] = '\0';
+    b->arity = arity;
+    for (int i = 0; i < DL_MAX_ARITY; i++)
+        b->vars[i] = DL_CONST;
+    return idx;
+}
+
+/* Bind argument `pos` of body literal `body_idx` to variable `var_idx`,
+ * growing rule->n_vars to cover it. Out-of-range indices are ignored. */
+void dl_body_set_var(dl_rule_t* rule, int body_idx, int pos, int var_idx) {
+    if (body_idx < 0 || body_idx >= rule->n_body) return;
+    /* The atom's arity is caller-supplied and unchecked; vars[] holds
+     * DL_MAX_ARITY entries (see dl_rule_add_atom), so bound pos by both. */
+    if (pos < 0 || pos >= rule->body[body_idx].arity || pos >= DL_MAX_ARITY) return;
+    rule->body[body_idx].vars[pos] = var_idx;
+    if (var_idx + 1 > rule->n_vars) rule->n_vars = var_idx + 1;
+}
+
+/* Fix argument `pos` of body literal `body_idx` to constant `val`.
+ * Out-of-range indices are ignored. */
+void dl_body_set_const(dl_rule_t* rule, int body_idx, int pos, int64_t val) {
+    if (body_idx < 0 || body_idx >= rule->n_body) return;
+    /* Bound pos by DL_MAX_ARITY as well as the arity (see dl_body_set_var). */
+    if (pos < 0 || pos >= rule->body[body_idx].arity || pos >= DL_MAX_ARITY) return;
+    rule->body[body_idx].vars[pos] = DL_CONST;
+    rule->body[body_idx].const_vals[pos] = val;
+}
+
+/* Append a negated body atom: same as dl_rule_add_atom, with the literal
+ * type set to DL_NEG. */
+int dl_rule_add_neg(dl_rule_t* rule, const char* pred, int arity) {
+    int idx = dl_rule_add_atom(rule, pred, arity);
+    if (idx >= 0) rule->body[idx].type = DL_NEG;
+    return idx;
+}
+
+/* Append a variable-vs-variable comparison literal (lhs_var cmp_op rhs_var).
+ * Returns the body index, or -1 when the rule already has DL_MAX_BODY
+ * literals. */
+int dl_rule_add_cmp(dl_rule_t* rule, int cmp_op, int lhs_var, int rhs_var) {
+    if (rule->n_body >= DL_MAX_BODY) return -1;
+    int idx = rule->n_body++;
+    dl_body_t* b = &rule->body[idx];
+    memset(b, 0, sizeof(dl_body_t));
+    b->type = DL_CMP;
+    b->cmp_op = cmp_op;
+    b->cmp_lhs = lhs_var;
+    b->cmp_rhs = rhs_var;
+    if (lhs_var + 1 > rule->n_vars) rule->n_vars = lhs_var + 1;
+    if (rhs_var + 1 > rule->n_vars) rule->n_vars = rhs_var +
1; + return idx; +} + +int dl_rule_add_cmp_const(dl_rule_t* rule, int cmp_op, int lhs_var, int64_t rhs_val) { + if (rule->n_body >= DL_MAX_BODY) return -1; + int idx = rule->n_body++; + dl_body_t* b = &rule->body[idx]; + memset(b, 0, sizeof(dl_body_t)); + b->type = DL_CMP; + b->cmp_op = cmp_op; + b->cmp_lhs = lhs_var; + b->cmp_rhs = DL_CONST; + b->cmp_const = rhs_val; + if (lhs_var + 1 > rule->n_vars) rule->n_vars = lhs_var + 1; + return idx; +} + +/* ======================================================================== + * Expression tree builders + * ======================================================================== */ + +static dl_expr_t* dl_expr_alloc(void) { + ray_t* block = ray_alloc(sizeof(dl_expr_t)); + if (!block) return NULL; + dl_expr_t* e = (dl_expr_t*)ray_data(block); + memset(e, 0, sizeof(dl_expr_t)); + return e; +} + +dl_expr_t* dl_expr_const(int64_t val) { + dl_expr_t* e = dl_expr_alloc(); + if (!e) return NULL; + e->kind = DL_EXPR_CONST; + e->const_val = val; + return e; +} + +dl_expr_t* dl_expr_const_f64(double val) { + dl_expr_t* e = dl_expr_alloc(); + if (!e) return NULL; + e->kind = DL_EXPR_CONST_F64; + e->const_f64 = val; + return e; +} + +dl_expr_t* dl_expr_var(int var_idx) { + dl_expr_t* e = dl_expr_alloc(); + if (!e) return NULL; + e->kind = DL_EXPR_VAR; + e->var_idx = var_idx; + return e; +} + +dl_expr_t* dl_expr_binop(int op, dl_expr_t* left, dl_expr_t* right) { + dl_expr_t* e = dl_expr_alloc(); + if (!e) return NULL; + e->kind = DL_EXPR_BINOP; + e->binop = op; + e->left = left; + e->right = right; + return e; +} + +/* ======================================================================== + * Assignment and builtin rule builders + * ======================================================================== */ + +int dl_rule_add_assign(dl_rule_t* rule, int target_var, int op, dl_expr_t* expr) { + if (rule->n_body >= DL_MAX_BODY) return -1; + int idx = rule->n_body++; + dl_body_t* b = &rule->body[idx]; + memset(b, 0, 
sizeof(dl_body_t)); + b->type = DL_ASSIGN; + b->assign_var = target_var; + b->assign_expr = expr; + if (target_var + 1 > rule->n_vars) rule->n_vars = target_var + 1; + (void)op; /* reserved for future assignment operators */ + return idx; +} + +int dl_rule_add_builtin(dl_rule_t* rule, int builtin_id, int arity) { + if (rule->n_body >= DL_MAX_BODY) return -1; + int idx = rule->n_body++; + dl_body_t* b = &rule->body[idx]; + memset(b, 0, sizeof(dl_body_t)); + b->type = DL_BUILTIN; + b->builtin_id = builtin_id; + b->arity = arity; + for (int i = 0; i < DL_MAX_ARITY; i++) + b->vars[i] = DL_CONST; + return idx; +} + +static int dl_expr_max_var(const dl_expr_t* e) { + if (!e) return -1; + if (e->kind == DL_EXPR_VAR) return e->var_idx; + if (e->kind == DL_EXPR_BINOP) { + int l = dl_expr_max_var(e->left); + int r = dl_expr_max_var(e->right); + return l > r ? l : r; + } + return -1; +} + +int dl_rule_add_cmp_expr(dl_rule_t* rule, int cmp_op, dl_expr_t* lhs, dl_expr_t* rhs) { + if (rule->n_body >= DL_MAX_BODY) return -1; + int idx = rule->n_body++; + dl_body_t* b = &rule->body[idx]; + memset(b, 0, sizeof(dl_body_t)); + b->type = DL_CMP; + b->cmp_op = cmp_op; + b->cmp_lhs_expr = lhs; + b->cmp_rhs_expr = rhs; + /* Update n_vars from the expression trees */ + int mv = dl_expr_max_var(lhs); + int rv = dl_expr_max_var(rhs); + if (rv > mv) mv = rv; + if (mv + 1 > rule->n_vars) rule->n_vars = mv + 1; + return idx; +} + +int dl_rule_add_interval(dl_rule_t* rule, int fact_var, int start_var, int end_var) { + if (rule->n_body >= DL_MAX_BODY) return -1; + int idx = rule->n_body++; + dl_body_t* b = &rule->body[idx]; + memset(b, 0, sizeof(dl_body_t)); + b->type = DL_INTERVAL; + b->interval_fact_var = fact_var; + b->interval_start_var = start_var; + b->interval_end_var = end_var; + if (fact_var + 1 > rule->n_vars) rule->n_vars = fact_var + 1; + if (start_var + 1 > rule->n_vars) rule->n_vars = start_var + 1; + if (end_var + 1 > rule->n_vars) rule->n_vars = end_var + 1; + return idx; +} + 
+int dl_rule_add_agg(dl_rule_t* rule, int op, int target_var, + const char* pred, int pred_arity, int value_col) { + if (rule->n_body >= DL_MAX_BODY) return -1; + int idx = rule->n_body++; + dl_body_t* b = &rule->body[idx]; + memset(b, 0, sizeof(*b)); + b->type = DL_AGG; + b->agg_op = op; + b->agg_target_var = target_var; + snprintf(b->agg_pred, sizeof(b->agg_pred), "%s", pred); + b->agg_arity = pred_arity; + b->agg_value_col = value_col; + b->agg_n_group_keys = 0; + if (target_var + 1 > rule->n_vars) rule->n_vars = target_var + 1; + return idx; +} + +int dl_rule_agg_set_group(dl_rule_t* rule, int body_idx, + const int* key_vars, const int* key_cols, int n_keys) { + if (!rule || body_idx < 0 || body_idx >= rule->n_body) return -1; + if (n_keys < 0 || n_keys > DL_AGG_MAX_KEYS) return -1; + dl_body_t* b = &rule->body[body_idx]; + if (b->type != DL_AGG) return -1; + b->agg_n_group_keys = n_keys; + for (int i = 0; i < n_keys; i++) { + b->agg_group_key_vars[i] = key_vars[i]; + b->agg_group_key_cols[i] = key_cols[i]; + if (key_vars[i] + 1 > rule->n_vars) + rule->n_vars = key_vars[i] + 1; + } + return 0; +} + +/* ======================================================================== + * Stratification — topological sort on negation dependency graph + * ======================================================================== */ + +int dl_stratify(dl_program_t* prog) { + if (!prog) return -1; + + /* Build dependency graph: for each IDB predicate, which other IDB + * predicates does it depend on positively or negatively? 
*/ + int n = prog->n_rels; + /* dep[i][j]: 0 = no dep, 1 = positive dep, 2 = negative dep */ + int dep[DL_MAX_RELS][DL_MAX_RELS]; + memset(dep, 0, sizeof(dep)); + + for (int r = 0; r < prog->n_rules; r++) { + dl_rule_t* rule = &prog->rules[r]; + int head_idx = dl_find_rel(prog, rule->head_pred); + if (head_idx < 0) continue; + + for (int b = 0; b < rule->n_body; b++) { + dl_body_t* body = &rule->body[b]; + if (body->type == DL_AGG) { + /* Aggregates are non-monotonic: head must live in a higher + * stratum than the predicate being aggregated. */ + int body_idx = dl_find_rel(prog, body->agg_pred); + if (body_idx < 0) continue; + dep[head_idx][body_idx] = 2; /* negative (non-monotonic) dep */ + continue; + } + if (body->type != DL_POS && body->type != DL_NEG) continue; + int body_idx = dl_find_rel(prog, body->pred); + if (body_idx < 0) continue; + if (body->type == DL_NEG) + dep[head_idx][body_idx] = 2; /* negative dep */ + else if (dep[head_idx][body_idx] == 0) + dep[head_idx][body_idx] = 1; /* positive dep (don't override neg) */ + } + } + + /* Assign strata: predicates with no negative dependencies go to stratum 0. + * A predicate with a negative dep on stratum S goes to stratum S+1. + * Repeat until stable. If unstable after n iterations, there's a cycle. 
*/ + int stratum[DL_MAX_RELS]; + memset(stratum, 0, sizeof(stratum)); + + for (int iter = 0; iter < n + 1; iter++) { + bool changed = false; + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + if (dep[i][j] == 2) { + /* Negative dependency: head must be in higher stratum */ + if (stratum[i] <= stratum[j]) { + stratum[i] = stratum[j] + 1; + changed = true; + } + } else if (dep[i][j] == 1) { + /* Positive dependency: head must be >= stratum */ + if (stratum[i] < stratum[j]) { + stratum[i] = stratum[j]; + changed = true; + } + } + } + } + if (!changed) break; + if (iter == n) return -1; /* unstratifiable negation cycle */ + } + + /* Build strata arrays */ + int max_stratum = 0; + for (int i = 0; i < n; i++) { + if (stratum[i] > max_stratum) max_stratum = stratum[i]; + } + prog->n_strata = max_stratum + 1; + memset(prog->strata_sizes, 0, sizeof(prog->strata_sizes)); + + for (int i = 0; i < n; i++) { + int s = stratum[i]; + if (s < DL_MAX_STRATA && prog->strata_sizes[s] < DL_MAX_RELS) { + prog->strata[s][prog->strata_sizes[s]++] = i; + } + } + + /* Assign stratum to each rule */ + for (int r = 0; r < prog->n_rules; r++) { + int head_idx = dl_find_rel(prog, prog->rules[r].head_pred); + if (head_idx >= 0) + prog->rules[r].stratum = stratum[head_idx]; + } + + return 0; +} + +/* ======================================================================== + * Rule compiler — materializing approach + * + * Instead of building a single graph with joins, we execute each body + * atom separately, producing intermediate tables, and join them C-level. + * This avoids column-name-collision issues in the graph-level join. 
+ * ======================================================================== */ + +/* ======================================================================== + * Expression evaluation — compute column from expression tree + * ======================================================================== */ + +/* Helper: materialize a column of the given type/size as a copy or promotion + * of src. If target==RAY_F64 and src is RAY_I64, promote. Returns new owned column. */ +static ray_t* dl_col_as_f64(ray_t* src, int64_t nrows) { + ray_t* out = ray_vec_new(RAY_F64, nrows); + if (!out) return NULL; + if (RAY_IS_ERR(out)) { ray_error_free(out); return NULL; } + out->len = nrows; + double* od = (double*)ray_data(out); + if (src->type == RAY_F64) { + memcpy(od, ray_data(src), (size_t)nrows * sizeof(double)); + } else { /* RAY_I64 */ + int64_t* sd = (int64_t*)ray_data(src); + for (int64_t r = 0; r < nrows; r++) od[r] = (double)sd[r]; + } + return out; +} + +/* Evaluate an expression tree against the accumulator table. + * Returns a new owned vector of length nrows. The element type is RAY_F64 + * if the expression involves any float constant or any RAY_F64 source column, + * otherwise RAY_I64. 
*/
static ray_t* dl_eval_expr(dl_expr_t* expr, ray_t* accum,
                           int* var_col, int64_t nrows) {
    /* Returns NULL on any failure: NULL expression, allocation failure,
     * a variable that is unbound (negative index or column slot), a
     * missing source column, or a source column that is neither RAY_I64
     * nor RAY_F64.  Division by zero yields 0 in both integer and float
     * mode; unknown operators yield 0. */
    if (!expr) return NULL;

    switch (expr->kind) {
    case DL_EXPR_CONST: {
        /* Broadcast the integer constant over every row. */
        ray_t* col = ray_vec_new(RAY_I64, nrows);
        if (!col) return NULL;
        if (RAY_IS_ERR(col)) { ray_error_free(col); return NULL; }
        col->len = nrows;
        int64_t* d = (int64_t*)ray_data(col);
        for (int64_t r = 0; r < nrows; r++)
            d[r] = expr->const_val;
        return col;
    }
    case DL_EXPR_CONST_F64: {
        /* Broadcast the float constant over every row. */
        ray_t* col = ray_vec_new(RAY_F64, nrows);
        if (!col) return NULL;
        if (RAY_IS_ERR(col)) { ray_error_free(col); return NULL; }
        col->len = nrows;
        double* d = (double*)ray_data(col);
        for (int64_t r = 0; r < nrows; r++)
            d[r] = expr->const_f64;
        return col;
    }
    case DL_EXPR_VAR: {
        /* Copy the column currently bound to the variable.  A negative
         * slot means the variable has not been bound yet. */
        if (!accum || !var_col || expr->var_idx < 0) return NULL;
        int ci = var_col[expr->var_idx];
        if (ci < 0) return NULL;
        ray_t* src = ray_table_get_col_idx(accum, ci);
        if (!src) return NULL;
        if (src->type != RAY_I64 && src->type != RAY_F64) return NULL;
        size_t elem = (src->type == RAY_F64) ? sizeof(double) : sizeof(int64_t);
        ray_t* dst = ray_vec_new(src->type, nrows);
        if (!dst) return NULL;
        if (RAY_IS_ERR(dst)) { ray_error_free(dst); return NULL; }
        dst->len = nrows;
        memcpy(ray_data(dst), ray_data(src), (size_t)nrows * elem);
        return dst;
    }
    case DL_EXPR_BINOP: {
        ray_t* lv = dl_eval_expr(expr->left, accum, var_col, nrows);
        ray_t* rv = dl_eval_expr(expr->right, accum, var_col, nrows);
        if (!lv || !rv) {
            if (lv) ray_release(lv);
            if (rv) ray_release(rv);
            return NULL;
        }

        /* Mixed int/float arithmetic is done in double.  Only the operand
         * that is still integer needs converting; the other one is
         * already a private F64 vector. */
        bool is_f64 = (lv->type == RAY_F64) || (rv->type == RAY_F64);
        if (is_f64) {
            if (lv->type != RAY_F64) {
                ray_t* promoted = dl_col_as_f64(lv, nrows);
                ray_release(lv);
                lv = promoted;
            }
            if (rv->type != RAY_F64) {
                ray_t* promoted = dl_col_as_f64(rv, nrows);
                ray_release(rv);
                rv = promoted;
            }
            if (!lv || !rv) {
                if (lv) ray_release(lv);
                if (rv) ray_release(rv);
                return NULL;
            }
        }

        ray_t* out = ray_vec_new(is_f64 ? RAY_F64 : RAY_I64, nrows);
        if (!out || RAY_IS_ERR(out)) {
            if (out) ray_error_free(out);
            ray_release(lv);
            ray_release(rv);
            return NULL;
        }
        out->len = nrows;

        if (is_f64) {
            double* ld = (double*)ray_data(lv);
            double* rd = (double*)ray_data(rv);
            double* od = (double*)ray_data(out);
            for (int64_t r = 0; r < nrows; r++) {
                switch (expr->binop) {
                case OP_ADD: od[r] = ld[r] + rd[r]; break;
                case OP_SUB: od[r] = ld[r] - rd[r]; break;
                case OP_MUL: od[r] = ld[r] * rd[r]; break;
                case OP_DIV: od[r] = rd[r] != 0.0 ? ld[r] / rd[r] : 0.0; break;
                default:     od[r] = 0.0; break;
                }
            }
        } else {
            int64_t* ld = (int64_t*)ray_data(lv);
            int64_t* rd = (int64_t*)ray_data(rv);
            int64_t* od = (int64_t*)ray_data(out);
            for (int64_t r = 0; r < nrows; r++) {
                switch (expr->binop) {
                case OP_ADD: od[r] = ld[r] + rd[r]; break;
                case OP_SUB: od[r] = ld[r] - rd[r]; break;
                case OP_MUL: od[r] = ld[r] * rd[r]; break;
                case OP_DIV:
                    if (rd[r] == 0) {
                        od[r] = 0;
                    } else if (rd[r] == -1) {
                        /* INT64_MIN / -1 overflows (and traps on x86);
                         * negate through unsigned arithmetic instead. */
                        od[r] = (int64_t)(0 - (uint64_t)ld[r]);
                    } else {
                        od[r] = ld[r] / rd[r];
                    }
                    break;
                default: od[r] = 0; break;
                }
            }
        }
        ray_release(lv);
        ray_release(rv);
        return out;
    }
    }
    return NULL;
}

/* Helper: append a new column to a table. Returns new owned table.
*/ +static ray_t* dl_table_add_computed_col(ray_t* tbl, ray_t* new_col, const char* name) { + int64_t ncols = ray_table_ncols(tbl); + ray_t* out = ray_table_new((int)(ncols + 1)); + for (int64_t c = 0; c < ncols; c++) { + ray_t* col = ray_table_get_col_idx(tbl, c); + if (col) + out = ray_table_add_col(out, ray_table_col_name(tbl, c), col); + } + int64_t sym = ray_sym_intern(name, strlen(name)); + out = ray_table_add_col(out, sym, new_col); + return out; +} + +/* ======================================================================== + * Builtin predicate evaluation helpers + * ======================================================================== */ + +/* before(S, E, T): keep rows where T < S */ +static ray_t* dl_builtin_before(ray_t* tbl, int s_col, int t_col) { + if (!tbl || RAY_IS_ERR(tbl) || ray_table_nrows(tbl) == 0) return tbl; + + int64_t nrows = ray_table_nrows(tbl); + int64_t ncols = ray_table_ncols(tbl); + int64_t* sd = (int64_t*)ray_data(ray_table_get_col_idx(tbl, s_col)); + int64_t* t_data = (int64_t*)ray_data(ray_table_get_col_idx(tbl, t_col)); + + int64_t count = 0; + for (int64_t r = 0; r < nrows; r++) + if (t_data[r] < sd[r]) count++; + + if (count == nrows) { ray_retain(tbl); return tbl; } + + ray_t* out = ray_table_new((int)ncols); + for (int64_t c = 0; c < ncols; c++) { + ray_t* src = ray_table_get_col_idx(tbl, c); + if (!src) continue; + ray_t* dst = ray_vec_new(src->type, count); + if (!dst || RAY_IS_ERR(dst)) continue; + dst->len = count; + int64_t* src_d = (int64_t*)ray_data(src); + int64_t* dst_d = (int64_t*)ray_data(dst); + int64_t j = 0; + for (int64_t r = 0; r < nrows; r++) + if (t_data[r] < sd[r]) + dst_d[j++] = src_d[r]; + out = ray_table_add_col(out, ray_table_col_name(tbl, c), dst); + ray_release(dst); + } + return out; +} + +/* duration_since(T1, T2, D): compute D = T2 - T1, append as new column */ +static ray_t* dl_builtin_duration_since(ray_t* tbl, int t1_col, int t2_col, + const char* out_name) { + if (!tbl || RAY_IS_ERR(tbl)) 
return tbl; + int64_t nrows = ray_table_nrows(tbl); + int64_t* t1 = (int64_t*)ray_data(ray_table_get_col_idx(tbl, t1_col)); + int64_t* t2 = (int64_t*)ray_data(ray_table_get_col_idx(tbl, t2_col)); + + ray_t* col = ray_vec_new(RAY_I64, nrows); + if (!col || RAY_IS_ERR(col)) { ray_retain(tbl); return tbl; } + col->len = nrows; + int64_t* d = (int64_t*)ray_data(col); + for (int64_t r = 0; r < nrows; r++) + d[r] = t2[r] - t1[r]; + + ray_t* out = dl_table_add_computed_col(tbl, col, out_name); + ray_release(col); + return out; +} + +/* abs(X, Y): compute Y = |X|, append as new column */ +static ray_t* dl_builtin_abs(ray_t* tbl, int x_col, const char* out_name) { + if (!tbl || RAY_IS_ERR(tbl)) return tbl; + int64_t nrows = ray_table_nrows(tbl); + int64_t* xd = (int64_t*)ray_data(ray_table_get_col_idx(tbl, x_col)); + + ray_t* col = ray_vec_new(RAY_I64, nrows); + if (!col || RAY_IS_ERR(col)) { ray_retain(tbl); return tbl; } + col->len = nrows; + int64_t* d = (int64_t*)ray_data(col); + for (int64_t r = 0; r < nrows; r++) + d[r] = xd[r] < 0 ? -xd[r] : xd[r]; + + ray_t* out = dl_table_add_computed_col(tbl, col, out_name); + ray_release(col); + return out; +} + +/* Helper: join two tables on specified column pairs. Returns new owned table. + * left_cols[k] and right_cols[k] are column indices in left/right tables. 
*/ +static ray_t* dl_join_tables(ray_t* left, ray_t* right, + const int* left_cols, const int* right_cols, int n_keys) { + if (!left || RAY_IS_ERR(left) || !right || RAY_IS_ERR(right)) return NULL; + if (ray_table_nrows(left) == 0 || ray_table_nrows(right) == 0) { + /* Return empty table with left+right non-key columns */ + int64_t lnc = ray_table_ncols(left); + int64_t rnc = ray_table_ncols(right); + ray_t* empty = ray_table_new((int)(lnc + rnc)); + for (int64_t c = 0; c < lnc; c++) { + ray_t* col = ray_table_get_col_idx(left, c); + if (!col) continue; + ray_t* ec = ray_vec_new(col->type, 0); + if (ec && !RAY_IS_ERR(ec)) { + empty = ray_table_add_col(empty, ray_table_col_name(left, c), ec); + ray_release(ec); + } + } + return empty; + } + + /* Build unique column names for the join using a single graph */ + ray_graph_t* g = ray_graph_new(NULL); + if (!g) return NULL; + + /* Create copies with unique names */ + int64_t lnc = ray_table_ncols(left); + int64_t rnc = ray_table_ncols(right); + ray_t* ltbl = ray_table_new((int)lnc); + for (int64_t c = 0; c < lnc; c++) { + ray_t* col = ray_table_get_col_idx(left, c); + if (!col) continue; + char name[32]; snprintf(name, sizeof(name), "L%d", (int)c); + int64_t sym = ray_sym_intern(name, strlen(name)); + ltbl = ray_table_add_col(ltbl, sym, col); + } + ray_t* rtbl = ray_table_new((int)rnc); + for (int64_t c = 0; c < rnc; c++) { + ray_t* col = ray_table_get_col_idx(right, c); + if (!col) continue; + char name[32]; snprintf(name, sizeof(name), "R%d", (int)c); + int64_t sym = ray_sym_intern(name, strlen(name)); + rtbl = ray_table_add_col(rtbl, sym, col); + } + + uint16_t l_tid = ray_graph_add_table(g, ltbl); + uint16_t r_tid = ray_graph_add_table(g, rtbl); + ray_op_t* l_op = ray_const_table(g, ltbl); + ray_op_t* r_op = ray_const_table(g, rtbl); + + ray_op_t* lkeys[DL_MAX_ARITY]; + ray_op_t* rkeys[DL_MAX_ARITY]; + for (int k = 0; k < n_keys; k++) { + char lname[32]; snprintf(lname, sizeof(lname), "L%d", left_cols[k]); + char 
rname[32]; snprintf(rname, sizeof(rname), "R%d", right_cols[k]); + lkeys[k] = ray_scan_table(g, l_tid, lname); + rkeys[k] = ray_scan_table(g, r_tid, rname); + } + + ray_op_t* join = ray_join(g, l_op, lkeys, r_op, rkeys, (uint8_t)n_keys, 0); + ray_t* result = ray_execute(g, join); + ray_graph_free(g); + ray_release(ltbl); + ray_release(rtbl); + return result; +} + +/* Helper: antijoin two tables on specified column pairs. Returns new owned table. */ +static ray_t* dl_antijoin_tables(ray_t* left, ray_t* right, + const int* left_cols, const int* right_cols, int n_keys) { + if (!left || RAY_IS_ERR(left)) return left; + if (!right || RAY_IS_ERR(right) || ray_table_nrows(right) == 0) { + ray_retain(left); return left; + } + if (ray_table_nrows(left) == 0) { ray_retain(left); return left; } + + ray_graph_t* g = ray_graph_new(NULL); + if (!g) { ray_retain(left); return left; } + + int64_t lnc = ray_table_ncols(left); + int64_t rnc = ray_table_ncols(right); + ray_t* ltbl = ray_table_new((int)lnc); + for (int64_t c = 0; c < lnc; c++) { + ray_t* col = ray_table_get_col_idx(left, c); + if (!col) continue; + char name[32]; snprintf(name, sizeof(name), "L%d", (int)c); + ltbl = ray_table_add_col(ltbl, ray_sym_intern(name, strlen(name)), col); + } + ray_t* rtbl = ray_table_new((int)rnc); + for (int64_t c = 0; c < rnc; c++) { + ray_t* col = ray_table_get_col_idx(right, c); + if (!col) continue; + char name[32]; snprintf(name, sizeof(name), "R%d", (int)c); + rtbl = ray_table_add_col(rtbl, ray_sym_intern(name, strlen(name)), col); + } + + uint16_t l_tid = ray_graph_add_table(g, ltbl); + uint16_t r_tid = ray_graph_add_table(g, rtbl); + ray_op_t* l_op = ray_const_table(g, ltbl); + ray_op_t* r_op = ray_const_table(g, rtbl); + + ray_op_t* lkeys[DL_MAX_ARITY]; + ray_op_t* rkeys[DL_MAX_ARITY]; + for (int k = 0; k < n_keys; k++) { + char lname[32]; snprintf(lname, sizeof(lname), "L%d", left_cols[k]); + char rname[32]; snprintf(rname, sizeof(rname), "R%d", right_cols[k]); + lkeys[k] = 
ray_scan_table(g, l_tid, lname); + rkeys[k] = ray_scan_table(g, r_tid, rname); + } + + ray_op_t* aj = ray_antijoin(g, l_op, lkeys, r_op, rkeys, (uint8_t)n_keys); + ray_t* result = ray_execute(g, aj); + ray_graph_free(g); + ray_release(ltbl); + ray_release(rtbl); + return result; +} + +/* Helper: filter a table to rows where column col_idx == value */ +/* Row-at-index read helper: read an I64 from either a RAY_I64 column + * or from a RAY_SYM column (of any adaptive width) as a sym ID. Other + * types aren't supported by the constant-filter path and cause the + * caller to pass through the input table unchanged. */ +static bool dl_col_eq_row(ray_t* col, int64_t row, int64_t value) { + if (col->type == RAY_I64) return ((int64_t*)ray_data(col))[row] == value; + if (col->type == RAY_SYM) + return ray_read_sym(ray_data(col), row, col->type, col->attrs) == value; + return false; +} + +static ray_t* dl_filter_eq(ray_t* tbl, int col_idx, int64_t value) { + /* Contract: always return an owned reference (rc bumped) so the + * caller can release uniformly. Every pass-through must therefore + * retain — else the caller's `ray_release(body_tbl); body_tbl = + * filtered;` pattern would leave body_tbl under-referenced and a + * later release could land on freed memory. */ + if (!tbl || RAY_IS_ERR(tbl)) { if (tbl) ray_retain(tbl); return tbl; } + if (ray_table_nrows(tbl) == 0) { ray_retain(tbl); return tbl; } + + ray_t* col = ray_table_get_col_idx(tbl, col_idx); + if (!col) { ray_retain(tbl); return tbl; } + /* Non-numeric, non-sym keys: not supported by this filter — pass + * through (retained) rather than miscompare via raw memcpy. */ + if (col->type != RAY_I64 && col->type != RAY_SYM) { + ray_retain(tbl); + return tbl; + } + + int64_t nrows = ray_table_nrows(tbl); + int64_t ncols = ray_table_ncols(tbl); + + /* Count matching rows — type-aware read for RAY_SYM adaptive width. 
*/ + int64_t count = 0; + for (int64_t r = 0; r < nrows; r++) + if (dl_col_eq_row(col, r, value)) count++; + + if (count == nrows) { ray_retain(tbl); return tbl; } + + /* Build filtered table. Each surviving column is allocated with + * its source's element-size (via ray_sym_elem_size) so narrow-SYM + * stays narrow rather than being silently widened to W64. */ + ray_t* out = ray_table_new((int)ncols); + if (!out) return ray_error("memory", "dl_filter_eq: table_new"); + if (RAY_IS_ERR(out)) return out; + for (int64_t c = 0; c < ncols; c++) { + ray_t* src = ray_table_get_col_idx(tbl, c); + if (!src) { + ray_release(out); + return ray_error("domain", "dl_filter_eq: missing source column"); + } + ray_t* dst = (src->type == RAY_SYM) + ? ray_sym_vec_new(src->attrs & RAY_SYM_W_MASK, count) + : ray_vec_new(src->type, count); + if (!dst) { ray_release(out); return ray_error("memory", "dl_filter_eq: vec_new"); } + if (RAY_IS_ERR(dst)) { ray_error_free(dst); ray_release(out); return ray_error("memory", "dl_filter_eq: vec_new"); } + dst->len = count; + uint8_t esz = ray_sym_elem_size(src->type, src->attrs); + const uint8_t* src_b = (const uint8_t*)ray_data(src); + uint8_t* dst_b = (uint8_t*)ray_data(dst); + int64_t j = 0; + for (int64_t r = 0; r < nrows; r++) { + if (dl_col_eq_row(col, r, value)) { + memcpy(dst_b + (size_t)j * esz, + src_b + (size_t)r * esz, + (size_t)esz); + j++; + } + } + if (src->type == RAY_STR) col_propagate_str_pool(dst, src); + ray_t* next = ray_table_add_col(out, ray_table_col_name(tbl, c), dst); + ray_release(dst); + /* ray_table_add_col does not release `out` on failure, so we + * must release the partially-built table before bailing out. */ + if (!next) { + ray_release(out); + return ray_error("memory", "dl_filter_eq: add_col"); + } + if (RAY_IS_ERR(next)) { + ray_release(out); + return next; + } + out = next; + } + return out; +} + +/* Helper: build a fully-owned broadcast column for a constant head slot. 
+ * + * Returns a fresh ray_t* vec with refcount 1, caller-owned. The caller is + * expected to hand the ref to a table via ray_table_add_col (which retains) + * and then ray_release our owning ref, leaving the table as sole owner. + * + * Correctness note: this must be a real, heap-allocated vec — not a view + * onto rule-local scratch — so that the IDB relation table can outlive the + * per-iteration scratch that built it. Cross-IDB reads at subsequent + * strata borrow from this column via ray_table_get_col_idx. */ +/* width_template: when type == RAY_SYM, this column is consulted for its + * SYM attrs/width so the broadcast matches the IDB relation's existing + * adaptive width (otherwise ray_vec_new would default to W64 and a + * later table_union would hit a ray_vec_concat width mismatch). Pass + * NULL (no existing column) to get the W64 default. Using a pointer + * here rather than a uint8_t hint avoids the W8=0 sentinel ambiguity + * of an "a zero hint means default" convention. */ +static ray_t* dl_broadcast_const_col(int64_t nrows, int8_t type, int64_t val, + const ray_t* width_template) { + if (type != RAY_I64 && type != RAY_SYM && type != RAY_F64) { + return ray_error("type", NULL); + } + uint8_t sym_w = RAY_SYM_W64; + if (type == RAY_SYM && width_template && width_template->type == RAY_SYM) + sym_w = width_template->attrs & RAY_SYM_W_MASK; + ray_t* v = (type == RAY_SYM) + ? ray_sym_vec_new(sym_w, nrows) + : ray_vec_new(type, nrows); + if (!v || RAY_IS_ERR(v)) return v; + v->len = nrows; + + if (type == RAY_SYM) { + /* Use the generic writer so it handles any adaptive width. 
*/ + void* data = ray_data(v); + for (int64_t i = 0; i < nrows; i++) { + ray_write_sym(data, i, (uint64_t)val, v->type, v->attrs); + } + } else if (type == RAY_F64) { + double d; + memcpy(&d, &val, sizeof(d)); + double* data = (double*)ray_data(v); + for (int64_t i = 0; i < nrows; i++) data[i] = d; + } else { /* RAY_I64 */ + int64_t* data = (int64_t*)ray_data(v); + for (int64_t i = 0; i < nrows; i++) data[i] = val; + } + return v; +} + +/* Helper: project table to selected columns, producing output with head relation naming. + * + * For each output slot c: + * - if col_indices[c] >= 0, copy that column from `tbl` + * - else (constant slot), synthesize a broadcast column from head_consts[c] + * with type head_const_types[c]. */ +static ray_t* dl_project(ray_t* tbl, const int* col_indices, int n_out, + dl_rel_t* head_rel, const int64_t* head_consts, + const int8_t* head_const_types) { + if (!tbl || RAY_IS_ERR(tbl)) return tbl; + int64_t nrows = ray_table_nrows(tbl); + ray_t* out = ray_table_new(n_out); + if (!out || RAY_IS_ERR(out)) + return out ? out : ray_error("memory", "dl_project: table_new"); + /* If accum collapsed to zero rows (e.g. antijoin removed everything), + * its schema may have been dropped too. Fall back to the IDB's existing + * column types so downstream table_union sees a matching schema. */ + bool empty_accum = (nrows == 0); + for (int c = 0; c < n_out; c++) { + int src_idx = col_indices[c]; + if (src_idx >= 0) { + ray_t* src = ray_table_get_col_idx(tbl, src_idx); + if (!src) { + if (empty_accum && head_rel && head_rel->table) { + ray_t* hcol = ray_table_get_col_idx(head_rel->table, c); + int8_t htype = hcol ? hcol->type : RAY_I64; + /* For SYM columns, preserve the head-relation's + * adaptive-width attrs — ray_vec_new(RAY_SYM, …) would + * force W64 and a later table_union onto a narrower + * head-rel column would hit the column-count check, + * or worse, produce a width-mismatched merge. */ + ray_t* ecol = (htype == RAY_SYM && hcol) + ? 
ray_sym_vec_new(hcol->attrs & RAY_SYM_W_MASK, 0) + : ray_vec_new(htype, 0); + if (!ecol) { + ray_release(out); + return ray_error("memory", "dl_project: empty col"); + } + if (RAY_IS_ERR(ecol)) { + ray_error_free(ecol); + ray_release(out); + return ray_error("memory", "dl_project: empty col"); + } + ray_t* next = ray_table_add_col(out, head_rel->col_names[c], ecol); + ray_release(ecol); + if (!next) { + ray_release(out); + return ray_error("memory", "dl_project: add_col"); + } + if (RAY_IS_ERR(next)) { + ray_release(out); + return next; + } + out = next; + continue; + } + ray_release(out); + return ray_error("domain", "dl_project: source column missing"); + } + /* Preserve SYM index width: ray_vec_new(RAY_SYM, …) would always + * produce a W64 vec, so memcpy'ing with the source's narrower + * element size would leave the upper bytes of each W64 slot + * uninitialized. ray_sym_vec_new mirrors src's attrs width. */ + ray_t* dst = (src->type == RAY_SYM) + ? ray_sym_vec_new(src->attrs & RAY_SYM_W_MASK, nrows) + : ray_vec_new(src->type, nrows); + if (!dst) { + ray_release(out); + return ray_error("memory", "dl_project: vec_new"); + } + if (RAY_IS_ERR(dst)) { + ray_error_free(dst); + ray_release(out); + return ray_error("memory", "dl_project: vec_new"); + } + dst->len = nrows; + uint8_t esz = ray_sym_elem_size(src->type, src->attrs); + if (esz == 0) { + ray_release(dst); + ray_release(out); + return ray_error("type", "dl_project: unsupported column type"); + } + memcpy(ray_data(dst), ray_data(src), (size_t)nrows * (size_t)esz); + /* RAY_STR stores 16-byte ray_str_t handles inline; strings >12 + * bytes keep their bytes in a per-vector pool referenced via + * pool_off. The memcpy above copies the handles but not the + * pool, so propagate the source's pool onto dst or later + * reads through pool_off would land in a NULL pool. 
*/ + if (src->type == RAY_STR) col_propagate_str_pool(dst, src); + ray_t* next = ray_table_add_col(out, head_rel->col_names[c], dst); + ray_release(dst); + /* Release the partial `out` on failure — ray_table_add_col + * does not free its input on error. */ + if (!next) { + ray_release(out); + return ray_error("memory", "dl_project: add_col"); + } + if (RAY_IS_ERR(next)) { + ray_release(out); + return next; + } + out = next; + } else { + /* Constant head slot: materialize an owned broadcast column. */ + int8_t ctype = head_const_types ? head_const_types[c] : 0; + if (ctype == 0) { + ray_release(out); + return ray_error("domain", "dl_project: unset head-const type"); + } + /* When the head relation's slot is an existing SYM column + * (from a prior aligned rule), match its width so + * table_union's ray_vec_concat doesn't reject a W64 vs + * narrow mismatch. */ + const ray_t* width_tpl = NULL; + if (ctype == RAY_SYM && head_rel && head_rel->table) + width_tpl = ray_table_get_col_idx(head_rel->table, c); + ray_t* bcast = dl_broadcast_const_col(nrows, ctype, head_consts[c], width_tpl); + if (!bcast || RAY_IS_ERR(bcast)) { + ray_release(out); + return bcast ? bcast : ray_error("memory", "dl_project: broadcast"); + } + ray_t* next = ray_table_add_col(out, head_rel->col_names[c], bcast); + ray_release(bcast); + if (!next) { + ray_release(out); + return ray_error("memory", "dl_project: add_col"); + } + if (RAY_IS_ERR(next)) { + ray_release(out); + return next; + } + out = next; + } + } + return out; +} + +ray_op_t* dl_compile_rule(dl_program_t* prog, dl_rule_t* rule, + int delta_pos, int rule_idx, ray_graph_t* g) { + /* Materializing approach: execute body atoms one at a time. + * + * For each positive body atom, we get the relation table and apply + * constant filters. Then join with the accumulated result. + * Variable bindings track which column in the accumulated table + * holds each variable's value. + * + * var_col[v] = column index in `accum` table for variable v. 
+ */ + int var_col[DL_MAX_ARITY * DL_MAX_BODY]; /* column index in accum per variable */ + bool var_bound[DL_MAX_ARITY * DL_MAX_BODY]; + memset(var_bound, 0, sizeof(var_bound)); + memset(var_col, -1, sizeof(var_col)); + + ray_t* accum = NULL; /* accumulated result table */ + + for (int b = 0; b < rule->n_body; b++) { + dl_body_t* body = &rule->body[b]; + if (body->type != DL_POS) continue; + + int rel_idx = dl_find_rel(prog, body->pred); + if (rel_idx < 0) { if (accum) ray_release(accum); return NULL; } + dl_rel_t* rel = &prog->rels[rel_idx]; + ray_t* body_tbl = rel->table; + ray_retain(body_tbl); + + /* Apply constant filters */ + for (int c = 0; c < body->arity; c++) { + if (body->vars[c] == DL_CONST) { + ray_t* filtered = dl_filter_eq(body_tbl, c, body->const_vals[c]); + ray_release(body_tbl); + if (!filtered) { + /* Treat as genuine failure — dl_filter_eq returns an + * owned reference on every non-NULL path, so NULL + * means something went wrong inside the helper. */ + if (accum) ray_release(accum); + prog->eval_err = true; + return NULL; + } + if (RAY_IS_ERR(filtered)) { + ray_error_free(filtered); + if (accum) ray_release(accum); + prog->eval_err = true; + return NULL; + } + body_tbl = filtered; + } + } + + if (accum == NULL) { + /* First body atom: accum = body_tbl */ + accum = body_tbl; + /* Bind variables to column indices */ + for (int c = 0; c < body->arity; c++) { + int v = body->vars[c]; + if (v == DL_CONST) continue; + if (!var_bound[v]) { + var_bound[v] = true; + var_col[v] = c; + } + } + } else { + /* Join accum with body_tbl on shared variables */ + int lkeys[DL_MAX_ARITY], rkeys[DL_MAX_ARITY]; + int n_jk = 0; + for (int c = 0; c < body->arity; c++) { + int v = body->vars[c]; + if (v == DL_CONST) continue; + if (var_bound[v]) { + lkeys[n_jk] = var_col[v]; + rkeys[n_jk] = c; + n_jk++; + } + } + + ray_t* joined; + if (n_jk > 0) { + joined = dl_join_tables(accum, body_tbl, lkeys, rkeys, n_jk); + } else { + /* Cross product: use dummy key */ + int 
lk0 = 0, rk0 = 0; + joined = dl_join_tables(accum, body_tbl, &lk0, &rk0, 0); + } + + int64_t accum_ncols = ray_table_ncols(accum); + ray_release(accum); + ray_release(body_tbl); + accum = joined; + + /* Bind new variables: their columns come after left columns in join output. + * Join output = [all left cols] + [non-key right cols]. + * We need to track which right columns appear in output. */ + int right_col_map[DL_MAX_ARITY]; /* right col c -> output col idx */ + int out_idx = (int)accum_ncols; + for (int c = 0; c < body->arity; c++) { + bool is_key = false; + for (int k = 0; k < n_jk; k++) { + if (rkeys[k] == c) { is_key = true; break; } + } + if (is_key) { + right_col_map[c] = -1; /* key col not in output */ + } else { + right_col_map[c] = out_idx++; + } + } + + for (int c = 0; c < body->arity; c++) { + int v = body->vars[c]; + if (v == DL_CONST) continue; + if (!var_bound[v]) { + var_bound[v] = true; + var_col[v] = right_col_map[c]; + } + } + } + } + + /* Rules with only aggregates (no positive body atoms) still need a + * one-row binding environment so aggregate results can be projected. 
*/ + if (!accum) { + bool has_agg = false; + for (int bi = 0; bi < rule->n_body; bi++) { + if (rule->body[bi].type == DL_AGG) { + has_agg = true; + break; + } + } + if (!has_agg) + return NULL; + ray_t* one_val = ray_vec_new(RAY_I64, 1); + if (!one_val) { prog->eval_err = true; return NULL; } + if (RAY_IS_ERR(one_val)) { + ray_error_free(one_val); + prog->eval_err = true; + return NULL; + } + one_val->len = 1; + ((int64_t*)ray_data(one_val))[0] = 0; + accum = ray_table_new(1); + if (!accum) { + ray_release(one_val); + prog->eval_err = true; + return NULL; + } + if (RAY_IS_ERR(accum)) { + ray_error_free(accum); + ray_release(one_val); + prog->eval_err = true; + return NULL; + } + int64_t unit_sym = ray_sym_intern("_unit", 5); + ray_t* accum_unit = ray_table_add_col(accum, unit_sym, one_val); + ray_release(one_val); + /* ray_table_add_col doesn't free `accum` on error — release it + * ourselves so the partially-built table isn't leaked. */ + if (!accum_unit) { + ray_release(accum); + prog->eval_err = true; + return NULL; + } + if (RAY_IS_ERR(accum_unit)) { + ray_error_free(accum_unit); + ray_release(accum); + prog->eval_err = true; + return NULL; + } + accum = accum_unit; + } + + if (!accum) return NULL; + + /* Process non-join body literals in declared order. + * This ensures dependencies between literals (e.g., interval bind before + * assignment, assignment before comparison) are respected. 
*/ + for (int b = 0; b < rule->n_body; b++) { + dl_body_t* body = &rule->body[b]; + if (body->type == DL_POS) continue; /* already processed above */ + if (!accum || RAY_IS_ERR(accum)) break; + + switch (body->type) { + case DL_NEG: { + int rel_idx = dl_find_rel(prog, body->pred); + if (rel_idx < 0) { ray_release(accum); return NULL; } + dl_rel_t* rel = &prog->rels[rel_idx]; + + /* Apply constant filters to the negated relation first */ + ray_t* neg_tbl = rel->table; + ray_retain(neg_tbl); + for (int c = 0; c < body->arity; c++) { + if (body->vars[c] == DL_CONST) { + ray_t* filtered = dl_filter_eq(neg_tbl, c, body->const_vals[c]); + ray_release(neg_tbl); + if (!filtered) { + ray_release(accum); + prog->eval_err = true; + return NULL; + } + if (RAY_IS_ERR(filtered)) { + ray_error_free(filtered); + ray_release(accum); + prog->eval_err = true; + return NULL; + } + neg_tbl = filtered; + } + } + + int lkeys[DL_MAX_ARITY], rkeys[DL_MAX_ARITY]; + int n_keys = 0; + for (int c = 0; c < body->arity; c++) { + int v = body->vars[c]; + if (v == DL_CONST) continue; + if (var_bound[v]) { + lkeys[n_keys] = var_col[v]; + rkeys[n_keys] = c; + n_keys++; + } + } + + if (n_keys > 0) { + ray_t* result = dl_antijoin_tables(accum, neg_tbl, lkeys, rkeys, n_keys); + ray_release(accum); + accum = result; + } + ray_release(neg_tbl); + break; + } + + case DL_ASSIGN: { + int64_t nrows = ray_table_nrows(accum); + ray_t* new_col = dl_eval_expr(body->assign_expr, accum, var_col, nrows); + /* Silently breaking would leave assign_var unbound and let + * the rest of the rule keep compiling with stale bindings, + * producing a dl_eval == 0 return alongside wrong rows. 
*/ + if (!new_col) { + ray_release(accum); + prog->eval_err = true; + return NULL; + } + if (RAY_IS_ERR(new_col)) { + ray_error_free(new_col); + ray_release(accum); + prog->eval_err = true; + return NULL; + } + + int new_col_idx = (int)ray_table_ncols(accum); + char colname[32]; + snprintf(colname, sizeof(colname), "_a%d", body->assign_var); + ray_t* new_accum = dl_table_add_computed_col(accum, new_col, colname); + ray_release(new_col); + ray_release(accum); + if (!new_accum) { prog->eval_err = true; return NULL; } + if (RAY_IS_ERR(new_accum)) { + ray_error_free(new_accum); + prog->eval_err = true; + return NULL; + } + accum = new_accum; + + var_bound[body->assign_var] = true; + var_col[body->assign_var] = new_col_idx; + break; + } + + case DL_AGG: { + if (body->agg_n_group_keys > 0) { + /* Grouped aggregation: use rayforce's ray_group on src_table. + * + * Mixed-rule guard: this path assumes accum is the singleton + * _unit placeholder created for aggregate-only rules. If the + * rule has real positive body atoms, accum carries bound + * variables from a prior join that we would need to intersect + * against the group result — not yet supported. Bail early. */ + bool has_pos = false; + for (int bi = 0; bi < rule->n_body; bi++) { + if (rule->body[bi].type == DL_POS) { has_pos = true; break; } + } + if (has_pos) { + /* nyi: grouped aggregate + positive body atoms. + * Surface via eval_err so dl_eval reports failure + * instead of writing a warning to stderr in a + * non-debug build. */ + ray_release(accum); + prog->eval_err = true; + return NULL; + } + + int src_idx = dl_find_rel(prog, body->agg_pred); + if (src_idx < 0) { ray_release(accum); return NULL; } + ray_t* src_table = prog->rels[src_idx].table; + int64_t src_nrows = (src_table && !RAY_IS_ERR(src_table)) + ? ray_table_nrows(src_table) : 0; + if (src_nrows == 0) { + /* No source rows -> no groups -> rule produces no head tuples. 
*/ + ray_release(accum); + return NULL; + } + + dl_rel_t* src_rel = &prog->rels[src_idx]; + int nk = body->agg_n_group_keys; + + /* Build a sub-graph that SCANs src_table's columns by symbol name. + * ray_graph_new retains src_table internally; no extra retain needed. */ + ray_graph_t* gg = ray_graph_new(src_table); + if (!gg) { + ray_release(accum); + prog->eval_err = true; + return NULL; + } + + ray_op_t* keys_ops[DL_AGG_MAX_KEYS]; + for (int i = 0; i < nk; i++) { + int kc = body->agg_group_key_cols[i]; + if (kc < 0 || kc >= src_rel->arity) { + ray_graph_free(gg); + ray_release(accum); + prog->eval_err = true; + return NULL; + } + int64_t sym = src_rel->col_names[kc]; + ray_t* s = ray_sym_str(sym); + if (!s) { + ray_graph_free(gg); + ray_release(accum); + prog->eval_err = true; + return NULL; + } + keys_ops[i] = ray_scan(gg, ray_str_ptr(s)); + if (!keys_ops[i]) { + ray_graph_free(gg); + ray_release(accum); + prog->eval_err = true; + return NULL; + } + } + + /* Agg input: value column (for COUNT we still pass a column; any + * column works since COUNT only counts rows). Must be bounds- + * checked — silently clamping to 0 would compute a valid-looking + * but wrong result over an unrelated column. 
*/ + int value_col = body->agg_value_col; + if (body->agg_op != DL_AGG_COUNT && + (value_col < 0 || value_col >= src_rel->arity)) { + ray_graph_free(gg); + ray_release(accum); + prog->eval_err = true; + return NULL; + } + if (value_col < 0 || value_col >= src_rel->arity) value_col = 0; + ray_t* vs = ray_sym_str(src_rel->col_names[value_col]); + if (!vs) { + ray_graph_free(gg); + ray_release(accum); + prog->eval_err = true; + return NULL; + } + ray_op_t* agg_in = ray_scan(gg, ray_str_ptr(vs)); + if (!agg_in) { + ray_graph_free(gg); + ray_release(accum); + prog->eval_err = true; + return NULL; + } + + uint16_t op_code; + switch (body->agg_op) { + case DL_AGG_COUNT: op_code = OP_COUNT; break; + case DL_AGG_SUM: op_code = OP_SUM; break; + case DL_AGG_MIN: op_code = OP_MIN; break; + case DL_AGG_MAX: op_code = OP_MAX; break; + case DL_AGG_AVG: op_code = OP_AVG; break; + default: + ray_graph_free(gg); + ray_release(accum); return NULL; + } + + ray_op_t* ag_ins[1] = { agg_in }; + ray_op_t* root = ray_group(gg, keys_ops, (uint8_t)nk, &op_code, ag_ins, 1); + if (!root) { + ray_graph_free(gg); + ray_release(accum); + prog->eval_err = true; + return NULL; + } + ray_t* group_tbl = ray_execute(gg, root); + ray_graph_free(gg); + + if (!group_tbl) { + ray_release(accum); + prog->eval_err = true; + return NULL; + } + if (RAY_IS_ERR(group_tbl)) { + ray_error_free(group_tbl); + ray_release(accum); + prog->eval_err = true; + return NULL; + } + + /* Replace accum with group_tbl (schema: key0..key{nk-1}, agg). + * This is valid because the DL_AGG case for aggregate-only rules + * created a singleton _unit accum that we can discard. Mixed + * rules (body atoms + grouped agg) are not supported here; they + * would require a join on shared vars and fall under A5/later. 
*/ + ray_release(accum); + accum = group_tbl; + + /* Bind key variables to the key columns in the group output */ + for (int i = 0; i < nk; i++) { + int kv = body->agg_group_key_vars[i]; + var_bound[kv] = true; + var_col[kv] = i; + } + /* Bind target variable to the aggregate column (last column) */ + var_bound[body->agg_target_var] = true; + var_col[body->agg_target_var] = nk; /* agg column immediately follows keys */ + break; + } + /* -------- existing scalar path below unchanged -------- */ + int src_idx = dl_find_rel(prog, body->agg_pred); + if (src_idx < 0) { + ray_release(accum); + return NULL; + } + dl_rel_t* src_rel_s = &prog->rels[src_idx]; + ray_t* src_table = src_rel_s->table; + int64_t src_nrows = (src_table && !RAY_IS_ERR(src_table)) + ? ray_table_nrows(src_table) + : 0; + + /* Bounds-check value column up front for every value-taking op + * (SUM/MIN/MAX/AVG). Must happen before the empty-source early + * returns below, otherwise an out-of-range index on an empty + * source would silently emit the SUM identity 0 / 0.0. */ + bool need_value_col = (body->agg_op == DL_AGG_SUM + || body->agg_op == DL_AGG_MIN + || body->agg_op == DL_AGG_MAX + || body->agg_op == DL_AGG_AVG); + if (need_value_col && + (body->agg_value_col < 0 || + body->agg_value_col >= src_rel_s->arity)) { + ray_release(accum); + prog->eval_err = true; + return NULL; + } + + if (src_nrows == 0 && (body->agg_op == DL_AGG_MIN + || body->agg_op == DL_AGG_MAX + || body->agg_op == DL_AGG_AVG)) { + /* Empty-source: MIN/MAX/AVG emit no row (matches rayforce core's domain + * error / typed-null semantics). COUNT and SUM keep their identities (0). */ + ray_release(accum); + return NULL; + } + + int64_t result_i = 0; + double result_f = 0.0; + bool is_avg = (body->agg_op == DL_AGG_AVG); + /* Float promotion: AVG always emits f64; SUM/MIN/MAX track their + * source column type (i64 in -> i64 out; f64 in -> f64 out). + * COUNT is always i64. 
For empty SUM, we still need to inspect + * the column type so the identity (0 / 0.0) is emitted in the + * correct result type. */ + bool is_float = is_avg; + if (need_value_col) { + ray_t* vc0 = ray_table_get_col_idx(src_table, body->agg_value_col); + if (vc0) { + if (vc0->type == RAY_F64) { + is_float = true; + } else if (vc0->type != RAY_I64) { + /* Non-numeric source: reject regardless of row count. */ + ray_release(accum); + prog->eval_err = true; + return NULL; + } + } + } + switch (body->agg_op) { + case DL_AGG_COUNT: + result_i = src_nrows; + break; + case DL_AGG_SUM: + case DL_AGG_MIN: + case DL_AGG_MAX: + case DL_AGG_AVG: + if (src_nrows > 0) { + ray_t* val_col = + ray_table_get_col_idx(src_table, body->agg_value_col); + if (!val_col) { + ray_release(accum); + return NULL; + } + if (val_col->type == RAY_I64) { + int64_t* vd = (int64_t*)ray_data(val_col); + if (body->agg_op == DL_AGG_SUM) { + result_i = 0; + for (int64_t i = 0; i < src_nrows; i++) + result_i += vd[i]; + } else if (body->agg_op == DL_AGG_MIN) { + result_i = vd[0]; + for (int64_t i = 1; i < src_nrows; i++) { + if (vd[i] < result_i) + result_i = vd[i]; + } + } else if (body->agg_op == DL_AGG_MAX) { + result_i = vd[0]; + for (int64_t i = 1; i < src_nrows; i++) { + if (vd[i] > result_i) + result_i = vd[i]; + } + } else { /* DL_AGG_AVG */ + int64_t acc = 0; + for (int64_t i = 0; i < src_nrows; i++) + acc += vd[i]; + result_f = (double)acc / (double)src_nrows; + } + } else if (val_col->type == RAY_F64) { + is_float = true; /* SUM/MIN/MAX promote to f64 */ + double* vd = (double*)ray_data(val_col); + if (body->agg_op == DL_AGG_SUM) { + result_f = 0.0; + for (int64_t i = 0; i < src_nrows; i++) + result_f += vd[i]; + } else if (body->agg_op == DL_AGG_MIN) { + result_f = vd[0]; + for (int64_t i = 1; i < src_nrows; i++) { + if (vd[i] < result_f) + result_f = vd[i]; + } + } else if (body->agg_op == DL_AGG_MAX) { + result_f = vd[0]; + for (int64_t i = 1; i < src_nrows; i++) { + if (vd[i] > result_f) + 
result_f = vd[i]; + } + } else { /* DL_AGG_AVG */ + double acc = 0.0; + for (int64_t i = 0; i < src_nrows; i++) + acc += vd[i]; + result_f = acc / (double)src_nrows; + } + } else { + /* Non-numeric source column — reject loudly rather than + * silently returning zero. */ + ray_release(accum); + prog->eval_err = true; + return NULL; + } + } + break; + default: + break; + } + + int64_t nrows = ray_table_nrows(accum); + if (nrows == 0) + break; + ray_t* new_col = ray_vec_new(is_float ? RAY_F64 : RAY_I64, nrows); + /* Silent break would leave agg_target_var unbound and eval + * would keep running with a partially-constructed rule — + * surface the allocation failure so dl_eval returns -1. */ + if (!new_col) { + ray_release(accum); + prog->eval_err = true; + return NULL; + } + if (RAY_IS_ERR(new_col)) { + ray_error_free(new_col); + ray_release(accum); + prog->eval_err = true; + return NULL; + } + new_col->len = nrows; + if (is_float) { + double* nd = (double*)ray_data(new_col); + for (int64_t r = 0; r < nrows; r++) nd[r] = result_f; + } else { + int64_t* nd = (int64_t*)ray_data(new_col); + for (int64_t r = 0; r < nrows; r++) nd[r] = result_i; + } + + int new_col_idx = (int)ray_table_ncols(accum); + char colname[32]; + snprintf(colname, sizeof(colname), "_g%d", body->agg_target_var); + ray_t* new_accum = dl_table_add_computed_col(accum, new_col, colname); + ray_release(new_col); + ray_release(accum); + accum = new_accum; + + var_bound[body->agg_target_var] = true; + var_col[body->agg_target_var] = new_col_idx; + break; + } + + case DL_BUILTIN: { + switch (body->builtin_id) { + case DL_BUILTIN_BEFORE: { + int s_col = var_col[body->vars[0]]; + int t_col = var_col[body->vars[2]]; + ray_t* filtered = dl_builtin_before(accum, s_col, t_col); + ray_release(accum); + accum = filtered; + break; + } + case DL_BUILTIN_DURATION_SINCE: { + int t1_col = var_col[body->vars[0]]; + int t2_col = var_col[body->vars[1]]; + int d_var = body->vars[2]; + int new_idx = 
(int)ray_table_ncols(accum); + char colname[32]; + snprintf(colname, sizeof(colname), "_d%d", d_var); + ray_t* result = dl_builtin_duration_since(accum, t1_col, t2_col, colname); + ray_release(accum); + accum = result; + var_bound[d_var] = true; + var_col[d_var] = new_idx; + break; + } + case DL_BUILTIN_ABS: { + int x_col = var_col[body->vars[0]]; + int y_var = body->vars[1]; + int new_idx = (int)ray_table_ncols(accum); + char colname[32]; + snprintf(colname, sizeof(colname), "_abs%d", y_var); + ray_t* result = dl_builtin_abs(accum, x_col, colname); + ray_release(accum); + accum = result; + var_bound[y_var] = true; + var_col[y_var] = new_idx; + break; + } + } + break; + } + + case DL_CMP: { + int64_t nrows = ray_table_nrows(accum); + if (nrows == 0) break; + + ray_t* lhs_evaled = NULL; + ray_t* rhs_evaled = NULL; + ray_t* lhs_src = NULL; /* borrowed reference for type inspection */ + ray_t* rhs_src = NULL; + + if (body->cmp_lhs_expr) { + lhs_evaled = dl_eval_expr(body->cmp_lhs_expr, accum, var_col, nrows); + /* LHS evaluation failure can't be silently skipped — a + * missing filter changes the query's answer. 
*/ + if (!lhs_evaled) { + prog->eval_err = true; + ray_release(accum); + return NULL; + } + if (RAY_IS_ERR(lhs_evaled)) { + ray_error_free(lhs_evaled); + prog->eval_err = true; + ray_release(accum); + return NULL; + } + lhs_src = lhs_evaled; + } else { + int lhs_col = var_col[body->cmp_lhs]; + lhs_src = ray_table_get_col_idx(accum, lhs_col); + if (!lhs_src) { + prog->eval_err = true; + ray_release(accum); + return NULL; + } + } + + if (body->cmp_rhs_expr) { + rhs_evaled = dl_eval_expr(body->cmp_rhs_expr, accum, var_col, nrows); + if (!rhs_evaled) { + if (lhs_evaled) ray_release(lhs_evaled); + prog->eval_err = true; + ray_release(accum); + return NULL; + } + if (RAY_IS_ERR(rhs_evaled)) { + ray_error_free(rhs_evaled); + if (lhs_evaled) ray_release(lhs_evaled); + prog->eval_err = true; + ray_release(accum); + return NULL; + } + rhs_src = rhs_evaled; + } else if (body->cmp_rhs != DL_CONST) { + int rhs_col = var_col[body->cmp_rhs]; + rhs_src = ray_table_get_col_idx(accum, rhs_col); + if (!rhs_src) { + if (lhs_evaled) ray_release(lhs_evaled); + prog->eval_err = true; + ray_release(accum); + return NULL; + } + } + /* else rhs is a constant i64 body->cmp_const */ + + /* Reject non-numeric sources — DL_CMP has no meaningful + * comparison for SYM/STR columns without an ordering hook. */ + bool lhs_is_f64 = lhs_src && lhs_src->type == RAY_F64; + bool rhs_is_f64 = rhs_src && rhs_src->type == RAY_F64; + if (lhs_src && lhs_src->type != RAY_I64 && lhs_src->type != RAY_F64) { + if (lhs_evaled) ray_release(lhs_evaled); + if (rhs_evaled) ray_release(rhs_evaled); + prog->eval_err = true; + ray_release(accum); + return NULL; + } + if (rhs_src && rhs_src->type != RAY_I64 && rhs_src->type != RAY_F64) { + if (lhs_evaled) ray_release(lhs_evaled); + if (rhs_evaled) ray_release(rhs_evaled); + prog->eval_err = true; + ray_release(accum); + return NULL; + } + + /* Promote to f64 iff either side is f64. Otherwise stay in + * i64 arithmetic for speed and exact integer semantics. 
*/ + bool use_f64 = lhs_is_f64 || rhs_is_f64; + const int64_t* lhs_i = !use_f64 ? (const int64_t*)ray_data(lhs_src) : NULL; + const int64_t* rhs_i = !use_f64 && rhs_src ? (const int64_t*)ray_data(rhs_src) : NULL; + const double* lhs_f = use_f64 && !lhs_is_f64 ? NULL + : (use_f64 ? (const double*)ray_data(lhs_src) : NULL); + const double* rhs_f = use_f64 && rhs_src && rhs_is_f64 + ? (const double*)ray_data(rhs_src) : NULL; + + ray_t* mask_block = ray_alloc((size_t)nrows * sizeof(bool)); + if (!mask_block) { + if (lhs_evaled) ray_release(lhs_evaled); + if (rhs_evaled) ray_release(rhs_evaled); + break; + } + bool* mask = (bool*)ray_data(mask_block); + int64_t count = 0; + for (int64_t r = 0; r < nrows; r++) { + bool pass = false; + if (use_f64) { + /* Widen the non-f64 side — mixed arithmetic is already + * supported by dl_eval_expr, and DL_CMP_const is i64. */ + double lv = lhs_is_f64 ? lhs_f[r] : (double)((const int64_t*)ray_data(lhs_src))[r]; + double rv; + if (rhs_src) + rv = rhs_is_f64 ? rhs_f[r] : (double)((const int64_t*)ray_data(rhs_src))[r]; + else + rv = (double)body->cmp_const; + switch (body->cmp_op) { + case DL_CMP_EQ: pass = (lv == rv); break; + case DL_CMP_NE: pass = (lv != rv); break; + case DL_CMP_LT: pass = (lv < rv); break; + case DL_CMP_LE: pass = (lv <= rv); break; + case DL_CMP_GT: pass = (lv > rv); break; + case DL_CMP_GE: pass = (lv >= rv); break; + } + } else { + int64_t lv = lhs_i[r]; + int64_t rv = rhs_i ? 
rhs_i[r] : body->cmp_const; + switch (body->cmp_op) { + case DL_CMP_EQ: pass = (lv == rv); break; + case DL_CMP_NE: pass = (lv != rv); break; + case DL_CMP_LT: pass = (lv < rv); break; + case DL_CMP_LE: pass = (lv <= rv); break; + case DL_CMP_GT: pass = (lv > rv); break; + case DL_CMP_GE: pass = (lv >= rv); break; + } + } + (void)lhs_f; /* silence unused warnings in non-f64 paths */ + mask[r] = pass; + if (pass) count++; + } + + if (lhs_evaled) ray_release(lhs_evaled); + if (rhs_evaled) ray_release(rhs_evaled); + + if (count == nrows) { + ray_free(mask_block); + break; /* all rows pass */ + } + + /* Build filtered table — element-size-aware memcpy so f64 + * columns and narrow-SYM columns survive the mask unchanged. + * Silently `continue`-ing past missing columns would yield + * a table with fewer columns than accum, breaking schema + * invariants in downstream table_union. Treat every such + * failure as unrecoverable. */ + int64_t ncols = ray_table_ncols(accum); + ray_t* out = ray_table_new((int)ncols); + if (!out || RAY_IS_ERR(out)) { + if (out && RAY_IS_ERR(out)) ray_error_free(out); + ray_free(mask_block); + ray_release(accum); + prog->eval_err = true; + return NULL; + } + for (int64_t c = 0; c < ncols; c++) { + ray_t* src = ray_table_get_col_idx(accum, c); + if (!src) { + ray_release(out); ray_free(mask_block); + ray_release(accum); + prog->eval_err = true; + return NULL; + } + ray_t* dst = (src->type == RAY_SYM) + ? 
ray_sym_vec_new(src->attrs & RAY_SYM_W_MASK, count) + : ray_vec_new(src->type, count); + if (!dst) { + ray_release(out); ray_free(mask_block); + ray_release(accum); + prog->eval_err = true; + return NULL; + } + if (RAY_IS_ERR(dst)) { + ray_error_free(dst); + ray_release(out); ray_free(mask_block); + ray_release(accum); + prog->eval_err = true; + return NULL; + } + dst->len = count; + uint8_t esz = ray_sym_elem_size(src->type, src->attrs); + const uint8_t* sb = (const uint8_t*)ray_data(src); + uint8_t* db = (uint8_t*)ray_data(dst); + int64_t j = 0; + for (int64_t r = 0; r < nrows; r++) + if (mask[r]) { + memcpy(db + (size_t)j * esz, sb + (size_t)r * esz, esz); + j++; + } + if (src->type == RAY_STR) col_propagate_str_pool(dst, src); + ray_t* next = ray_table_add_col(out, ray_table_col_name(accum, c), dst); + ray_release(dst); + if (!next) { + ray_release(out); ray_free(mask_block); + ray_release(accum); + prog->eval_err = true; + return NULL; + } + if (RAY_IS_ERR(next)) { + ray_error_free(next); + ray_release(out); ray_free(mask_block); + ray_release(accum); + prog->eval_err = true; + return NULL; + } + out = next; + } + ray_free(mask_block); + ray_release(accum); + accum = out; + break; + } + + case DL_INTERVAL: { + int fact_col = var_col[body->interval_fact_var]; + int start_col = fact_col; + int end_col = fact_col + 1; + + var_bound[body->interval_start_var] = true; + var_col[body->interval_start_var] = start_col; + + var_bound[body->interval_end_var] = true; + var_col[body->interval_end_var] = end_col; + break; + } + } /* switch */ + } + + /* Project to head variables */ + int head_idx = dl_find_rel(prog, rule->head_pred); + if (head_idx < 0) { ray_release(accum); return NULL; } + dl_rel_t* head_rel = &prog->rels[head_idx]; + + int proj_cols[DL_MAX_ARITY]; + for (int c = 0; c < rule->head_arity; c++) { + int v = rule->head_vars[c]; + if (v == DL_CONST) { + proj_cols[c] = -1; + } else { + proj_cols[c] = var_col[v]; + } + } + + ray_t* projected = dl_project(accum, 
proj_cols, rule->head_arity, head_rel, + rule->head_consts, rule->head_const_types); + ray_release(accum); + + /* dl_project now surfaces hard failures (alloc OOM, type errors, add-col + * errors) as RAY_ERROR objects. Catch those here and flag the program + * so dl_eval can return -1 instead of silently dropping the rule's + * output via the const_table/execute chain. */ + if (!projected) return NULL; + if (RAY_IS_ERR(projected)) { + ray_error_free(projected); + prog->eval_err = true; + return NULL; + } + + /* Store result in the graph as a const_table so the caller can execute */ + ray_op_t* result_node = ray_const_table(g, projected); + ray_release(projected); + return result_node; +} + +/* ======================================================================== + * Table utilities for fixpoint evaluation + * ======================================================================== */ + +/* Rename table columns to match the head relation's expected names. + * This is needed because ray_select output column names come from the scan + * nodes (e.g., "edge__c0"), but we need them to match the head relation + * (e.g., "path__c0"). Returns a new owned table. 
*/ +static ray_t* table_rename_cols(ray_t* tbl, dl_rel_t* target_rel) { + if (!tbl || RAY_IS_ERR(tbl)) return tbl; + int64_t ncols = ray_table_ncols(tbl); + if (ncols <= 0) { ray_retain(tbl); return tbl; } + + int arity = target_rel->arity; + if (ncols != arity) { ray_retain(tbl); return tbl; } + + /* Check if renaming is needed */ + bool needs_rename = false; + for (int c = 0; c < arity; c++) { + if (ray_table_col_name(tbl, c) != target_rel->col_names[c]) { + needs_rename = true; + break; + } + } + if (!needs_rename) { ray_retain(tbl); return tbl; } + + /* Build new table with correct column names sharing the same column data */ + ray_t* out = ray_table_new(arity); + for (int c = 0; c < arity; c++) { + ray_t* col = ray_table_get_col_idx(tbl, c); + if (col) + out = ray_table_add_col(out, target_rel->col_names[c], col); + } + return out; +} + +/* Canonicalize column names to "c0","c1",... Returns new owned table. */ +static ray_t* canonicalize(ray_t* tbl) { + if (!tbl || RAY_IS_ERR(tbl)) return tbl; + int64_t nc = ray_table_ncols(tbl); + ray_t* out = ray_table_new(nc); + for (int64_t c = 0; c < nc; c++) { + ray_t* col = ray_table_get_col_idx(tbl, c); + if (!col) continue; + char buf[16]; + snprintf(buf, sizeof(buf), "c%d", (int)c); + int64_t sym = ray_sym_intern(buf, strlen(buf)); + out = ray_table_add_col(out, sym, col); + } + return out; +} + +/* Restore original column names from `src` onto `tbl`. */ +static ray_t* restore_names(ray_t* tbl, ray_t* src) { + if (!tbl || RAY_IS_ERR(tbl)) return tbl; + int64_t nc = ray_table_ncols(tbl); + ray_t* out = ray_table_new(nc); + for (int64_t c = 0; c < nc; c++) { + ray_t* col = ray_table_get_col_idx(tbl, c); + if (col) + out = ray_table_add_col(out, ray_table_col_name(src, c), col); + } + ray_release(tbl); + return out; +} + +/* Create a table by concatenating all rows from tables a and b (same schema). + * Uses column-wise ray_vec_concat. Returns new owned table with a's names. 
*/ +static ray_t* table_union(ray_t* a, ray_t* b) { + /* Pass-through paths always return a retained non-NULL result so + * callers can release uniformly. A NULL operand falls back to the + * other side; a RAY_ERROR operand is *propagated* (retained) rather + * than masked by the non-error side — otherwise a real failure on + * `b` would silently surface as `a` and the caller would never see + * the error. ray_retain is a no-op on errors so the retain call is + * safe and keeps the contract "release is always valid". */ + if (!a) { + if (b) ray_retain(b); + return b; + } + if (RAY_IS_ERR(a)) { + ray_retain(a); /* no-op for errors; documents "owned return" */ + return a; + } + if (!b) { ray_retain(a); return a; } + if (RAY_IS_ERR(b)) { + ray_retain(b); + return b; + } + + /* Column-count check must run before the empty-rows short-circuit. + * Otherwise one side having 0 rows but a stripped schema (e.g. an + * antijoin result that collapsed to (0 rows, 0 cols)) would silently + * return the other side's schema and the caller would store a table + * whose arity differs from what it expected. */ + int64_t ncols_a = ray_table_ncols(a); + int64_t ncols_b = ray_table_ncols(b); + if (ncols_a != ncols_b) + return ray_error("schema", "table_union: column count mismatch"); + int64_t ncols = ncols_a; + + if (ray_table_nrows(a) == 0) { ray_retain(b); return b; } + if (ray_table_nrows(b) == 0) { ray_retain(a); return a; } + + ray_t* out = ray_table_new((int)ncols); + if (!out || RAY_IS_ERR(out)) + return out ? out : ray_error("memory", "table_union: table_new"); + for (int64_t c = 0; c < ncols; c++) { + ray_t* col_a = ray_table_get_col_idx(a, c); + ray_t* col_b = ray_table_get_col_idx(b, c); + if (!col_a || !col_b) { + /* Silently dropping a column would produce a schema-incomplete + * result that the caller mistakes for a successful union. 
*/ + ray_release(out); + return ray_error("domain", "table_union: missing column"); + } + ray_t* merged = ray_vec_concat(col_a, col_b); + if (!merged) { + ray_release(out); + return ray_error("memory", "table_union: concat"); + } + if (RAY_IS_ERR(merged)) { + /* Propagate the original error (e.g. "type" for schema + * mismatch) so callers see the real diagnostic instead of + * a generic "memory". */ + ray_release(out); + return merged; + } + ray_t* next = ray_table_add_col(out, ray_table_col_name(a, c), merged); + ray_release(merged); + if (!next) { + ray_release(out); + return ray_error("memory", "table_union: add_col"); + } + if (RAY_IS_ERR(next)) { + ray_release(out); + return next; + } + out = next; + } + return out; +} + +/* Deduplicate table rows on all columns. Returns new owned table. */ +static ray_t* table_distinct(ray_t* tbl) { + if (!tbl || RAY_IS_ERR(tbl)) return tbl; + int64_t nrows = ray_table_nrows(tbl); + if (nrows <= 1) { ray_retain(tbl); return tbl; } + + int64_t ncols = ray_table_ncols(tbl); + if (ncols <= 0) { ray_retain(tbl); return tbl; } + + ray_t* canonical = canonicalize(tbl); + if (!canonical || RAY_IS_ERR(canonical)) + return canonical ? canonical : ray_error("memory", "table_distinct: canonicalize"); + + ray_graph_t* g = ray_graph_new(canonical); + if (!g) { + ray_release(canonical); + return ray_error("memory", "table_distinct: graph_new"); + } + + ray_op_t* keys[DL_MAX_ARITY]; + for (int64_t c = 0; c < ncols && c < DL_MAX_ARITY; c++) { + char buf[16]; + snprintf(buf, sizeof(buf), "c%d", (int)c); + keys[c] = ray_scan(g, buf); + } + + ray_op_t* dist = ray_distinct(g, keys, (uint8_t)ncols); + ray_optimize(g, dist); + ray_t* deduped = ray_execute(g, dist); + ray_graph_free(g); + ray_release(canonical); + + return restore_names(deduped, tbl); +} + +/* Anti-join: rows in `left` that don't appear in `right` (same schema). + * Returns new owned table with left's original column names. 
*/
static ray_t* table_antijoin(ray_t* left, ray_t* right) {
    if (!left || RAY_IS_ERR(left)) return left;
    /* Nothing to subtract: a NULL, error, or empty `right` yields `left`
     * unchanged (retained). */
    if (!right || RAY_IS_ERR(right) || ray_table_nrows(right) == 0) {
        ray_retain(left);
        return left;
    }
    if (ray_table_nrows(left) == 0) {
        ray_retain(left);
        return left;
    }

    int64_t ncols = ray_table_ncols(left);
    if (ncols <= 0) { ray_retain(left); return left; }
    /* lkeys[]/rkeys[] hold DL_MAX_ARITY entries and ray_antijoin takes the
     * key count as uint8_t.  A wider table would make it read uninitialized
     * key pointers, so reject it up front. */
    if (ncols > DL_MAX_ARITY || ncols > UINT8_MAX)
        return ray_error("domain", "table_antijoin: too many columns");

    ray_t* cl = canonicalize(left);
    if (!cl || RAY_IS_ERR(cl))
        return cl ? cl : ray_error("memory", "table_antijoin: canonicalize left");
    ray_t* cr = canonicalize(right);
    if (!cr || RAY_IS_ERR(cr)) {
        ray_release(cl);
        return cr ? cr : ray_error("memory", "table_antijoin: canonicalize right");
    }

    ray_graph_t* g = ray_graph_new(NULL);
    if (!g) {
        ray_release(cl);
        ray_release(cr);
        return ray_error("memory", "table_antijoin: graph_new");
    }

    /* Both sides enter the graph twice: as const-table nodes (the join
     * inputs) and as registered tables (so key columns can be scanned by
     * their canonical "cN" names). */
    ray_op_t* l = ray_const_table(g, cl);
    ray_op_t* r = ray_const_table(g, cr);
    if (!l || !r) {
        ray_graph_free(g);
        ray_release(cl);
        ray_release(cr);
        return ray_error("memory", "table_antijoin: const_table");
    }

    uint16_t l_tid = ray_graph_add_table(g, cl);
    uint16_t r_tid = ray_graph_add_table(g, cr);

    ray_op_t* lkeys[DL_MAX_ARITY];
    ray_op_t* rkeys[DL_MAX_ARITY];
    for (int64_t c = 0; c < ncols; c++) {
        char buf[16];
        snprintf(buf, sizeof(buf), "c%d", (int)c);
        lkeys[c] = ray_scan_table(g, l_tid, buf);
        rkeys[c] = ray_scan_table(g, r_tid, buf);
        if (!lkeys[c] || !rkeys[c]) {
            ray_graph_free(g);
            ray_release(cl);
            ray_release(cr);
            return ray_error("memory", "table_antijoin: scan");
        }
    }

    ray_op_t* aj = ray_antijoin(g, l, lkeys, r, rkeys, (uint8_t)ncols);
    if (!aj) {
        ray_graph_free(g);
        ray_release(cl);
        ray_release(cr);
        return ray_error("memory", "table_antijoin: antijoin");
    }
    ray_t* raw = ray_execute(g, aj);
    ray_graph_free(g);
    ray_release(cl);
    ray_release(cr);

    /* restore_names passes NULL straight through; report it as an error so
     * the "returns an owned table or an error" contract holds. */
    if (!raw)
        return ray_error("memory", "table_antijoin: execute");
    return restore_names(raw, left);
}

/* Normalize column names of a table to match the target relation's naming scheme.
 * Returns a new owned table with correct names (shares column data).
 * Currently unused but retained for future use by external callers.
*/ +static ray_t* normalize_columns(ray_t* tbl, dl_rel_t* rel) + __attribute__((unused)); +static ray_t* normalize_columns(ray_t* tbl, dl_rel_t* rel) { + if (!tbl || RAY_IS_ERR(tbl)) return tbl; + int64_t ncols = ray_table_ncols(tbl); + if (ncols != rel->arity) { + /* Arity mismatch — can't normalize */ + ray_retain(tbl); + return tbl; + } + /* Check if already correct */ + bool ok = true; + for (int c = 0; c < rel->arity; c++) { + if (ray_table_col_name(tbl, c) != rel->col_names[c]) { ok = false; break; } + } + if (ok) { ray_retain(tbl); return tbl; } + + /* Rebuild with correct names, sharing column data */ + ray_t* out = ray_table_new(rel->arity); + for (int c = 0; c < rel->arity; c++) { + ray_t* col = ray_table_get_col_idx(tbl, c); + if (col) + out = ray_table_add_col(out, rel->col_names[c], col); + } + return out; +} + +/* ======================================================================== + * Provenance helpers + * ======================================================================== */ + +/* Hash all columns of ref at row r into a single key. */ +static uint64_t dl_row_hash(int64_t** col_data, int64_t ncols, int64_t r) { + uint64_t h = ray_hash_i64(col_data[0][r]); + for (int64_t c = 1; c < ncols; c++) + h = ray_hash_combine(h, ray_hash_i64(col_data[c][r])); + return h; +} + +/* Check if rows match across `ncols` columns. */ +static bool dl_row_eq(int64_t** a_cols, int64_t ar, + int64_t** b_cols, int64_t br, int64_t ncols) { + for (int64_t c = 0; c < ncols; c++) + if (a_cols[c][ar] != b_cols[c][br]) return false; + return true; +} + +/* Open-addressing hash set keyed by ref-row tuple. Slot stores ref row + * index; lookup hashes the probe-row tuple and walks the probe chain. + * Replaces the per-call O(ref_rows) linear scan in dl_row_in_table — + * the previous shape was O(tbl_rows × ref_rows × ncols) which is + * quadratic for typical datalog provenance workloads. 
*/ +typedef struct { + int64_t* slots; /* row index, -1 = empty */ + ray_t* block; + int64_t cap; + int64_t mask; + int64_t** ref_cols; /* cached column data ptrs for ref */ + int64_t ncols; +} dl_rowset_t; + +static bool dl_rowset_init(dl_rowset_t* rs, ray_t* ref) { + int64_t ncols = ray_table_ncols(ref); + int64_t nrows = ray_table_nrows(ref); + rs->ncols = ncols; + rs->ref_cols = (int64_t**)ray_sys_alloc(sizeof(int64_t*) * (size_t)ncols); + if (!rs->ref_cols) return false; + for (int64_t c = 0; c < ncols; c++) { + ray_t* col = ray_table_get_col_idx(ref, c); + rs->ref_cols[c] = col ? (int64_t*)ray_data(col) : NULL; + } + int64_t cap = 16; + while (cap < (nrows > 0 ? nrows * 2 : 16)) cap *= 2; + rs->block = ray_alloc((size_t)cap * sizeof(int64_t)); + if (!rs->block || RAY_IS_ERR(rs->block)) { + ray_sys_free(rs->ref_cols); + rs->ref_cols = NULL; + return false; + } + rs->slots = (int64_t*)ray_data(rs->block); + rs->cap = cap; + rs->mask = cap - 1; + for (int64_t i = 0; i < cap; i++) rs->slots[i] = -1; + + for (int64_t r = 0; r < nrows; r++) { + uint64_t h = dl_row_hash(rs->ref_cols, ncols, r); + int64_t s = (int64_t)(h & (uint64_t)rs->mask); + while (rs->slots[s] != -1) s = (s + 1) & rs->mask; + rs->slots[s] = r; + } + return true; +} + +static void dl_rowset_destroy(dl_rowset_t* rs) { + if (rs->block) { ray_release(rs->block); rs->block = NULL; } + if (rs->ref_cols) { ray_sys_free(rs->ref_cols); rs->ref_cols = NULL; } +} + +/* True if the row at `tbl_cols[..][row]` is present in the set. */ +static bool dl_rowset_contains(dl_rowset_t* rs, int64_t** tbl_cols, int64_t row) { + uint64_t h = dl_row_hash(tbl_cols, rs->ncols, row); + int64_t s = (int64_t)(h & (uint64_t)rs->mask); + while (rs->slots[s] != -1) { + int64_t r = rs->slots[s]; + if (dl_row_eq(tbl_cols, row, rs->ref_cols, r, rs->ncols)) + return true; + s = (s + 1) & rs->mask; + } + return false; +} + +/* Build source provenance for one IDB relation in CSR format. 
+ * + * For each derived row, extracts head variable bindings from the firing rule + * and scans each positive body atom's relation for rows consistent with those + * bindings. Results are stored as two parallel vectors on the relation: + * + * prov_src_offsets — I64[nrows+1]: offsets[i] = start index in prov_src_data + * for derived row i. offsets[nrows] = total entry count. + * prov_src_data — I64[total]: each entry = (rel_idx << 32) | row_idx, + * packed reference to the contributing source row. + * Row indices are truncated to 32 bits (max ~4 billion rows + * per relation). + * + * Body-only variables (not appearing in the head) are unconstrained during + * source lookup, so the entry set may be a superset of the true proof. */ +static void dl_build_source_prov(dl_program_t* prog, dl_rel_t* rel, + int64_t nrows, int64_t* pd) { + ray_t* off_vec = ray_vec_new(RAY_I64, nrows + 1); + if (!off_vec || RAY_IS_ERR(off_vec)) return; + off_vec->len = nrows + 1; + int64_t* off = (int64_t*)ray_data(off_vec); + + int64_t buf_cap = (nrows < 16) ? 
64 : nrows * 4; + ray_t* buf_block = ray_alloc((size_t)buf_cap * sizeof(int64_t)); + if (!buf_block) { ray_release(off_vec); return; } + int64_t* buf = (int64_t*)ray_data(buf_block); + int64_t buf_len = 0; + + for (int64_t row = 0; row < nrows; row++) { + off[row] = buf_len; + if (pd[row] < 0) continue; + + dl_rule_t* rule = &prog->rules[pd[row]]; + + int64_t var_vals[DL_MAX_ARITY * DL_MAX_BODY]; + bool var_set [DL_MAX_ARITY * DL_MAX_BODY]; + memset(var_set, 0, sizeof(var_set)); + + /* Extract head variable bindings from this derived row */ + for (int h = 0; h < rule->head_arity; h++) { + int v = rule->head_vars[h]; + if (v == DL_CONST) continue; + ray_t* col = ray_table_get_col_idx(rel->table, h); + if (!col) continue; + var_vals[v] = ((int64_t*)ray_data(col))[row]; + var_set[v] = true; + } + + /* For each positive body atom, find matching source rows */ + for (int b = 0; b < rule->n_body; b++) { + dl_body_t* body = &rule->body[b]; + if (body->type != DL_POS) continue; + + int bri = dl_find_rel(prog, body->pred); + if (bri < 0) continue; + dl_rel_t* brel = &prog->rels[bri]; + int64_t bnrows = ray_table_nrows(brel->table); + + for (int64_t br = 0; br < bnrows; br++) { + bool match = true; + for (int c = 0; c < body->arity; c++) { + ray_t* bcol = ray_table_get_col_idx(brel->table, c); + if (!bcol) { match = false; break; } + int64_t cell = ((int64_t*)ray_data(bcol))[br]; + int v = body->vars[c]; + if (v == DL_CONST) { + if (cell != body->const_vals[c]) { match = false; break; } + } else if (var_set[v]) { + if (cell != var_vals[v]) { match = false; break; } + } + /* body-only variable: unconstrained, always matches */ + } + if (!match) continue; + + if (buf_len >= buf_cap) { + int64_t new_cap = buf_cap * 2; + ray_t* new_block = ray_alloc((size_t)new_cap * sizeof(int64_t)); + if (!new_block) goto oom; + memcpy(ray_data(new_block), buf, (size_t)buf_len * sizeof(int64_t)); + ray_free(buf_block); + buf_block = new_block; + buf = (int64_t*)ray_data(new_block); + buf_cap = 
new_cap; + } + buf[buf_len++] = ((int64_t)bri << 32) | (int64_t)(uint32_t)br; + } + } + } + + /* Success path: finalize CSR */ + off[nrows] = buf_len; + { + ray_t* data_vec = ray_vec_new(RAY_I64, buf_len > 0 ? buf_len : 1); + if (!data_vec || RAY_IS_ERR(data_vec)) goto oom; + data_vec->len = buf_len; + if (buf_len > 0) + memcpy(ray_data(data_vec), buf, (size_t)buf_len * sizeof(int64_t)); + ray_free(buf_block); + + if (rel->prov_src_offsets) ray_release(rel->prov_src_offsets); + if (rel->prov_src_data) ray_release(rel->prov_src_data); + rel->prov_src_offsets = off_vec; + rel->prov_src_data = data_vec; + return; + } + +oom: + /* Allocation failed — discard partial results, leave both fields NULL */ + ray_free(buf_block); + ray_release(off_vec); + if (rel->prov_src_offsets) { ray_release(rel->prov_src_offsets); rel->prov_src_offsets = NULL; } + if (rel->prov_src_data) { ray_release(rel->prov_src_data); rel->prov_src_data = NULL; } +} + +/* Build provenance for all IDB relations. + * For each rule, compile with final tables and mark matching tuples. + * Then build deep source provenance (CSR offsets + packed source refs). 
*/ +static void dl_build_provenance(dl_program_t* prog) { + for (int ri = 0; ri < prog->n_rels; ri++) { + dl_rel_t* rel = &prog->rels[ri]; + if (!rel->is_idb) continue; + + int64_t nrows = ray_table_nrows(rel->table); + if (nrows == 0) continue; + + /* Allocate provenance column initialized to -1 (unknown) */ + ray_t* prov = ray_vec_new(RAY_I64, nrows); + if (!prov || RAY_IS_ERR(prov)) continue; + prov->len = nrows; + int64_t* pd = (int64_t*)ray_data(prov); + for (int64_t r = 0; r < nrows; r++) + pd[r] = -1; + + /* For each rule with this head predicate */ + for (int r = 0; r < prog->n_rules; r++) { + dl_rule_t* rule = &prog->rules[r]; + if (strcmp(rule->head_pred, rel->name) != 0) continue; + + /* Compile and execute the rule to get its derivable tuples */ + ray_graph_t* g = ray_graph_new(NULL); + if (!g) continue; + + ray_op_t* output = dl_compile_rule(prog, rule, -1, r, g); + if (!output) { ray_graph_free(g); continue; } + + ray_t* raw = ray_execute(g, output); + ray_graph_free(g); + if (!raw || RAY_IS_ERR(raw)) continue; + + ray_t* derived = table_rename_cols(raw, rel); + ray_release(raw); + if (!derived || RAY_IS_ERR(derived)) continue; + + /* Mark rows in rel->table that appear in derived. Build a + * hashset over `derived` once and probe per row of rel — + * was O(nrows × derived_rows × ncols), now O(nrows + derived_rows). */ + dl_rowset_t rs; + if (dl_rowset_init(&rs, derived)) { + int64_t ncols_t = ray_table_ncols(rel->table); + int64_t** tbl_cols = (int64_t**)ray_sys_alloc(sizeof(int64_t*) * (size_t)ncols_t); + if (tbl_cols) { + for (int64_t c = 0; c < ncols_t; c++) { + ray_t* col = ray_table_get_col_idx(rel->table, c); + tbl_cols[c] = col ? 
(int64_t*)ray_data(col) : NULL; + } + for (int64_t row = 0; row < nrows; row++) { + if (pd[row] >= 0) continue; + if (dl_rowset_contains(&rs, tbl_cols, row)) + pd[row] = r; + } + ray_sys_free(tbl_cols); + } + dl_rowset_destroy(&rs); + } + ray_release(derived); + } + + if (rel->prov_col) ray_release(rel->prov_col); + rel->prov_col = prov; + + dl_build_source_prov(prog, rel, nrows, pd); + } +} + +/* ======================================================================== + * Semi-naive fixpoint evaluation + * ======================================================================== */ + +int dl_eval(dl_program_t* prog) { + if (!prog) return -1; + + /* eval_err is sticky: it may have been raised at rule-add time (e.g. + * by a head-const type conflict in dl_idb_align_head_const_types) — + * resetting here would silently discard that signal. Additional + * failures during stratify/compile/exec below keep setting the flag, + * and the final return honors it either way. */ + if (prog->eval_err) { + /* Short-circuit: compile-time errors already stand; don't run + * a potentially broken fixpoint. 
*/ + return -1; + } + + /* Stratify if not already done */ + if (prog->n_strata == 0) { + if (dl_stratify(prog) != 0) return -1; + } + + /* Process each stratum */ + for (int s = 0; s < prog->n_strata; s++) { + /* Collect rules in this stratum */ + dl_rule_t* stratum_rules[DL_MAX_RULES]; + int stratum_rule_idx[DL_MAX_RULES]; /* original index in prog->rules */ + int n_stratum_rules = 0; + + for (int r = 0; r < prog->n_rules; r++) { + if (prog->rules[r].stratum == s) { + stratum_rule_idx[n_stratum_rules] = r; + stratum_rules[n_stratum_rules++] = &prog->rules[r]; + } + } + if (n_stratum_rules == 0) continue; + + /* Phase A: Initial evaluation — evaluate each rule with full relations */ + /* Group rules by head predicate */ + for (int ri = 0; ri < n_stratum_rules; ri++) { + dl_rule_t* rule = stratum_rules[ri]; + int head_idx = dl_find_rel(prog, rule->head_pred); + if (head_idx < 0) continue; + dl_rel_t* head_rel = &prog->rels[head_idx]; + + ray_graph_t* g = ray_graph_new(NULL); + if (!g) { prog->eval_err = true; continue; } + + ray_op_t* output = dl_compile_rule(prog, rule, -1, stratum_rule_idx[ri], g); + if (!output) { + /* dl_compile_rule marks eval_err on genuine failures; a bare + * NULL means "rule has no rows this pass" — not a fault. 
*/ + ray_graph_free(g); + continue; + } + + ray_t* raw_tuples = ray_execute(g, output); + ray_graph_free(g); + + if (!raw_tuples) continue; + if (RAY_IS_ERR(raw_tuples)) { prog->eval_err = true; ray_error_free(raw_tuples); continue; } + + /* Rename columns to match head relation's expected names */ + ray_t* new_tuples = table_rename_cols(raw_tuples, head_rel); + ray_release(raw_tuples); + if (!new_tuples) continue; + if (RAY_IS_ERR(new_tuples)) { prog->eval_err = true; ray_error_free(new_tuples); continue; } + + /* Merge into the head relation's table */ + ray_t* merged = table_union(head_rel->table, new_tuples); + ray_release(new_tuples); + if (!merged) { prog->eval_err = true; continue; } + if (RAY_IS_ERR(merged)) { prog->eval_err = true; ray_error_free(merged); continue; } + ray_t* deduped = table_distinct(merged); + ray_release(merged); + if (!deduped) { prog->eval_err = true; continue; } + if (RAY_IS_ERR(deduped)) { prog->eval_err = true; ray_error_free(deduped); continue; } + ray_release(head_rel->table); + head_rel->table = deduped; + } + + /* Phase B: Semi-naive loop — iterate with delta relations */ + /* For each IDB predicate in this stratum, compute delta as the + * difference between current and previous table states. */ + ray_t* prev_tables[DL_MAX_RELS]; + ray_t* delta_tables[DL_MAX_RELS]; + memset(prev_tables, 0, sizeof(prev_tables)); + memset(delta_tables, 0, sizeof(delta_tables)); + + /* Initially, delta = full table (all tuples are new) */ + for (int p = 0; p < prog->strata_sizes[s]; p++) { + int rel_idx = prog->strata[s][p]; + dl_rel_t* rel = &prog->rels[rel_idx]; + if (rel->is_idb) { + ray_retain(rel->table); + delta_tables[rel_idx] = rel->table; + /* prev = empty table with same schema as the relation. + * Column types must match rel->table so later ray_vec_concat + * calls don't reject the merge when the relation has + * non-i64 columns (e.g. RAY_SYM from head-constant slots). 
*/ + prev_tables[rel_idx] = ray_table_new(rel->arity); + for (int c = 0; c < rel->arity && c < DL_MAX_ARITY; c++) { + ray_t* src = ray_table_get_col_idx(rel->table, c); + int8_t ctype = src ? src->type : RAY_I64; + ray_t* empty_col = ray_vec_new(ctype, 0); + if (empty_col && !RAY_IS_ERR(empty_col)) { + prev_tables[rel_idx] = ray_table_add_col( + prev_tables[rel_idx], rel->col_names[c], empty_col); + ray_release(empty_col); + } + } + } + } + + /* Semi-naive iteration */ + int max_iter = 1000; + for (int iter = 0; iter < max_iter; iter++) { + /* Check convergence: all deltas empty */ + bool any_new = false; + for (int p = 0; p < prog->strata_sizes[s]; p++) { + int rel_idx = prog->strata[s][p]; + if (delta_tables[rel_idx] && + !RAY_IS_ERR(delta_tables[rel_idx]) && + ray_table_nrows(delta_tables[rel_idx]) > 0) { + any_new = true; + break; + } + } + if (!any_new) break; + + /* For each rule, for each positive body position that uses a + * delta relation, compile and execute */ + ray_t* new_tuples_per_rel[DL_MAX_RELS]; + memset(new_tuples_per_rel, 0, sizeof(new_tuples_per_rel)); + + for (int ri = 0; ri < n_stratum_rules; ri++) { + dl_rule_t* rule = stratum_rules[ri]; + int head_idx = dl_find_rel(prog, rule->head_pred); + if (head_idx < 0) continue; + + for (int b = 0; b < rule->n_body; b++) { + dl_body_t* body = &rule->body[b]; + if (body->type != DL_POS) continue; + + int body_rel = dl_find_rel(prog, body->pred); + if (body_rel < 0) continue; + if (!prog->rels[body_rel].is_idb) continue; + if (!delta_tables[body_rel] || + ray_table_nrows(delta_tables[body_rel]) == 0) continue; + + /* Swap in delta relation for this body position */ + ray_t* saved = prog->rels[body_rel].table; + prog->rels[body_rel].table = delta_tables[body_rel]; + + ray_graph_t* g = ray_graph_new(NULL); + if (!g) { + prog->rels[body_rel].table = saved; + prog->eval_err = true; + continue; + } + + ray_op_t* output = dl_compile_rule(prog, rule, b, stratum_rule_idx[ri], g); + if (!output) { + 
ray_graph_free(g); + prog->rels[body_rel].table = saved; + /* dl_compile_rule sets eval_err itself on genuine + * failures; NULL without the flag means "rule yields + * no rows this iteration" and should not fault. */ + continue; + } + + ray_t* raw_result = ray_execute(g, output); + ray_graph_free(g); + prog->rels[body_rel].table = saved; + + if (!raw_result) continue; + if (RAY_IS_ERR(raw_result)) { prog->eval_err = true; ray_error_free(raw_result); continue; } + + /* Rename columns to match head relation */ + dl_rel_t* head_rel2 = &prog->rels[head_idx]; + ray_t* result = table_rename_cols(raw_result, head_rel2); + ray_release(raw_result); + if (!result) continue; + if (RAY_IS_ERR(result)) { prog->eval_err = true; ray_error_free(result); continue; } + + /* Accumulate new tuples for this head */ + if (new_tuples_per_rel[head_idx]) { + ray_t* u = table_union(new_tuples_per_rel[head_idx], result); + ray_release(new_tuples_per_rel[head_idx]); + ray_release(result); + if (!u) { + prog->eval_err = true; + new_tuples_per_rel[head_idx] = NULL; + continue; + } + if (RAY_IS_ERR(u)) { + prog->eval_err = true; + ray_error_free(u); + new_tuples_per_rel[head_idx] = NULL; + continue; + } + new_tuples_per_rel[head_idx] = u; + } else { + new_tuples_per_rel[head_idx] = result; + } + } + } + + /* For each IDB: dedup new tuples, subtract existing, merge */ + for (int p = 0; p < prog->strata_sizes[s]; p++) { + int rel_idx = prog->strata[s][p]; + dl_rel_t* rel = &prog->rels[rel_idx]; + if (!rel->is_idb) continue; + + /* Free old delta */ + if (delta_tables[rel_idx] && !RAY_IS_ERR(delta_tables[rel_idx])) + ray_release(delta_tables[rel_idx]); + delta_tables[rel_idx] = NULL; + + ray_t* new_tuples = new_tuples_per_rel[rel_idx]; + if (!new_tuples) { delta_tables[rel_idx] = NULL; continue; } + if (RAY_IS_ERR(new_tuples)) { + prog->eval_err = true; + ray_error_free(new_tuples); + delta_tables[rel_idx] = NULL; + continue; + } + + /* Deduplicate */ + ray_t* deduped = table_distinct(new_tuples); 
+ ray_release(new_tuples); + if (!deduped) { prog->eval_err = true; continue; } + if (RAY_IS_ERR(deduped)) { prog->eval_err = true; ray_error_free(deduped); continue; } + + /* Subtract existing relation to get true delta */ + ray_t* delta = table_antijoin(deduped, rel->table); + ray_release(deduped); + if (!delta) { prog->eval_err = true; continue; } + if (RAY_IS_ERR(delta)) { prog->eval_err = true; ray_error_free(delta); continue; } + + delta_tables[rel_idx] = delta; + + /* Merge delta into full relation. A merge failure here + * leaves delta_tables set but rel->table stale — that would + * desync the fixpoint, so treat it as a hard failure. */ + if (ray_table_nrows(delta) > 0) { + ray_t* merged = table_union(rel->table, delta); + if (!merged) { prog->eval_err = true; continue; } + if (RAY_IS_ERR(merged)) { + prog->eval_err = true; + ray_error_free(merged); + continue; + } + ray_release(rel->table); + rel->table = merged; + } + } + + /* Update prev tables */ + for (int p = 0; p < prog->strata_sizes[s]; p++) { + int rel_idx = prog->strata[s][p]; + if (prev_tables[rel_idx] && !RAY_IS_ERR(prev_tables[rel_idx])) + ray_release(prev_tables[rel_idx]); + ray_retain(prog->rels[rel_idx].table); + prev_tables[rel_idx] = prog->rels[rel_idx].table; + } + } + + /* Cleanup stratum temporaries */ + for (int p = 0; p < prog->strata_sizes[s]; p++) { + int rel_idx = prog->strata[s][p]; + if (prev_tables[rel_idx] && !RAY_IS_ERR(prev_tables[rel_idx])) + ray_release(prev_tables[rel_idx]); + if (delta_tables[rel_idx] && !RAY_IS_ERR(delta_tables[rel_idx])) + ray_release(delta_tables[rel_idx]); + } + } + + /* Build provenance if requested */ + if (prog->flags & DL_FLAG_PROVENANCE) + dl_build_provenance(prog); + + /* Any compile-time or runtime error surfaced by a rule causes dl_eval + * to report failure, so callers (notably ray_query_fn) can turn this + * into a user-visible "evaluation failed" error instead of shipping a + * silently-incomplete result. */ + return prog->eval_err ? 
-1 : 0; +} + +/* ======================================================================== + * Query — retrieve result after evaluation + * ======================================================================== */ + +ray_t* dl_query(dl_program_t* prog, const char* pred_name) { + if (!prog || !pred_name) return NULL; + int idx = dl_find_rel(prog, pred_name); + if (idx < 0) return NULL; + return prog->rels[idx].table; +} + +ray_t* dl_get_provenance(dl_program_t* prog, const char* pred_name) { + if (!prog || !pred_name) return NULL; + if (!(prog->flags & DL_FLAG_PROVENANCE)) return NULL; + int idx = dl_find_rel(prog, pred_name); + if (idx < 0) return NULL; + return prog->rels[idx].prov_col; +} + +ray_t* dl_get_provenance_src_offsets(dl_program_t* prog, const char* pred_name) { + if (!prog || !pred_name) return NULL; + if (!(prog->flags & DL_FLAG_PROVENANCE)) return NULL; + int idx = dl_find_rel(prog, pred_name); + if (idx < 0) return NULL; + return prog->rels[idx].prov_src_offsets; +} + +ray_t* dl_get_provenance_src_data(dl_program_t* prog, const char* pred_name) { + if (!prog || !pred_name) return NULL; + if (!(prog->flags & DL_FLAG_PROVENANCE)) return NULL; + int idx = dl_find_rel(prog, pred_name); + if (idx < 0) return NULL; + return prog->rels[idx].prov_src_data; +} + +/* ── Builtins ── */ + +/* ══════════════════════════════════════════ + * EAV triple storage — datoms, assert-fact, scan-eav + * ══════════════════════════════════════════ */ + +/* (datoms) — create empty EAV table with schema [e a v] */ +ray_t* ray_datoms_fn(ray_t** args, int64_t n) { + (void)args; + if (n != 0) return ray_error("arity", "datoms takes no arguments"); + + int64_t e_id = ray_sym_intern("e", 1); + int64_t a_id = ray_sym_intern("a", 1); + int64_t v_id = ray_sym_intern("v", 1); + + ray_t* tbl = ray_table_new(3); + if (RAY_IS_ERR(tbl)) return tbl; + + /* e column: RAY_I64 */ + ray_t* e_col = ray_vec_new(RAY_I64, 0); + if (RAY_IS_ERR(e_col)) { ray_release(tbl); return e_col; } + tbl = 
ray_table_add_col(tbl, e_id, e_col); + ray_release(e_col); + if (RAY_IS_ERR(tbl)) return tbl; + + /* a column: RAY_SYM */ + ray_t* a_col = ray_vec_new(RAY_SYM, 0); + if (RAY_IS_ERR(a_col)) { ray_release(tbl); return a_col; } + tbl = ray_table_add_col(tbl, a_id, a_col); + ray_release(a_col); + if (RAY_IS_ERR(tbl)) return tbl; + + /* v column: RAY_I64 (symbols stored as intern ID, integers as-is) */ + ray_t* v_col = ray_vec_new(RAY_I64, 0); + if (RAY_IS_ERR(v_col)) { ray_release(tbl); return v_col; } + tbl = ray_table_add_col(tbl, v_id, v_col); + ray_release(v_col); + + return tbl; +} + +/* (assert-fact db entity attr value) — append a triple to the datoms table */ +ray_t* ray_assert_fact_fn(ray_t** args, int64_t n) { + if (n != 4) return ray_error("arity", "assert-fact expects 4 arguments: db entity attr value"); + + ray_t* db = args[0]; + ray_t* entity = args[1]; + ray_t* attr = args[2]; + ray_t* value = args[3]; + + /* Validate db is a table with 3 columns */ + if (db->type != RAY_TABLE || ray_table_ncols(db) != 3) + return ray_error("type", "assert-fact: first arg must be a datoms table"); + + /* Validate entity is i64 */ + if (entity->type != -RAY_I64) + return ray_error("type", "assert-fact: entity must be an integer"); + + /* Validate attr is a symbol */ + if (attr->type != -RAY_SYM) + return ray_error("type", "assert-fact: attr must be a symbol"); + + /* Value: accept i64 or sym. Store as i64 (sym -> intern ID). 
*/ + int64_t v_val; + if (value->type == -RAY_I64) { + v_val = value->i64; + } else if (value->type == -RAY_SYM) { + v_val = value->i64; /* sym intern ID is already i64 */ + } else { + return ray_error("type", "assert-fact: value must be an integer or symbol"); + } + + /* Build new table with appended row */ + int64_t ncols = 3; + ray_t* result = ray_table_new(ncols); + if (RAY_IS_ERR(result)) return result; + + for (int64_t c = 0; c < ncols; c++) { + ray_t* old_col = ray_table_get_col_idx(db, c); + int64_t col_name = ray_table_col_name(db, c); + + /* Clone the column via retain + COW on append */ + ray_retain(old_col); + ray_t* new_col = old_col; + + if (c == 0) { + /* e column: append entity i64 */ + int64_t e_val = entity->i64; + new_col = ray_vec_append(new_col, &e_val); + } else if (c == 1) { + /* a column: append attr sym ID */ + int64_t a_val = attr->i64; + new_col = ray_vec_append(new_col, &a_val); + } else { + /* v column: append value as i64 */ + new_col = ray_vec_append(new_col, &v_val); + } + + if (RAY_IS_ERR(new_col)) { + /* ray_cow inside ray_vec_append already released old_col ref on error/copy */ + ray_release(result); + return new_col; + } + /* ray_cow consumed our retain when it copied; don't double-release old_col */ + + result = ray_table_add_col(result, col_name, new_col); + ray_release(new_col); + if (RAY_IS_ERR(result)) return result; + } + + return result; +} + +/* (retract-fact db entity attr value) — remove a triple from the datoms table */ +ray_t* ray_retract_fact_fn(ray_t** args, int64_t n) { + if (n != 4) return ray_error("arity", "retract-fact expects 4 arguments: db entity attr value"); + + ray_t* db = args[0]; + ray_t* entity = args[1]; + ray_t* attr = args[2]; + ray_t* value = args[3]; + + if (db->type != RAY_TABLE || ray_table_ncols(db) != 3) + return ray_error("type", "retract-fact: first arg must be a datoms table"); + if (entity->type != -RAY_I64) + return ray_error("type", "retract-fact: entity must be an integer"); + if 
(attr->type != -RAY_SYM) + return ray_error("type", "retract-fact: attr must be a symbol"); + + int64_t match_e = entity->i64; + int64_t match_a = attr->i64; + int64_t match_v; + if (value->type == -RAY_I64) + match_v = value->i64; + else if (value->type == -RAY_SYM) + match_v = value->i64; + else + return ray_error("type", "retract-fact: value must be an integer or symbol"); + + /* Get existing columns */ + ray_t* e_col = ray_table_get_col_idx(db, 0); + ray_t* a_col = ray_table_get_col_idx(db, 1); + ray_t* v_col = ray_table_get_col_idx(db, 2); + int64_t nrows = ray_len(e_col); + + int64_t* e_data = (int64_t*)ray_data(e_col); + int64_t* a_data = (int64_t*)ray_data(a_col); + int64_t* v_data = (int64_t*)ray_data(v_col); + + /* Build new columns, skipping matching rows */ + ray_t* new_e = ray_vec_new(RAY_I64, nrows); + if (RAY_IS_ERR(new_e)) return new_e; + ray_t* new_a = ray_vec_new(RAY_SYM, nrows); + if (RAY_IS_ERR(new_a)) { ray_release(new_e); return new_a; } + ray_t* new_v = ray_vec_new(RAY_I64, nrows); + if (RAY_IS_ERR(new_v)) { ray_release(new_e); ray_release(new_a); return new_v; } + + for (int64_t r = 0; r < nrows; r++) { + if (e_data[r] == match_e && a_data[r] == match_a && v_data[r] == match_v) + continue; /* skip this row */ + new_e = ray_vec_append(new_e, &e_data[r]); + if (RAY_IS_ERR(new_e)) { ray_release(new_a); ray_release(new_v); return new_e; } + new_a = ray_vec_append(new_a, &a_data[r]); + if (RAY_IS_ERR(new_a)) { ray_release(new_e); ray_release(new_v); return new_a; } + new_v = ray_vec_append(new_v, &v_data[r]); + if (RAY_IS_ERR(new_v)) { ray_release(new_e); ray_release(new_a); return new_v; } + } + + /* Build result table */ + ray_t* result = ray_table_new(3); + if (RAY_IS_ERR(result)) { ray_release(new_e); ray_release(new_a); ray_release(new_v); return result; } + result = ray_table_add_col(result, ray_table_col_name(db, 0), new_e); + ray_release(new_e); + if (RAY_IS_ERR(result)) { ray_release(new_a); ray_release(new_v); return result; } + result 
= ray_table_add_col(result, ray_table_col_name(db, 1), new_a); + ray_release(new_a); + if (RAY_IS_ERR(result)) { ray_release(new_v); return result; } + result = ray_table_add_col(result, ray_table_col_name(db, 2), new_v); + ray_release(new_v); + return result; +} + +/* (scan-eav db attr) — filter by attribute, return [e v] table + (scan-eav db entity attr) — filter by entity+attr, return single value */ +ray_t* ray_scan_eav_fn(ray_t** args, int64_t n) { + if (n < 2 || n > 3) + return ray_error("arity", "scan-eav expects 2 or 3 arguments"); + + ray_t* db = args[0]; + if (db->type != RAY_TABLE || ray_table_ncols(db) != 3) + return ray_error("type", "scan-eav: first arg must be a datoms table"); + + ray_t* e_col = ray_table_get_col_idx(db, 0); + ray_t* a_col = ray_table_get_col_idx(db, 1); + ray_t* v_col = ray_table_get_col_idx(db, 2); + int64_t nrows = ray_table_nrows(db); + + if (n == 2) { + /* (scan-eav db attr) — filter by attribute, return [e v] table */ + ray_t* attr_arg = args[1]; + if (attr_arg->type != -RAY_SYM) + return ray_error("type", "scan-eav: attr must be a symbol"); + int64_t attr_id = attr_arg->i64; + + int64_t e_name = ray_sym_intern("e", 1); + int64_t v_name = ray_sym_intern("v", 1); + + ray_t* re = ray_vec_new(RAY_I64, nrows); + if (RAY_IS_ERR(re)) return re; + ray_t* rv = ray_vec_new(RAY_I64, nrows); + if (RAY_IS_ERR(rv)) { ray_release(re); return rv; } + + const int64_t* e_data = (const int64_t*)ray_data(e_col); + const int64_t* v_data = (const int64_t*)ray_data(v_col); + + for (int64_t r = 0; r < nrows; r++) { + int64_t a_val = ray_read_sym(ray_data(a_col), r, a_col->type, a_col->attrs); + if (a_val == attr_id) { + re = ray_vec_append(re, &e_data[r]); + if (RAY_IS_ERR(re)) { ray_release(rv); return re; } + rv = ray_vec_append(rv, &v_data[r]); + if (RAY_IS_ERR(rv)) { ray_release(re); return rv; } + } + } + + ray_t* result = ray_table_new(2); + if (RAY_IS_ERR(result)) { ray_release(re); ray_release(rv); return result; } + result = 
ray_table_add_col(result, e_name, re); + ray_release(re); + if (RAY_IS_ERR(result)) { ray_release(rv); return result; } + result = ray_table_add_col(result, v_name, rv); + ray_release(rv); + return result; + + } else { + /* (scan-eav db entity attr) — filter by entity+attr, return single value */ + ray_t* entity_arg = args[1]; + ray_t* attr_arg = args[2]; + + if (entity_arg->type != -RAY_I64) + return ray_error("type", "scan-eav: entity must be an integer"); + if (attr_arg->type != -RAY_SYM) + return ray_error("type", "scan-eav: attr must be a symbol"); + + int64_t entity_id = entity_arg->i64; + int64_t attr_id = attr_arg->i64; + + const int64_t* e_data = (const int64_t*)ray_data(e_col); + + const int64_t* v_data = (const int64_t*)ray_data(v_col); + + for (int64_t r = 0; r < nrows; r++) { + if (e_data[r] != entity_id) continue; + int64_t a_val = ray_read_sym(ray_data(a_col), r, a_col->type, a_col->attrs); + if (a_val == attr_id) { + return ray_i64(v_data[r]); + } + } + + return ray_error("value", "scan-eav: no matching triple found"); + } +} + +/* (pull db entity) — all attributes of entity as dict + (pull db entity [attrs]) — only specified attributes as dict */ +ray_t* ray_pull_fn(ray_t** args, int64_t n) { + if (n < 2 || n > 3) + return ray_error("arity", "pull expects 2 or 3 arguments: db entity [attrs]"); + + ray_t* db = args[0]; + ray_t* entity = args[1]; + + if (db->type != RAY_TABLE || ray_table_ncols(db) != 3) + return ray_error("type", "pull: first arg must be a datoms table"); + if (entity->type != -RAY_I64) + return ray_error("type", "pull: entity must be an integer"); + + /* Optional attribute filter */ + ray_t* attr_filter = NULL; + int64_t n_filter = 0; + const int64_t* filter_ids = NULL; + if (n == 3) { + attr_filter = args[2]; + if (!ray_is_vec(attr_filter) || attr_filter->type != RAY_SYM) + return ray_error("type", "pull: third arg must be a symbol vector [attr ...]"); + n_filter = attr_filter->len; + filter_ids = (const 
int64_t*)ray_data(attr_filter); + } + + int64_t entity_id = entity->i64; + ray_t* e_col = ray_table_get_col_idx(db, 0); + ray_t* a_col = ray_table_get_col_idx(db, 1); + ray_t* v_col = ray_table_get_col_idx(db, 2); + int64_t nrows = ray_table_nrows(db); + + const int64_t* e_data = (const int64_t*)ray_data(e_col); + const int64_t* v_data = (const int64_t*)ray_data(v_col); + + /* Build dict: keys SYM vec of attribute IDs, vals LIST of i64 atoms. */ + ray_t* keys = ray_sym_vec_new(RAY_SYM_W64, 8); + if (RAY_IS_ERR(keys)) return keys; + ray_t* vals = ray_list_new(8); + if (RAY_IS_ERR(vals)) { ray_release(keys); return vals; } + + for (int64_t r = 0; r < nrows; r++) { + if (e_data[r] != entity_id) continue; + int64_t a_val = ray_read_sym(ray_data(a_col), r, a_col->type, a_col->attrs); + + /* Check filter if present */ + if (attr_filter) { + int found = 0; + for (int64_t f = 0; f < n_filter; f++) { + if (filter_ids[f] == a_val) { found = 1; break; } + } + if (!found) continue; + } + + keys = ray_vec_append(keys, &a_val); + if (RAY_IS_ERR(keys)) { ray_release(vals); return keys; } + + ray_t* val = ray_i64(v_data[r]); + if (RAY_IS_ERR(val)) { ray_release(keys); ray_release(vals); return val; } + vals = ray_list_append(vals, val); + ray_release(val); + if (RAY_IS_ERR(vals)) { ray_release(keys); return vals; } + } + + return ray_dict_new(keys, vals); +} + +/* ══════════════════════════════════════════ + * Datalog — rule definitions and query compilation + * ══════════════════════════════════════════ */ + +/* Check if a symbol name starts with '?' (Datalog variable) */ +static int is_dl_var(ray_t* x) { + if (!x || x->type != -RAY_SYM) return 0; + ray_t* s = ray_sym_str(x->i64); + if (!s) return 0; + const char* p = ray_str_ptr(s); + return p && p[0] == '?'; +} + +/* ══════════════════════════════════════════ + * Datalog wrappers — thin layer over src/datalog/datalog.h + * + * Global rule storage lives in g_dl_rules[] / g_dl_n_rules. + * ray_rule_fn parses Rayfall (rule ...) 
syntax and stores rules. + * ray_query_fn builds a temporary dl_program_t, copies global rules, + * registers the EAV table, evaluates to fixpoint, and returns results. + * ══════════════════════════════════════════ */ + +/* Global rule storage: rules defined via (rule ...) persist across queries */ +static dl_rule_t g_dl_rules[DL_MAX_RULES]; +static int g_dl_n_rules = 0; + +void dl_append_global_rules(dl_program_t* prog) { + if (!prog) return; + for (int i = 0; i < g_dl_n_rules; i++) + dl_add_rule(prog, &g_dl_rules[i]); +} + +/* Variable name -> index map for parsing a single rule or query body */ +typedef struct { + int64_t syms[DL_MAX_ARITY * DL_MAX_BODY]; + int n; +} dl_var_map_t; + +static int dl_var_get_or_create(dl_var_map_t* map, int64_t sym_id) { + for (int i = 0; i < map->n; i++) + if (map->syms[i] == sym_id) return i; + if (map->n >= DL_MAX_ARITY * DL_MAX_BODY) return -1; + map->syms[map->n] = sym_id; + return map->n++; +} + +/* Map Rayfall comparison operator name to DL_CMP_* constant. + * Returns -1 if not a recognized comparison. */ +static int dl_cmp_op_from_name(const char* name) { + if (strcmp(name, ">") == 0) return DL_CMP_GT; + if (strcmp(name, ">=") == 0) return DL_CMP_GE; + if (strcmp(name, "<") == 0) return DL_CMP_LT; + if (strcmp(name, "<=") == 0) return DL_CMP_LE; + if (strcmp(name, "==") == 0) return DL_CMP_EQ; + if (strcmp(name, "!=") == 0) return DL_CMP_NE; + return -1; +} + +/* Map Rayfall arithmetic operator name to OP_* constant for dl_expr_t. + * Returns -1 if not recognized. */ +static int dl_arith_op_from_name(const char* name) { + if (strcmp(name, "+") == 0) return OP_ADD; + if (strcmp(name, "-") == 0) return OP_SUB; + if (strcmp(name, "*") == 0) return OP_MUL; + if (strcmp(name, "/") == 0) return OP_DIV; + return -1; +} + +/* Build a dl_expr_t from a Rayfall AST node. + * Handles: integer constants, ?variables, (op expr expr). 
*/ +static dl_expr_t* dl_build_expr(ray_t* node, dl_var_map_t* vars) { + if (!node) return NULL; + if (node->type == -RAY_I64) + return dl_expr_const(node->i64); + if (node->type == -RAY_F64) + return dl_expr_const_f64(node->f64); + if (node->type == -RAY_SYM && is_dl_var(node)) { + int vi = dl_var_get_or_create(vars, node->i64); + return (vi >= 0) ? dl_expr_var(vi) : NULL; + } + if (is_list(node) && ray_len(node) == 3) { + ray_t** elems = (ray_t**)ray_data(node); + if (elems[0]->type == -RAY_SYM) { + ray_t* op_str = ray_sym_str(elems[0]->i64); + if (op_str) { + int op = dl_arith_op_from_name(ray_str_ptr(op_str)); + if (op >= 0) { + dl_expr_t* l = dl_build_expr(elems[1], vars); + dl_expr_t* r = dl_build_expr(elems[2], vars); + if (l && r) return dl_expr_binop(op, l, r); + } + } + } + } + /* Fallback: treat symbols (non-variable) as constants (sym ID) */ + if (node->type == -RAY_SYM) + return dl_expr_const(node->i64); + return NULL; +} + +/* Check if a Rayfall list clause is a triple pattern: (?e :attr ?v) + * A triple pattern has exactly 3 elements and the first element is a + * ?variable (distinguishing it from rule invocations where the first + * element is a predicate name symbol). */ +static bool dl_is_wildcard(ray_t* node) { + if (node->type != -RAY_SYM) return false; + ray_t* s = ray_sym_str(node->i64); + return s && ray_str_len(s) == 1 && ray_str_ptr(s)[0] == '_'; +} + + + +static bool dl_is_triple_pattern(ray_t* clause) { + if (!is_list(clause) || ray_len(clause) != 3) return false; + ray_t** ce = (ray_t**)ray_data(clause); + /* Position 0 must be a ?variable, wildcard _, integer constant, + * or quoted symbol (not a bare name that could be a rule predicate). 
+ * Triple patterns: (?e :attr ?v), (_ :attr ?v), (1 :attr ?v) */ + if (is_dl_var(ce[0])) return true; + if (ce[0]->type == -RAY_I64) return true; + if (dl_is_wildcard(ce[0]) && ce[1]->type == -RAY_SYM && !is_dl_var(ce[1])) + return true; /* _ is always wildcard -- reserved, never a predicate */ + /* Quoted symbol (no RAY_ATTR_NAME) in position 0 + non-var symbol in position 1 */ + if (ce[0]->type == -RAY_SYM && !(ce[0]->attrs & RAY_ATTR_NAME)) { + if (ce[1]->type == -RAY_SYM && !is_dl_var(ce[1])) + return true; + } + return false; +} + +/* Check if a clause is a negation: (not (...)) */ +static bool dl_is_negation(ray_t* clause) { + if (!is_list(clause) || ray_len(clause) != 2) return false; + ray_t** ce = (ray_t**)ray_data(clause); + if (ce[0]->type != -RAY_SYM) return false; + ray_t* name = ray_sym_str(ce[0]->i64); + return name && strcmp(ray_str_ptr(name), "not") == 0; +} + +/* Check if a clause is a comparison: (> ?x ?y) or (> ?x 100) */ +static bool dl_is_comparison(ray_t* clause) { + if (!is_list(clause) || ray_len(clause) < 3) return false; + ray_t** ce = (ray_t**)ray_data(clause); + if (ce[0]->type != -RAY_SYM) return false; + ray_t* name = ray_sym_str(ce[0]->i64); + if (!name) return false; + return dl_cmp_op_from_name(ray_str_ptr(name)) >= 0; +} + +/* Check if a clause is an assignment: (= ?var expr) */ +static bool dl_is_assignment(ray_t* clause) { + if (!is_list(clause) || ray_len(clause) != 3) return false; + ray_t** ce = (ray_t**)ray_data(clause); + if (ce[0]->type != -RAY_SYM) return false; + ray_t* name = ray_sym_str(ce[0]->i64); + if (!name || strcmp(ray_str_ptr(name), "=") != 0) return false; + /* LHS must be a variable */ + return is_dl_var(ce[1]); +} + +static bool dl_is_aggregate(ray_t* clause) { + if (!is_list(clause) || ray_len(clause) < 3) return false; + ray_t** ce = (ray_t**)ray_data(clause); + if (ce[0]->type != -RAY_SYM) return false; + ray_t* name = ray_sym_str(ce[0]->i64); + if (!name) return false; + const char* n = 
ray_str_ptr(name); + return strcmp(n, "count") == 0 || strcmp(n, "sum") == 0 + || strcmp(n, "min") == 0 || strcmp(n, "max") == 0 + || strcmp(n, "avg") == 0; +} + +static int dl_agg_op_from_name(const char* n) { + if (strcmp(n, "count") == 0) return DL_AGG_COUNT; + if (strcmp(n, "sum") == 0) return DL_AGG_SUM; + if (strcmp(n, "min") == 0) return DL_AGG_MIN; + if (strcmp(n, "max") == 0) return DL_AGG_MAX; + if (strcmp(n, "avg") == 0) return DL_AGG_AVG; + return -1; +} + +static bool dl_sym_is_name(ray_t* sym, const char* lit) { + if (!sym || sym->type != -RAY_SYM) return false; + ray_t* s = ray_sym_str(sym->i64); + return s && strcmp(ray_str_ptr(s), lit) == 0; +} + +/* Resolve an AST node to a variable or constant in a body atom. + * Sets the body position to either a variable or constant. + * For expressions like (quote x), evaluates them first. */ +static ray_t* dl_set_body_pos(dl_rule_t* rule, int bidx, int pos, + ray_t* node, dl_var_map_t* vars) { + if (is_dl_var(node)) { + int vi = dl_var_get_or_create(vars, node->i64); + dl_body_set_var(rule, bidx, pos, vi); + return NULL; + } + if (node->type == -RAY_I64) { + dl_body_set_const(rule, bidx, pos, node->i64); + return NULL; + } + if (node->type == -RAY_SYM) { + ray_t* s = ray_sym_str(node->i64); + if (s && strcmp(ray_str_ptr(s), "_") == 0) { + /* Wildcard: create a fresh variable */ + int vi = vars->n++; + vars->syms[vi] = -1 - vi; + dl_body_set_var(rule, bidx, pos, vi); + } else { + dl_body_set_const(rule, bidx, pos, node->i64); + } + return NULL; + } + if (node->type == -RAY_STR) { + /* Quoted string literal in body: intern as sym so it compares + * equal to other sym-interned constants. Mirrors the head + * parser convention. */ + int64_t sym = ray_sym_intern(ray_str_ptr(node), ray_str_len(node)); + dl_body_set_const(rule, bidx, pos, sym); + return NULL; + } + /* For other forms (e.g., (quote x)), evaluate to get constant */ + ray_t* val = ray_eval(node); + if (!val || RAY_IS_ERR(val)) + return val ? 
val : ray_error("type", "rule: cannot evaluate constant in body"); + if (val->type == -RAY_I64) { + dl_body_set_const(rule, bidx, pos, val->i64); + } else if (val->type == -RAY_SYM) { + dl_body_set_const(rule, bidx, pos, val->i64); + } else { + ray_release(val); + return ray_error("type", "rule: unsupported constant type in body"); + } + ray_release(val); + return NULL; +} + +/* Parse a single body clause and add it to the dl_rule_t. + * Handles triple patterns, negations, comparisons, assignments, + * and rule invocations (positive atoms). */ +static ray_t* dl_parse_body_clause(dl_rule_t* rule, ray_t* clause, + dl_var_map_t* vars, dl_program_t* prog) { + if (!is_list(clause) || ray_len(clause) < 1) + return ray_error("type", "rule/query: body clause must be a list"); + + ray_t** ce = (ray_t**)ray_data(clause); + int64_t clen = ray_len(clause); + + /* -- Triple pattern: (?e :attr ?v) -- */ + if (dl_is_triple_pattern(clause)) { + /* Register as 3-arity atom on "eav" relation: + * position 0 = entity, 1 = attr (constant), 2 = value */ + int bidx = dl_rule_add_atom(rule, "eav", 3); + if (bidx < 0) return ray_error("domain", "rule: too many body literals"); + + ray_t* err; + err = dl_set_body_pos(rule, bidx, 0, ce[0], vars); + if (err) return err; + err = dl_set_body_pos(rule, bidx, 1, ce[1], vars); + if (err) return err; + err = dl_set_body_pos(rule, bidx, 2, ce[2], vars); + if (err) return err; + return NULL; /* success */ + } + + /* -- Negation: (not (?e :attr ?v)) or (not (rule-name ?args...)) -- */ + if (dl_is_negation(clause)) { + ray_t* inner = ce[1]; + if (!is_list(inner) || ray_len(inner) < 1) + return ray_error("type", "not: inner clause must be a list"); + ray_t** ie = (ray_t**)ray_data(inner); + int64_t ilen = ray_len(inner); + + if (dl_is_triple_pattern(inner)) { + /* Negated triple: (not (?e :attr ?v)) */ + int bidx = dl_rule_add_neg(rule, "eav", 3); + if (bidx < 0) return ray_error("domain", "rule: too many body literals"); + + ray_t* err; + err = 
dl_set_body_pos(rule, bidx, 0, ie[0], vars); + if (err) return err; + err = dl_set_body_pos(rule, bidx, 1, ie[1], vars); + if (err) return err; + err = dl_set_body_pos(rule, bidx, 2, ie[2], vars); + if (err) return err; + } else { + /* Negated rule invocation: (not (rule-name ?a ?b)) */ + if (ie[0]->type != -RAY_SYM) + return ray_error("type", "not: inner clause head must be a symbol"); + ray_t* pred_name = ray_sym_str(ie[0]->i64); + if (!pred_name) + return ray_error("type", "not: cannot resolve predicate name"); + + int bidx = dl_rule_add_neg(rule, ray_str_ptr(pred_name), (int)(ilen - 1)); + if (bidx < 0) return ray_error("domain", "rule: too many body literals"); + + for (int64_t j = 1; j < ilen; j++) { + ray_t* err = dl_set_body_pos(rule, bidx, (int)(j - 1), ie[j], vars); + if (err) return err; + } + } + return NULL; + } + + /* -- Aggregate: (count ?N pred) | (sum ?S pred col) | ... [by ?k col ...] -- */ + if (dl_is_aggregate(clause)) { + ray_t* op_str = ray_sym_str(ce[0]->i64); + if (!op_str) return ray_error("type", "aggregate: bad operator"); + int op = dl_agg_op_from_name(ray_str_ptr(op_str)); + if (op < 0) return ray_error("type", "aggregate: unknown operator"); + + if (!is_dl_var(ce[1])) + return ray_error("type", "aggregate: first argument must be ?variable"); + int target_vi = dl_var_get_or_create(vars, ce[1]->i64); + if (target_vi < 0) + return ray_error("domain", "aggregate: too many variables"); + + if (ce[2]->type != -RAY_SYM) + return ray_error("type", "aggregate: predicate must be a symbol"); + ray_t* pred_sym = ray_sym_str(ce[2]->i64); + if (!pred_sym) + return ray_error("type", "aggregate: cannot resolve predicate name"); + const char* pred_name = ray_str_ptr(pred_sym); + + /* Record arity=0 as "unknown" when we can't resolve it against the + * program (prog=NULL or predicate not yet registered). The compiler + * and env auto-register treat 0 as a wildcard and resolve against the + * source relation at evaluation time. 
A hardcoded 1 would spuriously + * reject any env-bound table whose arity isn't 1. */ + int pred_arity = 0; + if (prog) { + int ri = dl_find_rel(prog, pred_name); + if (ri >= 0) pred_arity = prog->rels[ri].arity; + } + + int i = 3; + bool has_value_col = false; + int value_col = 0; + int key_vars[DL_AGG_MAX_KEYS]; + int key_cols[DL_AGG_MAX_KEYS]; + int n_keys = 0; + + while (i < clen) { + if (dl_sym_is_name(ce[i], "by")) { + i++; + while (i < clen) { + if (!is_dl_var(ce[i])) + return ray_error("type", "aggregate: group key must be ?variable"); + if (n_keys >= DL_AGG_MAX_KEYS) + return ray_error("domain", "aggregate: too many group keys"); + key_vars[n_keys] = dl_var_get_or_create(vars, ce[i]->i64); + i++; + if (i >= clen || ce[i]->type != -RAY_I64) + return ray_error("type", "aggregate: group key column must be integer"); + key_cols[n_keys] = (int)ce[i]->i64; + i++; + n_keys++; + } + break; + } + if (ce[i]->type == -RAY_I64) { + if (has_value_col) + return ray_error("type", "aggregate: at most one value column index"); + has_value_col = true; + value_col = (int)ce[i]->i64; + i++; + continue; + } + return ray_error("type", "aggregate: unexpected token in aggregate clause"); + } + + if (op == DL_AGG_COUNT) { + if (has_value_col) + return ray_error("type", "aggregate: count does not take a value column"); + } else { + if (!has_value_col) + return ray_error("type", "aggregate: sum/min/max/avg require a value column index"); + } + + int bidx = dl_rule_add_agg(rule, op, target_vi, pred_name, pred_arity, has_value_col ? 
value_col : 0); + if (bidx < 0) return ray_error("domain", "rule: too many body literals"); + if (n_keys > 0) { + if (dl_rule_agg_set_group(rule, bidx, key_vars, key_cols, n_keys) != 0) + return ray_error("domain", "aggregate: cannot attach group keys"); + } + return NULL; + } + + /* -- Between sugar: (between ?x lo hi) -> (>= ?x lo) and (<= ?x hi) -- */ + if (clen == 4 && ce[0]->type == -RAY_SYM) { + ray_t* nm = ray_sym_str(ce[0]->i64); + if (nm && strcmp(ray_str_ptr(nm), "between") == 0) { + if (!is_dl_var(ce[1])) + return ray_error("type", "between target must be a ?variable"); + int vi = dl_var_get_or_create(vars, ce[1]->i64); + if (vi < 0) + return ray_error("domain", "between: too many variables"); + if (ce[2]->type != -RAY_I64 || ce[3]->type != -RAY_I64) + return ray_error("type", "between bounds must be integer constants"); + if (dl_rule_add_cmp_const(rule, DL_CMP_GE, vi, ce[2]->i64) < 0) + return ray_error("domain", "rule: too many body literals"); + if (dl_rule_add_cmp_const(rule, DL_CMP_LE, vi, ce[3]->i64) < 0) + return ray_error("domain", "rule: too many body literals"); + return NULL; + } + } + + /* -- Assignment: (= ?var expr) -- */ + if (dl_is_assignment(clause)) { + int target_vi = dl_var_get_or_create(vars, ce[1]->i64); + dl_expr_t* expr = dl_build_expr(ce[2], vars); + if (!expr) + return ray_error("type", "rule: cannot parse assignment expression"); + dl_rule_add_assign(rule, target_vi, DL_OP_EQ, expr); + return NULL; + } + + /* -- Comparison: (> ?x ?y) or (> ?x 100) -- */ + if (dl_is_comparison(clause)) { + ray_t* op_str = ray_sym_str(ce[0]->i64); + int cmp_op = dl_cmp_op_from_name(ray_str_ptr(op_str)); + + /* LHS */ + bool lhs_is_var = is_dl_var(ce[1]); + int lhs_vi = lhs_is_var ? dl_var_get_or_create(vars, ce[1]->i64) : -1; + bool lhs_is_const = (!lhs_is_var && (ce[1]->type == -RAY_I64 || ce[1]->type == -RAY_SYM)); + int64_t lhs_const = lhs_is_const ? 
ce[1]->i64 : 0; + + /* RHS */ + bool rhs_is_var = (clen > 2) && is_dl_var(ce[2]); + int rhs_vi = rhs_is_var ? dl_var_get_or_create(vars, ce[2]->i64) : -1; + bool rhs_is_const = (clen > 2) && !rhs_is_var && + (ce[2]->type == -RAY_I64 || ce[2]->type == -RAY_SYM); + int64_t rhs_const = rhs_is_const ? ce[2]->i64 : 0; + + if (lhs_is_var && rhs_is_var) { + dl_rule_add_cmp(rule, cmp_op, lhs_vi, rhs_vi); + } else if (lhs_is_var && rhs_is_const) { + dl_rule_add_cmp_const(rule, cmp_op, lhs_vi, rhs_const); + } else if (lhs_is_const && rhs_is_var) { + /* Flip: const op var -> var flipped_op const */ + int flipped = cmp_op; + switch (cmp_op) { + case DL_CMP_GT: flipped = DL_CMP_LT; break; + case DL_CMP_GE: flipped = DL_CMP_LE; break; + case DL_CMP_LT: flipped = DL_CMP_GT; break; + case DL_CMP_LE: flipped = DL_CMP_GE; break; + default: break; + } + dl_rule_add_cmp_const(rule, flipped, rhs_vi, lhs_const); + } else { + /* Expression-based comparison */ + dl_expr_t* le = dl_build_expr(ce[1], vars); + dl_expr_t* re = (clen > 2) ? dl_build_expr(ce[2], vars) : NULL; + if (le && re) + dl_rule_add_cmp_expr(rule, cmp_op, le, re); + else + return ray_error("type", "rule: cannot parse comparison operands"); + } + return NULL; + } + + /* -- Rule invocation / positive atom: (pred-name ?a ?b ...) -- */ + if (ce[0]->type == -RAY_SYM) { + ray_t* pred_name = ray_sym_str(ce[0]->i64); + if (!pred_name) + return ray_error("type", "rule: cannot resolve predicate name"); + + int bidx = dl_rule_add_atom(rule, ray_str_ptr(pred_name), (int)(clen - 1)); + if (bidx < 0) return ray_error("domain", "rule: too many body literals"); + + for (int64_t j = 1; j < clen; j++) { + ray_t* err = dl_set_body_pos(rule, bidx, (int)(j - 1), ce[j], vars); + if (err) return err; + } + return NULL; + } + + return ray_error("type", "rule/query: unrecognized body clause form"); +} + +/* Parse head + body clauses into out (shared by rule and query inline rules). 
*/ +static ray_t* dl_parse_rule_from_head_and_body(dl_rule_t* out, ray_t* head, + ray_t** body_args, int64_t n_body, + dl_var_map_t* vars, dl_program_t* prog) { + if (!is_list(head) || ray_len(head) < 1) + return ray_error("type", "rule: head must be (name ?var ...)"); + + ray_t** hd = (ray_t**)ray_data(head); + int64_t hlen = ray_len(head); + + if (hd[0]->type != -RAY_SYM) + return ray_error("type", "rule: head name must be a symbol"); + + ray_t* head_name_str = ray_sym_str(hd[0]->i64); + if (!head_name_str) + return ray_error("type", "rule: cannot resolve head name"); + + if (ray_str_len(head_name_str) == 1 && ray_str_ptr(head_name_str)[0] == '_') + return ray_error("domain", "rule: _ is reserved as wildcard"); + + int head_arity = (int)(hlen - 1); + dl_rule_init(out, ray_str_ptr(head_name_str), head_arity); + + for (int i = 0; i < head_arity; i++) { + ray_t* harg = hd[i + 1]; + if (is_dl_var(harg)) { + int vi = dl_var_get_or_create(vars, harg->i64); + dl_rule_head_var(out, i, vi); + } else if (harg->type == -RAY_I64) { + dl_rule_head_const_typed(out, i, harg->i64, RAY_I64); + } else if (harg->type == -RAY_SYM) { + dl_rule_head_const_typed(out, i, harg->i64, RAY_SYM); + } else if (harg->type == -RAY_F64) { + int64_t bits; + memcpy(&bits, &harg->f64, sizeof(bits)); + dl_rule_head_const_typed(out, i, bits, RAY_F64); + } else if (harg->type == -RAY_STR) { + /* Intern the string as a sym so it can be stored in a RAY_SYM + * column. Matches the body-literal parser convention. */ + int64_t sym = ray_sym_intern(ray_str_ptr(harg), ray_str_len(harg)); + dl_rule_head_const_typed(out, i, sym, RAY_SYM); + } else { + return ray_error("type", "rule: head arguments must be ?variables or constants"); + } + } + + for (int64_t i = 0; i < n_body; i++) { + ray_t* err = dl_parse_body_clause(out, body_args[i], vars, prog); + if (err) return err; + } + + out->n_vars = vars->n; + return NULL; +} + +/* One inline rule: ((head-name ?a ...) body1 body2 ...) 
*/ +static ray_t* dl_parse_inline_rule(dl_rule_t* out, ray_t* rule_list, dl_program_t* prog) { + if (!is_list(rule_list) || ray_len(rule_list) < 1) + return ray_error("type", "query: each (rules ...) entry must be a non-empty list"); + + ray_t** re = (ray_t**)ray_data(rule_list); + int64_t rlen = ray_len(rule_list); + dl_var_map_t vars; + memset(&vars, 0, sizeof(vars)); + return dl_parse_rule_from_head_and_body(out, re[0], &re[1], rlen - 1, &vars, prog); +} + +/* (rule (head-name ?v1 ?v2 ...) clause1 clause2 ...) + * Special form: args are NOT evaluated. + * Parses the head and body into a dl_rule_t and stores it globally. */ +ray_t* ray_rule_fn(ray_t** args, int64_t n) { + if (n < 2) + return ray_error("arity", "rule expects at least a head and one body clause"); + + if (g_dl_n_rules >= DL_MAX_RULES) + return ray_error("domain", "rule: too many rules (max 128)"); + + dl_var_map_t vars; + memset(&vars, 0, sizeof(vars)); + dl_rule_t rule; + ray_t* perr = dl_parse_rule_from_head_and_body(&rule, args[0], &args[1], n - 1, &vars, NULL); + if (perr) return perr; + + memcpy(&g_dl_rules[g_dl_n_rules++], &rule, sizeof(dl_rule_t)); + return ray_bool(true); +} + +/* (query db (find ?a ?b ...) (where clause1 clause2 ...) [(rules ...)]) + * Optional fourth arg (rules ...) supplies inline rules only (globals ignored). + * Special form: db is evaluated, find/where are NOT evaluated. + * Creates a temporary dl_program_t, registers the EAV table, + * copies global rules (unless inline rules), builds a synthetic query rule, and evaluates. */ +ray_t* ray_query_fn(ray_t** args, int64_t n) { + if (n < 3 || n > 4) + return ray_error("arity", "query expects: db (find ...) (where ...) [(rules ...)]"); + + /* Evaluate db (first arg) */ + ray_t* db = ray_eval(args[0]); + if (!db || RAY_IS_ERR(db)) return db ? 
db : ray_error("type", "query: db is null"); + if (db->type != RAY_TABLE) { ray_release(db); return ray_error("type", "query: first arg must be a datoms table"); } + + /* Parse find clause */ + ray_t* find_clause = args[1]; + if (!is_list(find_clause) || ray_len(find_clause) < 2) { + ray_release(db); + return ray_error("type", "query: second arg must be (find ?var ...)"); + } + ray_t** find_elems = (ray_t**)ray_data(find_clause); + int64_t find_len = ray_len(find_clause); + + /* Verify it starts with 'find' */ + if (find_elems[0]->type != -RAY_SYM) { + ray_release(db); + return ray_error("type", "query: expected (find ...)"); + } + ray_t* find_name = ray_sym_str(find_elems[0]->i64); + if (!find_name || strcmp(ray_str_ptr(find_name), "find") != 0) { + ray_release(db); + return ray_error("type", "query: expected (find ...) as second argument"); + } + + /* Collect find variable sym IDs */ + int64_t find_var_syms[DL_MAX_ARITY]; + int n_find_vars = 0; + for (int64_t i = 1; i < find_len && n_find_vars < DL_MAX_ARITY; i++) { + if (!is_dl_var(find_elems[i])) { + ray_release(db); + return ray_error("type", "query: find arguments must be ?variables"); + } + find_var_syms[n_find_vars++] = find_elems[i]->i64; + } + + /* Parse where clause */ + ray_t* where_clause = args[2]; + if (!is_list(where_clause) || ray_len(where_clause) < 2) { + ray_release(db); + return ray_error("type", "query: third arg must be (where clause ...)"); + } + ray_t** where_elems = (ray_t**)ray_data(where_clause); + int64_t where_len = ray_len(where_clause); + + /* Verify it starts with 'where' */ + if (where_elems[0]->type != -RAY_SYM) { + ray_release(db); + return ray_error("type", "query: expected (where ...)"); + } + ray_t* where_name = ray_sym_str(where_elems[0]->i64); + if (!where_name || strcmp(ray_str_ptr(where_name), "where") != 0) { + ray_release(db); + return ray_error("type", "query: expected (where ...) as third argument"); + } + + /* Optional 4th arg must be (rules ...) 
— inline rules override globals */ + ray_t* rules_clause = NULL; + if (n == 4) { + ray_t* fourth = args[3]; + if (!is_list(fourth) || ray_len(fourth) < 1) { + ray_release(db); + return ray_error("type", "query: fourth argument must be (rules ...)"); + } + ray_t** re4 = (ray_t**)ray_data(fourth); + if (re4[0]->type != -RAY_SYM) { + ray_release(db); + return ray_error("type", "query: fourth argument must be (rules ...)"); + } + ray_t* rname = ray_sym_str(re4[0]->i64); + if (!rname || strcmp(ray_str_ptr(rname), "rules") != 0) { + ray_release(db); + return ray_error("type", "query: fourth argument must be (rules ...)"); + } + rules_clause = fourth; + } + + /* Build variable map for the query */ + dl_var_map_t vars; + memset(&vars, 0, sizeof(vars)); + + /* Pre-populate the variable map with find variables so they get + * the lowest indices (0, 1, 2, ...) -- makes projection trivial */ + for (int i = 0; i < n_find_vars; i++) + dl_var_get_or_create(&vars, find_var_syms[i]); + + /* Build synthetic query rule: __query(?find_vars...) :- body_clauses... */ + dl_rule_t qrule; + dl_rule_init(&qrule, "__query", n_find_vars); + for (int i = 0; i < n_find_vars; i++) + dl_rule_head_var(&qrule, i, i); + + /* Parse body clauses into the query rule */ + for (int64_t i = 1; i < where_len; i++) { + ray_t* err = dl_parse_body_clause(&qrule, where_elems[i], &vars, NULL); + if (err) { ray_release(db); return err; } + } + qrule.n_vars = vars.n; + + /* Create temporary program */ + dl_program_t* prog = dl_program_new(); + if (!prog) { ray_release(db); return ray_error("oom", "query: cannot create program"); } + + /* Register the EAV table as a 3-arity "eav" relation. + * The 'a' column is RAY_SYM with adaptive width -- the Datalog engine + * operates on I64 data only, so convert SYM columns to I64 first. 
*/ + { + int64_t nrows_db = ray_table_nrows(db); + ray_t* eav_tbl = ray_table_new(3); + for (int c = 0; c < 3; c++) { + ray_t* col = ray_table_get_col_idx(db, c); + if (!col) continue; + if (col->type == RAY_SYM) { + /* Convert SYM -> I64: read sym IDs via ray_read_sym */ + ray_t* i64col = ray_vec_new(RAY_I64, nrows_db); + if (i64col && !RAY_IS_ERR(i64col)) { + i64col->len = nrows_db; + int64_t* d = (int64_t*)ray_data(i64col); + for (int64_t r = 0; r < nrows_db; r++) + d[r] = ray_read_sym(ray_data(col), r, col->type, col->attrs); + eav_tbl = ray_table_add_col(eav_tbl, ray_table_col_name(db, c), i64col); + ray_release(i64col); + } + } else { + eav_tbl = ray_table_add_col(eav_tbl, ray_table_col_name(db, c), col); + } + } + dl_add_edb(prog, "eav", eav_tbl, 3); + ray_release(eav_tbl); + } + + if (rules_clause) { + ray_t** re = (ray_t**)ray_data(rules_clause); + int64_t rlen = ray_len(rules_clause); + for (int64_t i = 1; i < rlen; i++) { + dl_rule_t irule; + ray_t* rerr = dl_parse_inline_rule(&irule, re[i], prog); + if (rerr) { + dl_program_free(prog); + ray_release(db); + return rerr; + } + if (dl_add_rule(prog, &irule) < 0) { + dl_program_free(prog); + ray_release(db); + return ray_error("domain", "query: too many rules"); + } + } + } else { + for (int i = 0; i < g_dl_n_rules; i++) + dl_add_rule(prog, &g_dl_rules[i]); + } + + /* Add the synthetic query rule */ + dl_add_rule(prog, &qrule); + + /* Auto-register env-bound EDB tables referenced from rule bodies. + * + * Rationale: the primary `db` argument becomes the `eav` EDB (above). + * User rules can also reference additional relations by name + * (e.g. `(facts_i64 ?e ?a ?v)`). Rather than force callers to pre-declare + * every EDB, scan the program's rule bodies for positive / negative atom + * predicates that are not yet known as a relation, look them up in the + * global ray env, and register them when they resolve to a RAY_TABLE of + * matching arity. 
SYM columns are converted to I64 (same treatment as + * the primary `eav` table). + * + * Aggregate sources are handled too (`DL_AGG` uses `agg_pred`). + * The built-in synthetic "__query" / "eav" names are skipped. */ + for (int ri = 0; ri < prog->n_rules; ri++) { + dl_rule_t* rr = &prog->rules[ri]; + for (int bi = 0; bi < rr->n_body; bi++) { + dl_body_t* bd = &rr->body[bi]; + const char* pred_name = NULL; + int pred_arity = 0; + + if (bd->type == DL_POS || bd->type == DL_NEG) { + pred_name = bd->pred; + pred_arity = bd->arity; + } else if (bd->type == DL_AGG) { + pred_name = bd->agg_pred; + pred_arity = bd->agg_arity; + } else { + continue; + } + + if (!pred_name || pred_name[0] == '\0') continue; + if (strcmp(pred_name, "eav") == 0) continue; + if (dl_find_rel(prog, pred_name) >= 0) continue; + + int64_t env_sym = ray_sym_intern(pred_name, strlen(pred_name)); + ray_t* env_val = ray_env_get(env_sym); + if (!env_val || env_val->type != RAY_TABLE) continue; + int64_t ncols = ray_table_ncols(env_val); + /* pred_arity == 0 is a "not yet known" sentinel used when the + * aggregate parser couldn't resolve the source predicate's arity + * at parse time (prog=NULL, surface syntax). Resolve it from the + * env-bound table's column count now. */ + if (pred_arity == 0) pred_arity = (int)ncols; + if (ncols != pred_arity) continue; + + int64_t nrows_env = ray_table_nrows(env_val); + ray_t* clean = ray_table_new(pred_arity); + if (!clean || RAY_IS_ERR(clean)) { + if (clean) ray_release(clean); + dl_program_free(prog); + ray_release(db); + return ray_error("memory", "query: failed to create env-backed EDB table"); + } + for (int c = 0; c < pred_arity; c++) { + ray_t* col = ray_table_get_col_idx(env_val, c); + ray_t* next_clean; + if (!col) { + /* Silently skipping would build `clean` with fewer than + * pred_arity columns yet still register it via dl_add_edb + * — the program would see a schema-inconsistent EDB. 
*/ + ray_release(clean); + dl_program_free(prog); + ray_release(db); + return ray_error("schema", "query: env-backed EDB table missing expected column"); + } + if (col->type == RAY_SYM) { + ray_t* i64col = ray_vec_new(RAY_I64, nrows_env); + if (!i64col) { + ray_release(clean); + dl_program_free(prog); + ray_release(db); + return ray_error("memory", "query: failed to convert env-backed SYM column"); + } + if (RAY_IS_ERR(i64col)) { + ray_error_free(i64col); + ray_release(clean); + dl_program_free(prog); + ray_release(db); + return ray_error("memory", "query: failed to convert env-backed SYM column"); + } + i64col->len = nrows_env; + int64_t* d = (int64_t*)ray_data(i64col); + for (int64_t r = 0; r < nrows_env; r++) + d[r] = ray_read_sym(ray_data(col), r, col->type, col->attrs); + next_clean = ray_table_add_col(clean, ray_table_col_name(env_val, c), i64col); + ray_release(i64col); + } else { + next_clean = ray_table_add_col(clean, ray_table_col_name(env_val, c), col); + } + if (!next_clean) { + ray_release(clean); + dl_program_free(prog); + ray_release(db); + return ray_error("memory", "query: failed to build env-backed EDB table"); + } + if (RAY_IS_ERR(next_clean)) { + ray_error_free(next_clean); + ray_release(clean); + dl_program_free(prog); + ray_release(db); + return ray_error("memory", "query: failed to build env-backed EDB table"); + } + clean = next_clean; + } + if (dl_add_edb(prog, pred_name, clean, pred_arity) < 0) { + ray_release(clean); + dl_program_free(prog); + ray_release(db); + return ray_error("domain", "query: failed to register env-backed EDB table"); + } + ray_release(clean); + } + } + + /* Stratify and evaluate */ + if (dl_stratify(prog) != 0) { + dl_program_free(prog); + ray_release(db); + return ray_error("domain", "query: unstratifiable negation cycle"); + } + + if (dl_eval(prog) != 0) { + dl_program_free(prog); + ray_release(db); + return ray_error("domain", "query: evaluation failed"); + } + + /* Get the result */ + ray_t* raw = dl_query(prog, 
"__query"); + if (!raw || RAY_IS_ERR(raw)) { + dl_program_free(prog); + ray_release(db); + return raw ? raw : ray_error("domain", "query: no result"); + } + + /* Build result table with user-friendly column names (the ?variable names) */ + int64_t nrows = ray_table_nrows(raw); + int64_t ncols = ray_table_ncols(raw); + ray_t* result = ray_table_new(n_find_vars); + for (int i = 0; i < n_find_vars && i < (int)ncols; i++) { + ray_t* col = ray_table_get_col_idx(raw, i); + if (col) + result = ray_table_add_col(result, find_var_syms[i], col); + } + + /* Handle empty result: ensure schema is correct */ + if (nrows == 0 && n_find_vars > 0 && ray_table_ncols(result) == 0) { + ray_release(result); + result = ray_table_new(n_find_vars); + for (int i = 0; i < n_find_vars; i++) { + ray_t* ev = ray_vec_new(RAY_I64, 0); + if (!RAY_IS_ERR(ev)) { + result = ray_table_add_col(result, find_var_syms[i], ev); + ray_release(ev); + } + } + } + + dl_program_free(prog); + ray_release(db); + return result; +} + +/* ══════════════════════════════════════════ + * Programmatic Datalog API builtins + * ══════════════════════════════════════════ */ + +/* Opaque handle for dl_program_t stored in a ray_t atom. + * We store the pointer in the i64 field. 
*/
+/* Box a program pointer in an I64 atom so it can be passed around as an
+ * ordinary interpreter value. Returns an "oom" error if the atom cannot be
+ * allocated; the program itself is left untouched in that case. */
+static ray_t* dl_wrap_program(dl_program_t* prog) {
+    ray_t* obj = ray_alloc(0);
+    if (!obj || RAY_IS_ERR(obj)) return ray_error("oom", NULL);
+    obj->type = -RAY_I64;
+    obj->i64 = (int64_t)(uintptr_t)prog;
+    return obj;
+}
+
+/* Recover the program pointer from a handle made by dl_wrap_program.
+ * Returns NULL unless obj is an I64 atom.
+ * NOTE(review): only the type tag is checked, so any I64 atom is
+ * reinterpreted as a pointer — handles are not distinguishable from
+ * plain integers here. */
+static dl_program_t* dl_unwrap_program(ray_t* obj) {
+    if (!obj || obj->type != -RAY_I64) return NULL;
+    return (dl_program_t*)(uintptr_t)obj->i64;
+}
+
+/* Resolve a symbol atom to its interned name.
+ * Returns NULL when obj is NULL, is not a symbol atom, or has no string. */
+static const char* dl_sym_cstr(ray_t* obj) {
+    if (!obj || obj->type != -RAY_SYM) return NULL;
+    ray_t* s = ray_sym_str(obj->i64);
+    return s ? ray_str_ptr(s) : NULL;
+}
+
+/* (dl-program) — create a new empty dl_program_t */
+ray_t* ray_dl_program_fn(ray_t** args, int64_t n) {
+    (void)args;
+    if (n != 0) return ray_error("arity", "dl-program takes no arguments");
+    dl_program_t* prog = dl_program_new();
+    if (!prog) return ray_error("oom", "dl-program: cannot allocate");
+    ray_t* handle = dl_wrap_program(prog);
+    /* The program is reachable only through the handle: if boxing failed,
+     * free it here instead of leaking it. */
+    if (!handle || RAY_IS_ERR(handle)) dl_program_free(prog);
+    return handle;
+}
+
+/* (dl-add-edb prog name table arity) — register EDB.
+ * name must be a symbol; arity must be an integer in [0, DL_MAX_ARITY].
+ * Returns true on success, otherwise an "arity" / "type" / "domain" error. */
+ray_t* ray_dl_add_edb_fn(ray_t** args, int64_t n) {
+    if (n != 4) return ray_error("arity", "dl-add-edb expects: prog name table arity");
+    dl_program_t* prog = dl_unwrap_program(args[0]);
+    if (!prog) return ray_error("type", "dl-add-edb: first arg must be a dl-program");
+
+    /* Only symbols are accepted as relation names */
+    const char* name = dl_sym_cstr(args[1]);
+    if (!name) return ray_error("type", "dl-add-edb: name must be a symbol");
+
+    if (!args[2] || args[2]->type != RAY_TABLE)
+        return ray_error("type", "dl-add-edb: third arg must be a table");
+    if (!args[3] || args[3]->type != -RAY_I64)
+        return ray_error("type", "dl-add-edb: arity must be an integer");
+
+    /* Range-check before narrowing: a bare (int) cast would wrap a value
+     * such as 2^32 + 2 into a plausible-looking arity of 2. */
+    int64_t arity = args[3]->i64;
+    if (arity < 0 || arity > DL_MAX_ARITY)
+        return ray_error("domain", "dl-add-edb: arity out of range");
+
+    int rc = dl_add_edb(prog, name, args[2], (int)arity);
+    return (rc >= 0) ? ray_bool(true) : ray_error("domain", "dl-add-edb: failed");
+}
+
+/* (dl-stratify prog) — compute strata.
+ * Returns true, or a "domain" error when dl_stratify reports failure. */
+ray_t* ray_dl_stratify_fn(ray_t* x) {
+    dl_program_t* prog = dl_unwrap_program(x);
+    if (!prog) return ray_error("type", "dl-stratify: arg must be a dl-program");
+    int rc = dl_stratify(prog);
+    return (rc == 0) ? ray_bool(true) : ray_error("domain", "dl-stratify: unstratifiable");
+}
+
+/* (dl-eval prog) — evaluate to fixpoint.
+ * Returns true, or a "domain" error when dl_eval reports failure. */
+ray_t* ray_dl_eval_fn(ray_t* x) {
+    dl_program_t* prog = dl_unwrap_program(x);
+    if (!prog) return ray_error("type", "dl-eval: arg must be a dl-program");
+    int rc = dl_eval(prog);
+    return (rc == 0) ? ray_bool(true) : ray_error("domain", "dl-eval: evaluation failed");
+}
+
+/* (dl-query prog pred) — get result table; pred must be a symbol.
+ * The table returned by dl_query is retained before it is handed back, so
+ * the caller receives its own reference. */
+ray_t* ray_dl_query_fn(ray_t* prog_obj, ray_t* pred_obj) {
+    dl_program_t* prog = dl_unwrap_program(prog_obj);
+    if (!prog) return ray_error("type", "dl-query: first arg must be a dl-program");
+
+    const char* pred = dl_sym_cstr(pred_obj);
+    if (!pred) return ray_error("type", "dl-query: pred must be a symbol");
+
+    ray_t* result = dl_query(prog, pred);
+    if (!result) return ray_error("domain", "dl-query: predicate not found");
+    ray_retain(result);
+    return result;
+}
+
+/* (dl-provenance prog pred) — get provenance column; pred must be a symbol.
+ * The column is retained before it is handed back to the caller. */
+ray_t* ray_dl_provenance_fn(ray_t* prog_obj, ray_t* pred_obj) {
+    dl_program_t* prog = dl_unwrap_program(prog_obj);
+    if (!prog) return ray_error("type", "dl-provenance: first arg must be a dl-program");
+
+    const char* pred = dl_sym_cstr(pred_obj);
+    if (!pred) return ray_error("type", "dl-provenance: pred must be a symbol");
+
+    ray_t* prov = dl_get_provenance(prog, pred);
+    if (!prov) return ray_error("domain", "dl-provenance: not available");
+    ray_retain(prov);
+    return prov;
+}
+
+/* Reset global Datalog rule storage (called from ray_lang_destroy).
+ * Only the rule count is cleared.
+ * NOTE(review): if stored rules own heap-allocated expression trees they
+ * are not released here — confirm against the rule registration code. */
+void ray_dl_reset_rules(void) {
+    g_dl_n_rules = 0;
+}
diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/datalog.h b/crates/rayforce-sys/vendor/rayforce/src/ops/datalog.h
new file mode 100644
index 0000000..3141097
--- /dev/null
+++ b/crates/rayforce-sys/vendor/rayforce/src/ops/datalog.h
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2025-2026 Anton Kundenko
+ * All rights reserved.
+
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/*
+ * datalog.h — Datalog evaluation engine for Rayforce
+ *
+ * Compiles Datalog rules into ray_graph_t operation DAGs and evaluates
+ * them to fixpoint using Rayforce's vectorized columnar execution engine.
+ * Supports semi-naive evaluation, stratified negation, and multi-rule heads.
+ */
+#ifndef RAYFORCE_DATALOG_H
+#define RAYFORCE_DATALOG_H
+
+#include "rayforce.h"
+#include "ops/ops.h"
+#include <stdbool.h>
+#include <stdint.h>
+
+/* ===== Body literal types ===== */
+#define DL_POS       0   /* positive atom: pred(X, Y, ...) */
+#define DL_NEG       1   /* negated atom: not pred(X, Y, ...) */
+#define DL_CMP       2   /* comparison: X < Y, X = c, etc. */
+#define DL_ASSIGN    3   /* assignment: X = expr */
+#define DL_BUILTIN   4   /* builtin predicate */
+#define DL_INTERVAL  5   /* interval bind: F @[S, E] */
+#define DL_AGG       6   /* aggregate: (count ?N pred), (sum ?S ?expr pred), ... */
+
+/* ===== Comparison operators (for DL_CMP) ===== */
+#define DL_CMP_EQ  0
+#define DL_CMP_NE  1
+#define DL_CMP_LT  2
+#define DL_CMP_LE  3
+#define DL_CMP_GT  4
+#define DL_CMP_GE  5
+
+/* ===== Aggregate operators (for DL_AGG) ===== */
+#define DL_AGG_COUNT  0
+#define DL_AGG_SUM    1
+#define DL_AGG_MIN    2
+#define DL_AGG_MAX    3
+#define DL_AGG_AVG    4
+
+#define DL_AGG_MAX_KEYS 8
+
+/* ===== Assignment operators (for DL_ASSIGN) ===== */
+#define DL_OP_EQ  0   /* simple assignment: X = expr */
+
+/* ===== Builtin predicate IDs (for DL_BUILTIN) ===== */
+#define DL_BUILTIN_BEFORE          0   /* before(S, E, T): filter T < S */
+#define DL_BUILTIN_DURATION_SINCE  1   /* duration_since(T1, T2, D): D = T2 - T1 */
+#define DL_BUILTIN_ABS             2   /* abs(X, Y): Y = |X| */
+
+/* ===== Expression AST for assignments ===== */
+typedef enum {
+    DL_EXPR_CONST,       /* integer constant (back-compat) */
+    DL_EXPR_CONST_F64,   /* float constant */
+    DL_EXPR_VAR,         /* bound variable reference */
+    DL_EXPR_BINOP,       /* binary op: +, -, *, / */
+} dl_expr_kind_t;
+
+typedef struct dl_expr {
+    dl_expr_kind_t kind;
+    int64_t const_val;       /* for DL_EXPR_CONST */
+    double const_f64;        /* for DL_EXPR_CONST_F64 */
+    int var_idx;             /* for DL_EXPR_VAR */
+    int binop;               /* for DL_EXPR_BINOP: OP_ADD, OP_SUB, etc. */
+    struct dl_expr *left;    /* for DL_EXPR_BINOP */
+    struct dl_expr *right;   /* for DL_EXPR_BINOP */
+} dl_expr_t;
+
+/* Variable index sentinel: constant value, not a variable */
+#define DL_CONST (-1)
+
+/* Maximum arity for any relation */
+#define DL_MAX_ARITY 16
+
+/* Maximum number of body literals per rule */
+#define DL_MAX_BODY 16
+
+/* Maximum number of rules in a program */
+#define DL_MAX_RULES 128
+
+/* Maximum number of relations */
+#define DL_MAX_RELS 64
+
+/* Maximum strata */
+#define DL_MAX_STRATA 16
+
+/* Program flags */
+#define DL_FLAG_PROVENANCE (1 << 0) /* track which rule derived each tuple */
+
+/* ===== Body literal ===== */
+typedef struct {
+    int type;                           /* literal kind: DL_POS, DL_NEG, DL_CMP, DL_ASSIGN, DL_BUILTIN, DL_INTERVAL or DL_AGG */
+    char pred[64];                      /* predicate name (for DL_POS/DL_NEG) */
+    int arity;                          /* number of argument positions */
+    int vars[DL_MAX_ARITY];             /* variable indices (DL_CONST for constants) */
+    int64_t const_vals[DL_MAX_ARITY];   /* constant values (I64/SYM) */
+    int cmp_op;                         /* comparison operator (for DL_CMP) */
+    int cmp_lhs;                        /* left variable index (for DL_CMP) */
+    int cmp_rhs;                        /* right variable index or DL_CONST */
+    int64_t cmp_const;                  /* constant value if cmp_rhs == DL_CONST */
+    int assign_var;                     /* target variable index (for DL_ASSIGN) */
+    dl_expr_t *assign_expr;             /* expression tree (for DL_ASSIGN) */
+    int builtin_id;                     /* builtin ID (for DL_BUILTIN) */
+    dl_expr_t *cmp_lhs_expr;            /* expression tree for LHS (for DL_CMP with expressions) */
+    dl_expr_t *cmp_rhs_expr;            /* expression tree for RHS (for DL_CMP with expressions) */
+    int interval_fact_var;              /* fact variable index (for DL_INTERVAL) */
+    int interval_start_var;             /* start variable index (for DL_INTERVAL) */
+    int interval_end_var;               /* end variable index (for DL_INTERVAL) */
+    int agg_op;                         /* aggregate operator (for DL_AGG) */
+    int agg_target_var;                 /* variable that receives the aggregate result */
+    char agg_pred[64];                  /* predicate name being aggregated over */
+    int agg_arity;                      /* arity of agg_pred */
+    int agg_value_col;                  /* column index inside agg_pred to aggregate (sum/min/max/avg) */
+    int agg_n_group_keys;               /* 0 = scalar; >0 = grouped */
+    int agg_group_key_vars[DL_AGG_MAX_KEYS];   /* group-key variables; first agg_n_group_keys entries used — TODO confirm */
+    int agg_group_key_cols[DL_AGG_MAX_KEYS];   /* presumably the agg_pred column for each group key — TODO confirm */
+} dl_body_t;
+
+/* ===== Datalog rule: head :- body ===== */
+typedef struct {
+    char head_pred[64];                     /* head predicate name */
+    int head_arity;
+    int head_vars[DL_MAX_ARITY];            /* variable indices in head */
+    int64_t head_consts[DL_MAX_ARITY];      /* constants (when head_vars[i] == DL_CONST) */
+    int8_t head_const_types[DL_MAX_ARITY];  /* ray type tag per head slot:
+                                             * RAY_I64 / RAY_SYM / RAY_F64 when head_vars[i] == DL_CONST,
+                                             * 0 when head_vars[i] is a variable. */
+    int n_body;                             /* number of body literals */
+    dl_body_t body[DL_MAX_BODY];
+    int n_vars;                             /* total distinct variable count in rule */
+    int stratum;                            /* assigned stratum (-1 if not yet stratified) */
+} dl_rule_t;
+
+/* ===== Datalog relation ===== */
+typedef struct {
+    char name[64];                      /* relation name */
+    ray_t* table;                       /* backing columnar table */
+    int arity;                          /* number of columns */
+    bool is_idb;                        /* true = derived (intensional) */
+    int64_t col_names[DL_MAX_ARITY];    /* interned column name symbols */
+    ray_t* prov_col;                    /* provenance column (when DL_FLAG_PROVENANCE) */
+    ray_t* prov_src_offsets;            /* CSR offsets into prov_src_data, length nrows+1 */
+    ray_t* prov_src_data;               /* packed source refs: (rel_idx << 32) | row_idx */
+} dl_rel_t;
+
+/* ===== Datalog program ===== */
+typedef struct {
+    dl_rel_t rels[DL_MAX_RELS];
+    int n_rels;
+    dl_rule_t rules[DL_MAX_RULES];
+    int n_rules;
+    int strata[DL_MAX_STRATA][DL_MAX_RELS];  /* predicate indices per stratum */
+    int strata_sizes[DL_MAX_STRATA];         /* number of predicates per stratum */
+    int n_strata;
+    uint32_t flags;                          /* DL_FLAG_* bitmask */
+    bool eval_err;                           /* set by compile/eval on
+                                                unrecoverable failure
+                                                (distinct from "rule
+                                                produced no rows"); read
+                                                by dl_eval to return -1 */
+} dl_program_t;
+
+/* ===== Public API ===== */
+
+/* Create a new empty Datalog program */
+dl_program_t* dl_program_new(void);
+
+/* Free a Datalog program and release all owned tables */
+void dl_program_free(dl_program_t* prog);
+
+/** Append rules registered via the Rayfall (rule ...) special form into a program. */
+void dl_append_global_rules(dl_program_t* prog);
+
+/* Register an EDB (extensional) relation backed by an existing table.
+ * Column names are auto-generated as "c0", "c1", ... unless the table
+ * already has named columns. */
+int dl_add_edb(dl_program_t* prog, const char* name, ray_t* table, int arity);
+
+/* Add a rule to the program. The rule struct is copied. */
+int dl_add_rule(dl_program_t* prog, const dl_rule_t* rule);
+
+/* Compute stratification (topological sort of negation dependency graph).
+ * Returns 0 on success, -1 if program has unstratifiable negation cycle. */
+int dl_stratify(dl_program_t* prog);
+
+/* Evaluate the program to fixpoint using semi-naive evaluation.
+ * Returns 0 on success, -1 on error. */
+int dl_eval(dl_program_t* prog);
+
+/* Query the result of a derived relation after evaluation.
+ * Returns the backing table (caller does NOT own it). */
+ray_t* dl_query(dl_program_t* prog, const char* pred_name);
+
+/* Retrieve the provenance column from a derived relation.
+ * Only valid when DL_FLAG_PROVENANCE is set. Returns the I64 column
+ * of rule indices, or NULL if provenance not enabled/available. */
+ray_t* dl_get_provenance(dl_program_t* prog, const char* pred_name);
+
+/* Retrieve deep provenance source offsets for a derived relation.
+ * Returns an I64 vector of length nrows+1 in CSR format: offsets[i] is the
+ * start index in the source-data vector for derived row i.
+ * Only valid when DL_FLAG_PROVENANCE is set. Returns NULL if unavailable.
*/ +ray_t* dl_get_provenance_src_offsets(dl_program_t* prog, const char* pred_name); + +/* Retrieve deep provenance source data for a derived relation. + * Returns a flat I64 vector of packed source references. Each entry encodes + * (relation_index << 32) | row_index, identifying which EDB or IDB relation + * and row contributed to deriving a given output tuple. Row indices are + * truncated to 32 bits (max ~4 billion rows per relation). + * + * For rules with body-only variables (variables appearing in body atoms but + * not in the head), source entries include all body rows consistent with + * head-visible bindings. Cross-body join constraints are not re-enforced + * during source lookup, so entries may be a superset of the true derivation. + * + * Only valid when DL_FLAG_PROVENANCE is set. Returns NULL if unavailable. */ +ray_t* dl_get_provenance_src_data(dl_program_t* prog, const char* pred_name); + +/* ===== Rule builder helpers ===== */ + +/* Initialize a rule with the given head predicate and arity */ +void dl_rule_init(dl_rule_t* rule, const char* head_pred, int head_arity); + +/* Set a head argument to a variable */ +void dl_rule_head_var(dl_rule_t* rule, int pos, int var_idx); + +/* Set a head argument to an I64 constant — backward-compatible + * signature. Equivalent to dl_rule_head_const_typed(rule, pos, val, + * RAY_I64). Prefer the typed variant for new code. */ +void dl_rule_head_const(dl_rule_t* rule, int pos, int64_t val); + +/* Set a head argument to a typed constant. + * type must be RAY_I64, RAY_SYM, or RAY_F64. + * For RAY_F64 callers should pass a double reinterpreted via memcpy/union + * into val's int64 slot; dl_rule_head_const_f64 is the safe wrapper. */ +void dl_rule_head_const_typed(dl_rule_t* rule, int pos, int64_t val, int8_t type); + +/* Convenience wrapper: set a head argument to a RAY_F64 constant. */ +void dl_rule_head_const_f64(dl_rule_t* rule, int pos, double val); + +/* Add a positive body atom. Returns body literal index. 
*/ +int dl_rule_add_atom(dl_rule_t* rule, const char* pred, int arity); + +/* Set a body atom argument to a variable */ +void dl_body_set_var(dl_rule_t* rule, int body_idx, int pos, int var_idx); + +/* Set a body atom argument to a constant */ +void dl_body_set_const(dl_rule_t* rule, int body_idx, int pos, int64_t val); + +/* Add a negated body atom. Returns body literal index. */ +int dl_rule_add_neg(dl_rule_t* rule, const char* pred, int arity); + +/* Add a comparison. Returns body literal index. */ +int dl_rule_add_cmp(dl_rule_t* rule, int cmp_op, int lhs_var, int rhs_var); + +/* Add a comparison with a constant RHS. Returns body literal index. */ +int dl_rule_add_cmp_const(dl_rule_t* rule, int cmp_op, int lhs_var, int64_t rhs_val); + +/* Add an assignment: target_var = expr. Returns body literal index. */ +int dl_rule_add_assign(dl_rule_t* rule, int target_var, int op, dl_expr_t* expr); + +/* Add a builtin predicate. Returns body literal index. + * Arguments are set via dl_body_set_var (same as atoms). */ +int dl_rule_add_builtin(dl_rule_t* rule, int builtin_id, int arity); + +/* Add a comparison with expression trees on both sides. + * E.g., "X + Y < Z * 2" -> cmp_op=DL_CMP_LT, lhs=binop(+,X,Y), rhs=binop(*,Z,2). + * Returns body literal index. */ +int dl_rule_add_cmp_expr(dl_rule_t* rule, int cmp_op, dl_expr_t* lhs, dl_expr_t* rhs); + +/* Add an interval bind: decompose two consecutive columns at the fact variable's + * position into start_var and end_var. Returns body literal index. */ +int dl_rule_add_interval(dl_rule_t* rule, int fact_var, int start_var, int end_var); + +/* pred_arity is advisory; evaluator re-resolves against program EDB/IDB at compile time. 
*/ +/* Add an aggregate body literal: (op ?target pred col) + * - op: DL_AGG_COUNT (col is ignored), DL_AGG_SUM/MIN/MAX/AVG + * - target_var: variable that receives the aggregate result + * - pred: predicate to aggregate over + * - pred_arity: arity of that predicate + * - value_col: which column to aggregate (ignored for COUNT) + * Returns body literal index. */ +int dl_rule_add_agg(dl_rule_t* rule, int op, int target_var, + const char* pred, int pred_arity, int value_col); + +/* Attach group-by keys to an aggregate body literal previously added via + * dl_rule_add_agg. body_idx is that builder's return value. + * key_vars and key_cols have n_keys entries (<= DL_AGG_MAX_KEYS). + * Returns 0 on success, -1 if n_keys is out of range. */ +int dl_rule_agg_set_group(dl_rule_t* rule, int body_idx, + const int* key_vars, const int* key_cols, int n_keys); + +/* ===== Expression tree builders ===== */ + +/* Create a constant expression */ +dl_expr_t* dl_expr_const(int64_t val); + +/* Create a float constant expression */ +dl_expr_t* dl_expr_const_f64(double val); + +/* Create a variable reference expression */ +dl_expr_t* dl_expr_var(int var_idx); + +/* Create a binary operation expression (OP_ADD, OP_SUB, OP_MUL, OP_DIV) */ +dl_expr_t* dl_expr_binop(int op, dl_expr_t* left, dl_expr_t* right); + +/* ===== Internal (used by compiler) ===== */ + +/* Find relation by name. Returns index or -1. */ +int dl_find_rel(dl_program_t* prog, const char* name); + +/* Ensure an IDB relation exists for the given head predicate. + * Creates it with the correct arity if it doesn't exist yet. */ +int dl_ensure_idb(dl_program_t* prog, const char* name, int arity); + +/* Compile one rule into a ray_graph_t for one fixpoint iteration. + * delta_pos: which body atom uses the delta relation (-1 for initial pass). + * rule_idx: index of this rule in prog->rules (used for provenance). + * Returns the output node in g that produces new head tuples. 
*/ +ray_op_t* dl_compile_rule(dl_program_t* prog, dl_rule_t* rule, + int delta_pos, int rule_idx, ray_graph_t* g); + +#endif /* RAYFORCE_DATALOG_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/dump.c b/crates/rayforce-sys/vendor/rayforce/src/ops/dump.c new file mode 100644 index 0000000..3f849e8 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/dump.c @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ops.h" +#include + +/* Duplicate of find_ext() from opt.c — kept local for self-containment. 
*/
+static ray_op_ext_t* find_ext(ray_graph_t* g, uint32_t node_id) {
+    /* The dump entry point already tolerates a NULL stream and a NULL root;
+     * a NULL graph simply means "no annotations" rather than a crash. */
+    if (!g) return NULL;
+    for (uint32_t i = 0; i < g->ext_count; i++) {
+        ray_op_ext_t* ext = g->ext_nodes[i];
+        if (ext && ext->base.id == node_id)
+            return ext;
+    }
+    return NULL;
+}
+
+/* Printable name of an opcode: the OP_ enumerator without its prefix.
+ * Opcodes not listed here yield "UNKNOWN". */
+const char* ray_opcode_name(uint16_t op) {
+/* Every name equals the enumerator's own spelling, so generate the
+ * case/string pairs instead of writing each identifier twice. */
+#define OPNAME(x) case OP_##x: return #x
+    switch (op) {
+        /* sources, unary element-wise */
+        OPNAME(SCAN); OPNAME(CONST); OPNAME(NEG); OPNAME(ABS); OPNAME(NOT);
+        OPNAME(SQRT); OPNAME(LOG); OPNAME(EXP); OPNAME(CEIL); OPNAME(FLOOR);
+        OPNAME(ISNULL); OPNAME(CAST);
+        /* binary element-wise */
+        OPNAME(ADD); OPNAME(SUB); OPNAME(MUL); OPNAME(DIV); OPNAME(MOD);
+        OPNAME(EQ); OPNAME(NE); OPNAME(LT); OPNAME(LE); OPNAME(GT); OPNAME(GE);
+        OPNAME(AND); OPNAME(OR); OPNAME(MIN2); OPNAME(MAX2); OPNAME(IF);
+        /* strings and dates */
+        OPNAME(LIKE); OPNAME(ILIKE); OPNAME(UPPER); OPNAME(LOWER);
+        OPNAME(STRLEN); OPNAME(SUBSTR); OPNAME(REPLACE); OPNAME(TRIM);
+        OPNAME(CONCAT); OPNAME(EXTRACT); OPNAME(DATE_TRUNC);
+        /* reductions */
+        OPNAME(SUM); OPNAME(PROD); OPNAME(MIN); OPNAME(MAX); OPNAME(COUNT);
+        OPNAME(AVG); OPNAME(FIRST); OPNAME(LAST); OPNAME(COUNT_DISTINCT);
+        OPNAME(STDDEV); OPNAME(STDDEV_POP); OPNAME(VAR); OPNAME(VAR_POP);
+        /* relational */
+        OPNAME(FILTER); OPNAME(SORT); OPNAME(GROUP); OPNAME(PIVOT);
+        OPNAME(ANTIJOIN); OPNAME(JOIN); OPNAME(WINDOW_JOIN); OPNAME(SELECT);
+        OPNAME(HEAD); OPNAME(TAIL); OPNAME(WINDOW); OPNAME(ALIAS);
+        OPNAME(MATERIALIZE);
+        /* graph */
+        OPNAME(EXPAND); OPNAME(VAR_EXPAND); OPNAME(SHORTEST_PATH);
+        OPNAME(WCO_JOIN); OPNAME(PAGERANK); OPNAME(CONNECTED_COMP);
+        OPNAME(DIJKSTRA); OPNAME(LOUVAIN); OPNAME(DEGREE_CENT);
+        OPNAME(TOPSORT); OPNAME(DFS); OPNAME(ASTAR); OPNAME(K_SHORTEST);
+        OPNAME(CLUSTER_COEFF); OPNAME(RANDOM_WALK);
+        /* vector search */
+        OPNAME(COSINE_SIM); OPNAME(EUCLIDEAN_DIST); OPNAME(KNN);
+        OPNAME(HNSW_KNN); OPNAME(ANN_RERANK); OPNAME(KNN_RERANK);
+        default: return "UNKNOWN";
+    }
+#undef OPNAME
+}
+
+/* Printable name of a type tag (RAY_ prefix dropped); "?" for any tag not
+ * listed here. */
+static const char* type_name(int8_t t) {
+#define TYNAME(x) case RAY_##x: return #x
+    switch (t) {
+        TYNAME(LIST); TYNAME(BOOL); TYNAME(U8); TYNAME(I16); TYNAME(I32);
+        TYNAME(I64); TYNAME(F64); TYNAME(DATE); TYNAME(TIME);
+        TYNAME(TIMESTAMP); TYNAME(TABLE); TYNAME(SEL); TYNAME(SYM);
+        default: return "?";
+    }
+#undef TYNAME
+}
+
+/* Print one plan node on its own line, then its children one level deeper.
+ * Line layout: indent, opcode name, optional "(annotation)", " -> TYPE",
+ * optional " [fused]", optional " ~N rows", " #id".
+ * A NULL node prints nothing. */
+static void dump_node(FILE* f, ray_graph_t* g, ray_op_t* node, int depth) {
+    if (!node) return;
+
+    /* Indentation */
+    for (int i = 0; i < depth; i++)
+        fprintf(f, " ");
+
+    /* Opcode name */
+    fprintf(f, "%s", ray_opcode_name(node->opcode));
+
+    /* Find extended node for annotations */
+    ray_op_ext_t* ext = find_ext(g, node->id);
+
+    /* Annotations by opcode (all of them need the extended node) */
+    if (ext) {
+        switch (node->opcode) {
+            case OP_SCAN: {
+                ray_t* s = ray_sym_str(ext->sym);
+                if (s)
+                    fprintf(f, "(%.*s)", (int)s->len, (char*)ray_data(s));
+                break;
+            }
+            case OP_CONST:
+                if (ext->literal) {
+                    ray_t* lit = ext->literal;
+                    /* NOTE(review): only the positive tags are matched here.
+                     * If scalar literals carry negated tags (e.g. -RAY_I64)
+                     * they fall through to "(?)" — confirm which tag CONST
+                     * literals are created with. */
+                    switch (lit->type) {
+                        case RAY_I64:   fprintf(f, "(%lld)", (long long)lit->i64); break;
+                        case RAY_F64:   fprintf(f, "(%.6g)", lit->f64); break;
+                        case RAY_BOOL:  fprintf(f, "(%s)", lit->i64 ? "true" : "false"); break;
+                        case RAY_TABLE: fprintf(f, "(table)"); break;
+                        default:        fprintf(f, "(?)"); break;
+                    }
+                }
+                break;
+            case OP_JOIN: {
+                const char* jt = "INNER";
+                if (ext->join.join_type == 1) jt = "LEFT";
+                else if (ext->join.join_type == 2) jt = "FULL";
+                fprintf(f, "(%s, keys=%u)", jt, ext->join.n_join_keys);
+                break;
+            }
+            case OP_GROUP:
+                fprintf(f, "(keys=%u, aggs=%u)", ext->n_keys, ext->n_aggs);
+                break;
+            case OP_HEAD:
+            case OP_TAIL:
+                /* the row limit is read from the sym slot */
+                fprintf(f, "(N=%lld)", (long long)ext->sym);
+                break;
+            default:
+                break;
+        }
+    }
+
+    /* Output type */
+    fprintf(f, " -> %s", type_name(node->out_type));
+
+    /* Flags */
+    if (node->flags & OP_FLAG_FUSED)
+        fprintf(f, " [fused]");
+
+    /* Estimated rows */
+    if (node->est_rows > 0)
+        fprintf(f, " ~%u rows", node->est_rows);
+
+    /* Node ID */
+    fprintf(f, " #%u", node->id);
+
+    fprintf(f, "\n");
+
+    /* Children held by the extended node come first ... */
+    if (ext) {
+        switch (node->opcode) {
+            case OP_GROUP:
+                /* keys */
+                for (uint8_t i = 0; i < ext->n_keys; i++)
+                    dump_node(f, g, ext->keys[i], depth + 1);
+                /* agg inputs */
+                for (uint8_t i = 0; i < ext->n_aggs; i++)
+                    dump_node(f, g, ext->agg_ins[i], depth + 1);
+                break;
+            case OP_SORT:
+            case OP_SELECT:
+                for (uint8_t i = 0; i < ext->sort.n_cols; i++)
+                    dump_node(f, g, ext->sort.columns[i], depth + 1);
+                break;
+            default:
+                break;
+        }
+    }
+
+    /* ... then the standard inputs; at most the first two are followed. */
+    for (uint8_t i = 0; i < node->arity && i < 2; i++)
+        dump_node(f, g, node->inputs[i], depth + 1);
+}
+
+/* Write a human-readable dump of the plan rooted at root.
+ * out is treated as a FILE*; NULL selects stderr. */
+void ray_graph_dump(ray_graph_t* g, ray_op_t* root, void* out) {
+    FILE* f = out ? (FILE*)out : stderr;
+    fprintf(f, "=== Query Plan ===\n");
+    dump_node(f, g, root, 0);
+    fprintf(f, "==================\n");
+}
diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/embedding.c b/crates/rayforce-sys/vendor/rayforce/src/ops/embedding.c
new file mode 100644
index 0000000..4a4fc05
--- /dev/null
+++ b/crates/rayforce-sys/vendor/rayforce/src/ops/embedding.c
@@ -0,0 +1,870 @@
+/*
+ * Copyright (c) 2025-2026 Anton Kundenko
+ * All rights reserved.
+
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ops/internal.h"
+#include "lang/internal.h"
+#include "mem/sys.h"
+
+/* --------------------------------------------------------------------------
+ * exec_cosine_sim: cosine similarity of every embedding row against the
+ * query vector stored on the op's extended node.
+ *   sim = dot(a,b) / (||a|| * ||b||), or 0 when either norm is zero.
+ * Input:  RAY_F32 embedding column (flat N*D floats)
+ * Output: RAY_F64 vector of similarities (one per row)
+ * Errors: "nyi" (no extended node), "schema" (no query / dim <= 0),
+ *         "type" (column is not RAY_F32), "length" (len not a multiple of
+ *         dim), "oom" (result allocation failed).
+ * -------------------------------------------------------------------------- */
+ray_t* exec_cosine_sim(ray_graph_t* g, ray_op_t* op, ray_t* emb_vec) {
+    ray_op_ext_t* ext = find_ext(g, op->id);
+    if (!ext) return ray_error("nyi", NULL);
+
+    const int32_t dim  = ext->vector.dim;
+    const float*  qvec = ext->vector.query_vec;
+    if (!qvec || dim <= 0) return ray_error("schema", NULL);
+    if (emb_vec->type != RAY_F32) return ray_error("type", NULL);
+
+    /* The column is one flat buffer of n_rows * dim floats */
+    const int64_t n_floats = emb_vec->len;
+    const int64_t n_rows   = n_floats / dim;
+    if (n_rows * dim != n_floats) return ray_error("length", NULL);
+
+    /* ||query|| does not depend on the row: compute it once */
+    double qq = 0.0;
+    for (int32_t j = 0; j < dim; j++)
+        qq += (double)qvec[j] * (double)qvec[j];
+    const double q_len = sqrt(qq);
+
+    ray_t* sims = ray_vec_new(RAY_F64, n_rows);
+    if (!sims || RAY_IS_ERR(sims)) return ray_error("oom", NULL);
+    sims->len = n_rows;
+    double* dst = (double*)ray_data(sims);
+
+    const float* row = (const float*)ray_data(emb_vec);
+    for (int64_t r = 0; r < n_rows; r++, row += dim) {
+        double ab = 0.0;   /* dot(row, query) */
+        double aa = 0.0;   /* ||row||^2 */
+        for (int32_t j = 0; j < dim; j++) {
+            ab += (double)row[j] * (double)qvec[j];
+            aa += (double)row[j] * (double)row[j];
+        }
+        const double scale = q_len * sqrt(aa);
+        if (scale > 0.0)
+            dst[r] = ab / scale;
+        else
+            dst[r] = 0.0;   /* zero-length row or query */
+    }
+
+    return sims;
+}
+
+/* --------------------------------------------------------------------------
+ * exec_euclidean_dist: euclidean distance between embedding column and query.
+ * sqrt(sum((a_i - b_i)^2)) per row.
+ * -------------------------------------------------------------------------- */
+ray_t* exec_euclidean_dist(ray_graph_t* g, ray_op_t* op, ray_t* emb_vec) {
+    ray_op_ext_t* ext = find_ext(g, op->id);
+    if (!ext) return ray_error("nyi", NULL);
+
+    const int32_t dim  = ext->vector.dim;
+    const float*  qvec = ext->vector.query_vec;
+    if (!qvec || dim <= 0) return ray_error("schema", NULL);
+    if (emb_vec->type != RAY_F32) return ray_error("type", NULL);
+
+    /* Flat buffer of n_rows * dim floats; a ragged length is an error */
+    const int64_t n_floats = emb_vec->len;
+    const int64_t n_rows   = n_floats / dim;
+    if (n_rows * dim != n_floats) return ray_error("length", NULL);
+
+    ray_t* dists = ray_vec_new(RAY_F64, n_rows);
+    if (!dists || RAY_IS_ERR(dists)) return ray_error("oom", NULL);
+    dists->len = n_rows;
+
+    double* dst = (double*)ray_data(dists);
+    const float* row = (const float*)ray_data(emb_vec);
+    for (int64_t r = 0; r < n_rows; r++, row += dim) {
+        /* accumulate squared component differences in double precision */
+        double acc = 0.0;
+        for (int32_t j = 0; j < dim; j++) {
+            const double diff = (double)row[j] - (double)qvec[j];
+            acc += diff * diff;
+        }
+        dst[r] = sqrt(acc);
+    }
+
+    return dists;
+}
+
+/* --------------------------------------------------------------------------
+ * exec_knn: brute-force top-K nearest neighbors over a flat RAY_F32 column.
+ *
+ * Dispatches on ext->vector.metric (default COSINE — 0-initialized struct).
+ * Returns RAY_TABLE with _rowid (I64) and _dist (F64), sorted ascending so
+ * lower = closer across all metrics.
+ *
+ * Distance encoding:
+ *   COSINE → 1 - cosine_similarity
+ *   L2 → sqrt(Σ (a - b)^2)
+ *   IP → -dot(a, b)
+ * -------------------------------------------------------------------------- */
+
+/* Max-heap entry keyed on distance (root = farthest of top-K kept).
*/
+typedef struct {
+    double dist;
+    int64_t rowid;
+} knn_entry_t;
+
+/* Offer (dist, rowid) to a bounded max-heap holding the k closest entries.
+ * While the heap has fewer than k entries the candidate is always added;
+ * afterwards it replaces the root only if it is strictly closer.
+ * k must be > 0 whenever the heap is full, because heap[0] is read. */
+static void knn_heap_insert(knn_entry_t* heap, int64_t k, int64_t* size,
+                            double dist, int64_t rowid) {
+    if (*size < k) {
+        int64_t i = (*size)++;
+        heap[i].dist = dist;
+        heap[i].rowid = rowid;
+        /* Sift up (max-heap: root = largest distance = worst kept) */
+        while (i > 0) {
+            int64_t parent = (i - 1) / 2;
+            if (heap[parent].dist >= heap[i].dist) break;
+            knn_entry_t tmp = heap[parent]; heap[parent] = heap[i]; heap[i] = tmp;
+            i = parent;
+        }
+    } else if (dist < heap[0].dist) {
+        /* Replace the current worst entry, then sift it down */
+        heap[0].dist = dist;
+        heap[0].rowid = rowid;
+        int64_t i = 0;
+        while (1) {
+            int64_t left = 2*i+1, right = 2*i+2, best = i;
+            if (left < *size && heap[left].dist > heap[best].dist) best = left;
+            if (right < *size && heap[right].dist > heap[best].dist) best = right;
+            if (best == i) break;
+            knn_entry_t tmp = heap[i]; heap[i] = heap[best]; heap[best] = tmp;
+            i = best;
+        }
+    }
+}
+
+/* Distance between one row and the query under the given metric, encoded so
+ * that lower is closer: L2 → sqrt(sum of squared diffs), IP → -dot,
+ * anything else is treated as COSINE → 1 - cos (1.0 when a norm is zero).
+ * q_norm is the precomputed query norm and is only read for COSINE. */
+static double knn_row_dist(int32_t metric,
+                           const float* row, const float* query,
+                           double q_norm, int32_t dim) {
+    if (metric == RAY_HNSW_L2) {
+        double s = 0.0;
+        for (int32_t j = 0; j < dim; j++) {
+            double d = (double)row[j] - (double)query[j];
+            s += d * d;
+        }
+        return sqrt(s);
+    }
+    double dot = 0.0, r_norm_sq = 0.0;
+    for (int32_t j = 0; j < dim; j++) {
+        dot += (double)row[j] * (double)query[j];
+        if (metric == RAY_HNSW_COSINE) r_norm_sq += (double)row[j] * (double)row[j];
+    }
+    if (metric == RAY_HNSW_IP) return -dot;
+    /* COSINE */
+    double denom = q_norm * sqrt(r_norm_sq);
+    return (denom > 0.0) ? 1.0 - (dot / denom) : 1.0;
+}
+
+/* Allocate the _rowid (I64) and _dist (F64) output columns with length n.
+ * Returns false, with nothing left allocated, if either allocation fails. */
+static bool knn_alloc_cols(int64_t n, ray_t** rowid_vec, ray_t** dist_vec) {
+    ray_t* ids = ray_vec_new(RAY_I64, n);
+    ray_t* dists = ray_vec_new(RAY_F64, n);
+    if (!ids || RAY_IS_ERR(ids) || !dists || RAY_IS_ERR(dists)) {
+        if (ids && !RAY_IS_ERR(ids)) ray_release(ids);
+        if (dists && !RAY_IS_ERR(dists)) ray_release(dists);
+        return false;
+    }
+    ids->len = n;
+    dists->len = n;
+    *rowid_vec = ids;
+    *dist_vec = dists;
+    return true;
+}
+
+/* Assemble the {_rowid, _dist} result table from two filled columns.
+ * The caller's reference to both columns is released on every path. */
+static ray_t* knn_result_table(ray_t* rowid_vec, ray_t* dist_vec) {
+    ray_t* result = ray_table_new(2);
+    if (!result || RAY_IS_ERR(result)) {
+        ray_release(rowid_vec);
+        ray_release(dist_vec);
+        return ray_error("oom", NULL);
+    }
+    result = ray_table_add_col(result, sym_intern_safe("_rowid", 6), rowid_vec);
+    ray_release(rowid_vec);
+    /* Do not feed a failed result back in as the table for the second column */
+    if (result && !RAY_IS_ERR(result))
+        result = ray_table_add_col(result, sym_intern_safe("_dist", 5), dist_vec);
+    ray_release(dist_vec);
+    return result;
+}
+
+ray_t* exec_knn(ray_graph_t* g, ray_op_t* op, ray_t* emb_vec) {
+    ray_op_ext_t* ext = find_ext(g, op->id);
+    if (!ext) return ray_error("nyi", NULL);
+
+    const float* query = ext->vector.query_vec;
+    int32_t dim = ext->vector.dim;
+    int64_t k = ext->vector.k;
+    int32_t metric = ext->vector.metric;
+    /* Out-of-range metric falls back to cosine */
+    if (metric < RAY_HNSW_COSINE || metric > RAY_HNSW_IP) metric = RAY_HNSW_COSINE;
+
+    if (!query || dim <= 0 || k <= 0) return ray_error("schema", NULL);
+    if (emb_vec->type != RAY_F32) return ray_error("type", NULL);
+
+    int64_t total = emb_vec->len;
+    int64_t nrows = total / dim;
+    if (nrows * dim != total) return ray_error("length", NULL);
+    if (k > nrows) k = nrows;
+
+    ray_t* rowid_vec = NULL;
+    ray_t* dist_vec = NULL;
+
+    /* Empty column: k was clamped to 0. Return an empty table directly
+     * rather than requesting a zero-byte heap, whose NULL result would be
+     * reported as "oom". */
+    if (k == 0) {
+        if (!knn_alloc_cols(0, &rowid_vec, &dist_vec)) return ray_error("oom", NULL);
+        return knn_result_table(rowid_vec, dist_vec);
+    }
+
+    const float* data = (const float*)ray_data(emb_vec);
+
+    /* Precompute query norm once (only used by cosine). */
+    double q_norm = 0.0;
+    if (metric == RAY_HNSW_COSINE) {
+        double q_norm_sq = 0.0;
+        for (int32_t j = 0; j < dim; j++)
+            q_norm_sq += (double)query[j] * (double)query[j];
+        q_norm = sqrt(q_norm_sq);
+    }
+
+    ray_t* heap_hdr = NULL;
+    knn_entry_t* heap = (knn_entry_t*)scratch_alloc(&heap_hdr, (size_t)k * sizeof(knn_entry_t));
+    if (!heap) return ray_error("oom", NULL);
+    int64_t heap_size = 0;
+
+    for (int64_t i = 0; i < nrows; i++) {
+        double d = knn_row_dist(metric, data + i * dim, query, q_norm, dim);
+        knn_heap_insert(heap, k, &heap_size, d, i);
+    }
+
+    /* Insertion sort ascending by distance (k is small). */
+    for (int64_t i = 1; i < heap_size; i++) {
+        knn_entry_t key = heap[i];
+        int64_t j = i - 1;
+        while (j >= 0 && heap[j].dist > key.dist) {
+            heap[j + 1] = heap[j];
+            j--;
+        }
+        heap[j + 1] = key;
+    }
+
+    if (!knn_alloc_cols(heap_size, &rowid_vec, &dist_vec)) {
+        scratch_free(heap_hdr);
+        return ray_error("oom", NULL);
+    }
+
+    int64_t* rdata = (int64_t*)ray_data(rowid_vec);
+    double* ddata = (double*)ray_data(dist_vec);
+    for (int64_t i = 0; i < heap_size; i++) {
+        rdata[i] = heap[i].rowid;
+        ddata[i] = heap[i].dist;
+    }
+    scratch_free(heap_hdr);
+
+    return knn_result_table(rowid_vec, dist_vec);
+}
+
+/* exec_hnsw_knn: approximate top-K via the HNSW index attached to the op.
+ * Returns the same {_rowid, _dist} table shape as exec_knn.
+ * Errors: "nyi" (no extended node), "schema" (missing index / query, or
+ * dim <= 0, or k <= 0), "oom" (allocation failure or a negative search
+ * result). */
+ray_t* exec_hnsw_knn(ray_graph_t* g, ray_op_t* op) {
+    ray_op_ext_t* ext = find_ext(g, op->id);
+    if (!ext) return ray_error("nyi", NULL);
+
+    ray_hnsw_t* idx = (ray_hnsw_t*)ext->hnsw.hnsw_idx;
+    const float* query = ext->hnsw.query_vec;
+    int32_t dim = ext->hnsw.dim;
+    int64_t k = ext->hnsw.k;
+    int32_t ef = ext->hnsw.ef_search;
+
+    if (!idx || !query || dim <= 0 || k <= 0) return ray_error("schema", NULL);
+
+    /* Pre-allocate output arrays */
+    ray_t* ids_hdr = NULL;
+    int64_t* out_ids = (int64_t*)scratch_alloc(&ids_hdr, (size_t)k * sizeof(int64_t));
+    if (!out_ids) return ray_error("oom", NULL);
+
+    ray_t* dists_hdr = NULL;
+    double* out_dists = (double*)scratch_alloc(&dists_hdr, (size_t)k * sizeof(double));
+    if (!out_dists) { scratch_free(ids_hdr); return ray_error("oom", NULL); }
+
+    int64_t n_found = ray_hnsw_search(idx, query, dim, k, ef, out_ids, out_dists);
+    if (n_found < 0) {
+        scratch_free(ids_hdr);
+        scratch_free(dists_hdr);
+        return ray_error("oom", NULL);
+    }
+
+    /* Build output table: _rowid (I64), _dist (F64). ray_hnsw_search writes
+     * metric-native distances (lower = closer across COSINE / L2 / IP), so we
+     * pass them through unchanged. */
+    ray_t* rowid_vec = NULL;
+    ray_t* dist_vec = NULL;
+    if (!knn_alloc_cols(n_found, &rowid_vec, &dist_vec)) {
+        scratch_free(ids_hdr);
+        scratch_free(dists_hdr);
+        return ray_error("oom", NULL);
+    }
+
+    int64_t* rdata = (int64_t*)ray_data(rowid_vec);
+    double* ddata = (double*)ray_data(dist_vec);
+    for (int64_t i = 0; i < n_found; i++) {
+        rdata[i] = out_ids[i];
+        ddata[i] = out_dists[i];
+    }
+
+    scratch_free(ids_hdr);
+    scratch_free(dists_hdr);
+
+    return knn_result_table(rowid_vec, dist_vec);
+}
+
+/* ==========================================================================
+ * Rayfall builtins — direct metrics, exact KNN, HNSW lifecycle/query
+ *
+ * Column shape for all builtins accepting a "column" argument:
+ *   RAY_LIST whose entries are numeric vectors (RAY_F32 preferred,
+ *   RAY_F64/RAY_I32/RAY_I64 coerced to double). All entries must have
+ *   the same length == D.
+ * + * Output of knn / ann: table {_rowid: I64, _dist: F64} sorted ascending. + * ========================================================================== */ + +static bool rayvec_is_numeric(ray_t* v) { + if (!v || !ray_is_vec(v)) return false; + return v->type == RAY_F32 || v->type == RAY_F64 + || v->type == RAY_I32 || v->type == RAY_I64; +} + +static double rayvec_at_f64(ray_t* v, int64_t i) { + void* d = ray_data(v); + switch (v->type) { + case RAY_F32: return (double)((float*)d)[i]; + case RAY_F64: return ((double*)d)[i]; + case RAY_I32: return (double)((int32_t*)d)[i]; + case RAY_I64: return (double)((int64_t*)d)[i]; + default: return 0.0; + } +} + +/* Copy a numeric vector into a float buffer. Assumes v->len == dim. */ +static void rayvec_to_floats(ray_t* v, float* dst, int32_t dim) { + if (v->type == RAY_F32) { + memcpy(dst, ray_data(v), (size_t)dim * sizeof(float)); + return; + } + for (int32_t i = 0; i < dim; i++) dst[i] = (float)rayvec_at_f64(v, i); +} + +/* Validate list of numeric vectors, set *out_dim to the common length. + * Returns 0 on success, non-zero on error. */ +static int list_vec_validate(ray_t* list, int32_t* out_dim) { + if (!list || list->type != RAY_LIST) return 1; + if (list->len <= 0) { *out_dim = 0; return 0; } + ray_t* first = ray_list_get(list, 0); + if (!rayvec_is_numeric(first) || first->len <= 0) return 2; + int32_t dim = (int32_t)first->len; + for (int64_t i = 1; i < list->len; i++) { + ray_t* e = ray_list_get(list, i); + if (!rayvec_is_numeric(e) || e->len != dim) return 3; + } + *out_dim = dim; + return 0; +} + +/* Flatten LIST of numeric vectors into a new float[] buffer. + * Caller frees with ray_sys_free. 
*/ +static float* list_flatten_floats(ray_t* list, int32_t dim, int64_t* out_n) { + int64_t n = list->len; + *out_n = n; + if (n == 0) return NULL; + float* buf = (float*)ray_sys_alloc((size_t)n * (size_t)dim * sizeof(float)); + if (!buf) return NULL; + for (int64_t i = 0; i < n; i++) { + ray_t* e = ray_list_get(list, i); + rayvec_to_floats(e, buf + i * dim, dim); + } + return buf; +} + +/* Metric kinds: + * COS_DIST → 1 - cos(a, b) (lower = closer, range [0, 2]) + * INNER_PROD → raw dot(a, b) (sign varies — not a distance) + * L2_DIST → sqrt(Σ (a - b)^2) (lower = closer) + * These are the values returned by cos-dist / inner-prod / l2-dist builtins. */ +typedef enum { MET_COS_DIST, MET_INNER_PROD, MET_L2_DIST } metric_kind_t; + +static double row_score(metric_kind_t k, ray_t* row, + const double* q, double q_norm, int32_t dim) { + double acc = 0.0, r_norm_sq = 0.0; + if (k == MET_L2_DIST) { + for (int32_t j = 0; j < dim; j++) { + double d = rayvec_at_f64(row, j) - q[j]; + acc += d * d; + } + return sqrt(acc); + } + for (int32_t j = 0; j < dim; j++) { + double a = rayvec_at_f64(row, j); + acc += a * q[j]; + if (k == MET_COS_DIST) r_norm_sq += a * a; + } + if (k == MET_INNER_PROD) return acc; + /* COS_DIST = 1 - cos_sim */ + double denom = q_norm * sqrt(r_norm_sq); + double sim = (denom > 0.0) ? acc / denom : 0.0; + return 1.0 - sim; +} + +/* Extract query vector to a double[] scratch buffer. */ +static double* query_to_doubles(ray_t* q, int32_t dim, double* q_norm_out) { + double* buf = (double*)ray_sys_alloc((size_t)dim * sizeof(double)); + if (!buf) return NULL; + double ns = 0.0; + for (int32_t j = 0; j < dim; j++) { + buf[j] = rayvec_at_f64(q, j); + ns += buf[j] * buf[j]; + } + *q_norm_out = sqrt(ns); + return buf; +} + +/* Binary dispatcher for cos-dist / inner-prod / l2-dist. */ +static ray_t* vec_binary_metric(metric_kind_t kind, ray_t* a, ray_t* b) { + if (!a || !b) return ray_error("type", NULL); + + /* LIST × vec → F64 vector (one score per list entry). 
+ * vec × LIST → same (treat the LIST as the column). */ + ray_t* list = NULL; + ray_t* query = NULL; + if (a->type == RAY_LIST && rayvec_is_numeric(b)) { list = a; query = b; } + else if (b->type == RAY_LIST && rayvec_is_numeric(a)) { list = b; query = a; } + + if (list) { + int32_t dim; + if (list_vec_validate(list, &dim) != 0) return ray_error("type", NULL); + if (query->len != dim) return ray_error("length", NULL); + + double q_norm; + double* q = query_to_doubles(query, dim, &q_norm); + if (!q) return ray_error("oom", NULL); + + int64_t n = list->len; + ray_t* result = ray_vec_new(RAY_F64, n); + if (!result || RAY_IS_ERR(result)) { ray_sys_free(q); return ray_error("oom", NULL); } + result->len = n; + double* out = (double*)ray_data(result); + for (int64_t i = 0; i < n; i++) { + ray_t* row = ray_list_get(list, i); + out[i] = row_score(kind, row, q, q_norm, dim); + } + ray_sys_free(q); + return result; + } + + /* vec × vec → scalar */ + if (!rayvec_is_numeric(a) || !rayvec_is_numeric(b)) return ray_error("type", NULL); + if (a->len != b->len || a->len <= 0) return ray_error("length", NULL); + int32_t dim = (int32_t)a->len; + + double q_norm; + double* q = query_to_doubles(b, dim, &q_norm); + if (!q) return ray_error("oom", NULL); + double v = row_score(kind, a, q, q_norm, dim); + ray_sys_free(q); + return make_f64(v); +} + +ray_t* ray_cos_dist_fn (ray_t* a, ray_t* b) { return vec_binary_metric(MET_COS_DIST, a, b); } +ray_t* ray_inner_prod_fn (ray_t* a, ray_t* b) { return vec_binary_metric(MET_INNER_PROD, a, b); } +ray_t* ray_l2_dist_fn (ray_t* a, ray_t* b) { return vec_binary_metric(MET_L2_DIST, a, b); } + +/* (norm x): x is numeric vec → F64 scalar; x is LIST of numeric vecs → F64 vector. 
*/ +ray_t* ray_norm_fn(ray_t* x) { + if (!x) return ray_error("type", NULL); + if (x->type == RAY_LIST) { + int32_t dim; + if (list_vec_validate(x, &dim) != 0) return ray_error("type", NULL); + int64_t n = x->len; + ray_t* result = ray_vec_new(RAY_F64, n); + if (!result || RAY_IS_ERR(result)) return ray_error("oom", NULL); + result->len = n; + double* out = (double*)ray_data(result); + for (int64_t i = 0; i < n; i++) { + ray_t* v = ray_list_get(x, i); + double s = 0.0; + for (int32_t j = 0; j < dim; j++) { + double e = rayvec_at_f64(v, j); + s += e * e; + } + out[i] = sqrt(s); + } + return result; + } + if (!rayvec_is_numeric(x)) return ray_error("type", NULL); + double s = 0.0; + for (int64_t i = 0; i < x->len; i++) { + double e = rayvec_at_f64(x, i); + s += e * e; + } + return make_f64(sqrt(s)); +} + +/* Parse a metric symbol. Accepted: 'cosine, 'l2, 'ip. Matches the three + * distance flavors. */ +static int parse_metric_sym(ray_t* s, ray_hnsw_metric_t* out) { + if (!s || s->type != -RAY_SYM) return 0; + int64_t id = s->i64; + if (id == ray_sym_find("cosine", 6)) { *out = RAY_HNSW_COSINE; return 1; } + if (id == ray_sym_find("l2", 2)) { *out = RAY_HNSW_L2; return 1; } + if (id == ray_sym_find("ip", 2)) { *out = RAY_HNSW_IP; return 1; } + return 0; +} + +static int64_t atom_to_i64(ray_t* a) { + if (!a) return 0; + switch (a->type) { + case -RAY_I64: return a->i64; + case -RAY_I32: return (int64_t)a->i32; + case -RAY_I16: return (int64_t)a->i16; + default: return 0; + } +} + +static bool atom_is_int(ray_t* a) { + return a && (a->type == -RAY_I64 || a->type == -RAY_I32 || a->type == -RAY_I16); +} + +/* (knn col query k [metric]) → table {_rowid, _dist} */ +ray_t* ray_knn_fn(ray_t** args, int64_t n) { + if (n < 3 || n > 4) return ray_error("rank", NULL); + ray_t* col = args[0]; + ray_t* query = args[1]; + ray_t* katom = args[2]; + if (!col || col->type != RAY_LIST) return ray_error("type", NULL); + if (!rayvec_is_numeric(query)) return ray_error("type", NULL); + if 
(!atom_is_int(katom)) return ray_error("type", NULL); + + ray_hnsw_metric_t metric = RAY_HNSW_COSINE; + if (n == 4 && !parse_metric_sym(args[3], &metric)) return ray_error("domain", NULL); + + int32_t dim; + if (list_vec_validate(col, &dim) != 0) return ray_error("type", NULL); + if (query->len != dim) return ray_error("length", NULL); + + int64_t k = atom_to_i64(katom); + if (k <= 0) return ray_error("domain", NULL); + int64_t nrows = col->len; + if (k > nrows) k = nrows; + if (nrows == 0) { + /* Empty result table. */ + ray_t* rv = ray_vec_new(RAY_I64, 0); + ray_t* dv = ray_vec_new(RAY_F64, 0); + ray_t* tbl = ray_table_new(2); + tbl = ray_table_add_col(tbl, sym_intern_safe("_rowid", 6), rv); + tbl = ray_table_add_col(tbl, sym_intern_safe("_dist", 5), dv); + ray_release(rv); ray_release(dv); + return tbl; + } + + /* Prepare query as doubles (cached across all rows). */ + double q_norm; + double* q = query_to_doubles(query, dim, &q_norm); + if (!q) return ray_error("oom", NULL); + + /* Max-heap on distance (root = farthest of top-K kept). */ + typedef struct { double d; int64_t id; } ent_t; + ent_t* heap = (ent_t*)ray_sys_alloc((size_t)k * sizeof(ent_t)); + if (!heap) { ray_sys_free(q); return ray_error("oom", NULL); } + int64_t hsz = 0; + + for (int64_t i = 0; i < nrows; i++) { + ray_t* row = ray_list_get(col, i); + double d; + switch (metric) { + case RAY_HNSW_L2: + d = row_score(MET_L2_DIST, row, q, q_norm, dim); + break; + case RAY_HNSW_IP: + /* Negate inner product so lower = closer. 
*/ + d = -row_score(MET_INNER_PROD, row, q, q_norm, dim); + break; + case RAY_HNSW_COSINE: + default: + d = row_score(MET_COS_DIST, row, q, q_norm, dim); + break; + } + + if (hsz < k) { + int64_t j = hsz++; + heap[j] = (ent_t){ d, i }; + while (j > 0) { + int64_t p = (j - 1) / 2; + if (heap[p].d >= heap[j].d) break; + ent_t t = heap[p]; heap[p] = heap[j]; heap[j] = t; + j = p; + } + } else if (d < heap[0].d) { + heap[0] = (ent_t){ d, i }; + int64_t j = 0; + for (;;) { + int64_t l = 2*j+1, r = 2*j+2, best = j; + if (l < hsz && heap[l].d > heap[best].d) best = l; + if (r < hsz && heap[r].d > heap[best].d) best = r; + if (best == j) break; + ent_t t = heap[j]; heap[j] = heap[best]; heap[best] = t; + j = best; + } + } + } + + ray_sys_free(q); + + /* Sort ascending by distance. */ + for (int64_t i = 1; i < hsz; i++) { + ent_t key = heap[i]; + int64_t j = i - 1; + while (j >= 0 && heap[j].d > key.d) { + heap[j + 1] = heap[j]; + j--; + } + heap[j + 1] = key; + } + + ray_t* rv = ray_vec_new(RAY_I64, hsz); + ray_t* dv = ray_vec_new(RAY_F64, hsz); + if (!rv || RAY_IS_ERR(rv) || !dv || RAY_IS_ERR(dv)) { + ray_sys_free(heap); + if (rv && !RAY_IS_ERR(rv)) ray_release(rv); + if (dv && !RAY_IS_ERR(dv)) ray_release(dv); + return ray_error("oom", NULL); + } + int64_t* rd = (int64_t*)ray_data(rv); + double* dd = (double*)ray_data(dv); + for (int64_t i = 0; i < hsz; i++) { rd[i] = heap[i].id; dd[i] = heap[i].d; } + rv->len = hsz; + dv->len = hsz; + ray_sys_free(heap); + + ray_t* tbl = ray_table_new(2); + if (!tbl || RAY_IS_ERR(tbl)) { ray_release(rv); ray_release(dv); return ray_error("oom", NULL); } + tbl = ray_table_add_col(tbl, sym_intern_safe("_rowid", 6), rv); + ray_release(rv); + tbl = ray_table_add_col(tbl, sym_intern_safe("_dist", 5), dv); + ray_release(dv); + return tbl; +} + +/* ---------- HNSW handle plumbing ---------- */ + +static ray_hnsw_t* hnsw_unwrap(ray_t* h) { + if (!h) return NULL; + if (h->type != -RAY_I64) return NULL; + if (!(h->attrs & RAY_ATTR_HNSW)) return 
NULL; + return (ray_hnsw_t*)(uintptr_t)h->i64; +} + +static ray_t* hnsw_wrap(ray_hnsw_t* idx) { + ray_t* h = ray_alloc(0); + if (!h || RAY_IS_ERR(h)) return h ? h : ray_error("oom", NULL); + h->type = -RAY_I64; + h->attrs |= RAY_ATTR_HNSW; + h->i64 = (int64_t)(uintptr_t)idx; + return h; +} + +/* (hnsw-build col [metric] [M] [ef_c]) → I64 handle (RAY_ATTR_HNSW) */ +ray_t* ray_hnsw_build_fn(ray_t** args, int64_t n) { + if (n < 1 || n > 4) return ray_error("rank", NULL); + ray_t* col = args[0]; + if (!col || col->type != RAY_LIST) return ray_error("type", NULL); + + ray_hnsw_metric_t metric = RAY_HNSW_COSINE; + if (n >= 2 && !parse_metric_sym(args[1], &metric)) return ray_error("domain", NULL); + + int32_t M = HNSW_DEFAULT_M; + if (n >= 3) { + if (!atom_is_int(args[2])) return ray_error("type", NULL); + int64_t v = atom_to_i64(args[2]); + if (v > 0 && v <= 512) M = (int32_t)v; + } + int32_t ef_c = HNSW_DEFAULT_EF_C; + if (n >= 4) { + if (!atom_is_int(args[3])) return ray_error("type", NULL); + int64_t v = atom_to_i64(args[3]); + if (v > 0 && v <= 4096) ef_c = (int32_t)v; + } + + int32_t dim; + if (list_vec_validate(col, &dim) != 0) return ray_error("type", NULL); + if (dim <= 0) return ray_error("length", NULL); + + int64_t n_rows; + float* flat = list_flatten_floats(col, dim, &n_rows); + if (!flat && n_rows > 0) return ray_error("oom", NULL); + + ray_hnsw_t* idx = ray_hnsw_build(flat, n_rows, dim, metric, M, ef_c); + /* ray_hnsw_build COPIES the vectors (idx->owns_data == true), so free our scratch. 
*/ + if (flat) ray_sys_free(flat); + if (!idx) return ray_error("oom", NULL); + + ray_t* h = hnsw_wrap(idx); + if (!h || RAY_IS_ERR(h)) { ray_hnsw_free(idx); return h; } + return h; +} + +/* (ann handle query k [ef_s]) → table {_rowid, _dist} */ +ray_t* ray_ann_fn(ray_t** args, int64_t n) { + if (n < 3 || n > 4) return ray_error("rank", NULL); + ray_hnsw_t* idx = hnsw_unwrap(args[0]); + if (!idx) return ray_error("type", NULL); + if (!rayvec_is_numeric(args[1])) return ray_error("type", NULL); + if (!atom_is_int(args[2])) return ray_error("type", NULL); + + int32_t dim = idx->dim; + if (args[1]->len != dim) return ray_error("length", NULL); + int64_t k = atom_to_i64(args[2]); + if (k <= 0) return ray_error("domain", NULL); + + int32_t ef = (int32_t)k; + if (ef < HNSW_DEFAULT_EF_S) ef = HNSW_DEFAULT_EF_S; + if (n == 4) { + if (!atom_is_int(args[3])) return ray_error("type", NULL); + int64_t v = atom_to_i64(args[3]); + if (v > 0 && v <= 4096) ef = (int32_t)v; + } + + /* Copy query into float[] scratch. 
*/ + float* qbuf = (float*)ray_sys_alloc((size_t)dim * sizeof(float)); + if (!qbuf) return ray_error("oom", NULL); + rayvec_to_floats(args[1], qbuf, dim); + + int64_t* out_ids = (int64_t*)ray_sys_alloc((size_t)k * sizeof(int64_t)); + double* out_ds = (double*)ray_sys_alloc((size_t)k * sizeof(double)); + if (!out_ids || !out_ds) { + ray_sys_free(qbuf); + if (out_ids) ray_sys_free(out_ids); + if (out_ds) ray_sys_free(out_ds); + return ray_error("oom", NULL); + } + + int64_t found = ray_hnsw_search(idx, qbuf, dim, k, ef, out_ids, out_ds); + if (found < 0) { + ray_sys_free(qbuf); ray_sys_free(out_ids); ray_sys_free(out_ds); + return ray_error("oom", NULL); + } + + ray_t* rv = ray_vec_new(RAY_I64, found); + ray_t* dv = ray_vec_new(RAY_F64, found); + if (!rv || RAY_IS_ERR(rv) || !dv || RAY_IS_ERR(dv)) { + ray_sys_free(qbuf); ray_sys_free(out_ids); ray_sys_free(out_ds); + if (rv && !RAY_IS_ERR(rv)) ray_release(rv); + if (dv && !RAY_IS_ERR(dv)) ray_release(dv); + return ray_error("oom", NULL); + } + int64_t* rd = (int64_t*)ray_data(rv); + double* dd = (double*)ray_data(dv); + for (int64_t i = 0; i < found; i++) { rd[i] = out_ids[i]; dd[i] = out_ds[i]; } + rv->len = found; + dv->len = found; + ray_sys_free(qbuf); ray_sys_free(out_ids); ray_sys_free(out_ds); + + ray_t* tbl = ray_table_new(2); + if (!tbl || RAY_IS_ERR(tbl)) { ray_release(rv); ray_release(dv); return ray_error("oom", NULL); } + tbl = ray_table_add_col(tbl, sym_intern_safe("_rowid", 6), rv); + ray_release(rv); + tbl = ray_table_add_col(tbl, sym_intern_safe("_dist", 5), dv); + ray_release(dv); + return tbl; +} + +/* (hnsw-free handle) → null. Idempotent: clearing the ATTR on success + * means a second call returns a type error rather than double-freeing. 
*/ +ray_t* ray_hnsw_free_fn(ray_t* h) { + ray_hnsw_t* idx = hnsw_unwrap(h); + if (!idx) return ray_error("type", NULL); + ray_hnsw_free(idx); + h->i64 = 0; + h->attrs &= ~RAY_ATTR_HNSW; + return RAY_NULL_OBJ; +} + +/* (hnsw-save handle path) → null */ +ray_t* ray_hnsw_save_fn(ray_t* h, ray_t* path) { + ray_hnsw_t* idx = hnsw_unwrap(h); + if (!idx) return ray_error("type", NULL); + if (!path || path->type != -RAY_STR) return ray_error("type", NULL); + const char* p = ray_str_ptr(path); + size_t len = ray_str_len(path); + if (!p || len == 0 || len >= 1023) return ray_error("domain", NULL); + char buf[1024]; + memcpy(buf, p, len); + buf[len] = '\0'; + ray_err_t err = ray_hnsw_save(idx, buf); + if (err != RAY_OK) return ray_error("io", NULL); + return RAY_NULL_OBJ; +} + +/* (hnsw-load path) → I64 handle */ +ray_t* ray_hnsw_load_fn(ray_t* path) { + if (!path || path->type != -RAY_STR) return ray_error("type", NULL); + const char* p = ray_str_ptr(path); + size_t len = ray_str_len(path); + if (!p || len == 0 || len >= 1023) return ray_error("domain", NULL); + char buf[1024]; + memcpy(buf, p, len); + buf[len] = '\0'; + ray_hnsw_t* idx = ray_hnsw_load(buf); + if (!idx) return ray_error("io", NULL); + ray_t* h = hnsw_wrap(idx); + if (!h || RAY_IS_ERR(h)) { ray_hnsw_free(idx); return h; } + return h; +} + +/* (hnsw-info handle) → dict { nrows, dim, metric, nlayers, M, efc }. + * Keys avoid hyphens so the 'quote-tick' syntax works: 'nrows, 'dim, etc. 
*/ +ray_t* ray_hnsw_info_fn(ray_t* h) { + ray_hnsw_t* idx = hnsw_unwrap(h); + if (!idx) return ray_error("type", NULL); + + const char* mname = "cosine"; + switch ((ray_hnsw_metric_t)idx->metric) { + case RAY_HNSW_L2: mname = "l2"; break; + case RAY_HNSW_IP: mname = "ip"; break; + default: break; + } + + ray_t* keys = ray_sym_vec_new(RAY_SYM_W64, 6); + if (RAY_IS_ERR(keys)) return keys; + ray_t* vals = ray_list_new(6); + if (RAY_IS_ERR(vals)) { ray_release(keys); return vals; } + + struct { const char* name; size_t nlen; ray_t* val; } rows[] = { + { "nrows", 5, make_i64(idx->n_nodes) }, + { "dim", 3, make_i64((int64_t)idx->dim) }, + { "metric", 6, ray_sym(sym_intern_safe(mname, strlen(mname))) }, + { "nlayers", 7, make_i64((int64_t)idx->n_layers) }, + { "M", 1, make_i64((int64_t)idx->M) }, + { "efc", 3, make_i64((int64_t)idx->ef_construction) }, + }; + for (size_t i = 0; i < sizeof(rows)/sizeof(rows[0]); i++) { + int64_t s = sym_intern_safe(rows[i].name, rows[i].nlen); + keys = ray_vec_append(keys, &s); + vals = ray_list_append(vals, rows[i].val); + ray_release(rows[i].val); + } + return ray_dict_new(keys, vals); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/exec.c b/crates/rayforce-sys/vendor/rayforce/src/ops/exec.c new file mode 100644 index 0000000..a28f41a --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/exec.c @@ -0,0 +1,2272 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ops/internal.h" +#include "ops/rowsel.h" +#include "mem/sys.h" + +/* Global profiler instance (zero-initialized = inactive) */ +ray_profile_t g_ray_profile; + +/* -------------------------------------------------------------------------- + * Materialize a MAPCOMMON column into a flat RAY_SYM vector. + * Expands key_values × row_counts into one SYM ID per row. 
+ * -------------------------------------------------------------------------- */
+ray_t* materialize_mapcommon(ray_t* mc) {
+    ray_t** parts = (ray_t**)ray_data(mc);
+    ray_t* keys = parts[0];        /* key_values: typed vec (DATE/I64/SYM) */
+    ray_t* row_counts = parts[1];  /* row_counts: RAY_I64 vec of n_parts */
+
+    const int64_t n_parts = keys->len;
+    const int8_t key_type = keys->type;
+    const size_t width = (size_t)ray_sym_elem_size(key_type, keys->attrs);
+    const char* key_bytes = (const char*)ray_data(keys);
+    const int64_t* counts = (const int64_t*)ray_data(row_counts);
+
+    /* Output length is the sum of all partition row counts. */
+    int64_t total = 0;
+    for (int64_t p = 0; p < n_parts; p++) total += counts[p];
+
+    ray_t* flat = ray_vec_new(key_type, total);
+    if (!flat || RAY_IS_ERR(flat)) return ray_error("oom", NULL);
+    flat->len = total;
+
+    /* Broadcast each partition's key over its run of rows.  8- and 4-byte
+     * keys use typed stores; other widths are copied element by element. */
+    char* out = (char*)ray_data(flat);
+    int64_t filled = 0;
+    for (int64_t p = 0; p < n_parts; p++) {
+        const int64_t cnt = counts[p];
+        const char* key = key_bytes + (size_t)p * width;
+        switch (width) {
+            case 8: {
+                uint64_t v;
+                memcpy(&v, key, 8);
+                uint64_t* dst = (uint64_t*)(out + filled * 8);
+                for (int64_t r = 0; r < cnt; r++) dst[r] = v;
+                break;
+            }
+            case 4: {
+                uint32_t v;
+                memcpy(&v, key, 4);
+                uint32_t* dst = (uint32_t*)(out + filled * 4);
+                for (int64_t r = 0; r < cnt; r++) dst[r] = v;
+                break;
+            }
+            default:
+                for (int64_t r = 0; r < cnt; r++)
+                    memcpy(out + (filled + r) * width, key, width);
+                break;
+        }
+        filled += cnt;
+    }
+    return flat;
+}
+
+/* Materialize first N rows of a MAPCOMMON column into a flat typed vector.
*/ +ray_t* materialize_mapcommon_head(ray_t* mc, int64_t n) { + ray_t** mc_ptrs = (ray_t**)ray_data(mc); + ray_t* kv = mc_ptrs[0]; + ray_t* rc = mc_ptrs[1]; + int64_t n_parts = kv->len; + int8_t kv_type = kv->type; + size_t esz = (size_t)ray_sym_elem_size(kv_type, kv->attrs); + const char* kdata = (const char*)ray_data(kv); + const int64_t* counts = (const int64_t*)ray_data(rc); + + ray_t* flat = ray_vec_new(kv_type, n); + if (!flat || RAY_IS_ERR(flat)) return ray_error("oom", NULL); + flat->len = n; + + char* out = (char*)ray_data(flat); + int64_t off = 0; + for (int64_t p = 0; p < n_parts && off < n; p++) { + int64_t take = counts[p]; + if (take > n - off) take = n - off; + if (esz == 8) { + uint64_t v; + memcpy(&v, kdata + (size_t)p * 8, 8); + uint64_t* dst = (uint64_t*)(out + off * 8); + for (int64_t r = 0; r < take; r++) dst[r] = v; + } else if (esz == 4) { + uint32_t v; + memcpy(&v, kdata + (size_t)p * 4, 4); + uint32_t* dst = (uint32_t*)(out + off * 4); + for (int64_t r = 0; r < take; r++) dst[r] = v; + } else { + for (int64_t r = 0; r < take; r++) + memcpy(out + (off + r) * esz, kdata + (size_t)p * esz, esz); + } + off += take; + } + return flat; +} + +/* Materialize MAPCOMMON through a boolean filter predicate. 
+ * mc's data holds two pointers: key_values (one key per partition) and
+ * row_counts (rows per partition).  pred is walked morsel by morsel as one
+ * byte per row; every row with a non-zero byte contributes its partition's
+ * key to the output.  pass_count sizes the result vector.
+ * NOTE(review): nothing here checks that pred contains exactly pass_count
+ * non-zero bytes, or that there is at least one partition (counts[0] is
+ * read unconditionally) — presumably guaranteed by the caller; confirm. */
+ray_t* materialize_mapcommon_filter(ray_t* mc, ray_t* pred, int64_t pass_count) {
+    ray_t** mc_ptrs = (ray_t**)ray_data(mc);
+    ray_t* kv = mc_ptrs[0];
+    ray_t* rc = mc_ptrs[1];
+    int64_t n_parts = kv->len;
+    int8_t kv_type = kv->type;
+    size_t esz = (size_t)ray_sym_elem_size(kv_type, kv->attrs);
+    const char* kdata = (const char*)ray_data(kv);
+    const int64_t* counts = (const int64_t*)ray_data(rc);
+
+    ray_t* flat = ray_vec_new(kv_type, pass_count);
+    if (!flat || RAY_IS_ERR(flat)) return ray_error("oom", NULL);
+    flat->len = pass_count;
+
+    char* out = (char*)ray_data(flat);
+    int64_t out_idx = 0;            /* next output slot */
+    int64_t row = 0;                /* absolute row number across morsels */
+    int64_t part_idx = 0;           /* partition containing `row` */
+    int64_t part_end = counts[0];   /* first row past partition part_idx */
+
+    ray_morsel_t mp;
+    ray_morsel_init(&mp, pred);
+    while (ray_morsel_next(&mp)) {
+        const uint8_t* bits = (const uint8_t*)mp.morsel_ptr;
+        for (int64_t i = 0; i < mp.morsel_len; i++, row++) {
+            /* Advance to the partition that owns this row; the index never
+             * moves past the last partition. */
+            while (part_idx < n_parts - 1 && row >= part_end) {
+                part_idx++;
+                part_end += counts[part_idx];
+            }
+            if (bits[i])
+                memcpy(out + (size_t)out_idx++ * esz,
+                       kdata + (size_t)part_idx * esz, esz);
+        }
+    }
+    return flat;
+}
+
+
+/* ============================================================================
+ * Parallel index gather — used by filter, sort, and join
+ * ============================================================================ */
+
+/* Worker body: for every column in the context, dst[i] = src[idx[i]] for
+ * i in [start, end).  raw is a multi_gather_ctx_t*; wid is unused.
+ * 8- and 4-byte columns use typed loads/stores, other widths use memcpy. */
+void multi_gather_fn(void* raw, uint32_t wid, int64_t start, int64_t end) {
+    (void)wid;
+    multi_gather_ctx_t* c = (multi_gather_ctx_t*)raw;
+    const int64_t* restrict idx = c->idx;
+    int64_t nc = c->ncols;
+
+    /* Process one column at a time per batch of rows.
+     * This focuses random reads on a single source array, giving the
+     * hardware prefetcher only 1 stream to track (instead of ncols
+     * concurrent streams, which overflows the L2 miss queue). */
+#define MG_BATCH 512   /* rows per batch */
+#define MG_PF 32       /* prefetch distance in rows; never crosses a batch end */
+    for (int64_t base = start; base < end; base += MG_BATCH) {
+        int64_t bstart = base;
+        int64_t bend = base + MG_BATCH;
+        if (bend > end) bend = end;
+        for (int64_t col = 0; col < nc; col++) {
+            uint8_t e = c->esz[col];
+            char* src = c->srcs[col];
+            char* dst = c->dsts[col];
+            if (e == 8) {
+                const uint64_t* restrict s8 = (const uint64_t*)src;
+                uint64_t* restrict d8 = (uint64_t*)dst;
+                for (int64_t i = bstart; i < bend; i++) {
+                    if (i + MG_PF < bend)
+                        __builtin_prefetch(&s8[idx[i + MG_PF]], 0, 0);
+                    d8[i] = s8[idx[i]];
+                }
+            } else if (e == 4) {
+                const uint32_t* restrict s4 = (const uint32_t*)src;
+                uint32_t* restrict d4 = (uint32_t*)dst;
+                for (int64_t i = bstart; i < bend; i++) {
+                    if (i + MG_PF < bend)
+                        __builtin_prefetch(&s4[idx[i + MG_PF]], 0, 0);
+                    d4[i] = s4[idx[i]];
+                }
+            } else {
+                for (int64_t i = bstart; i < bend; i++) {
+                    if (i + MG_PF < bend)
+                        __builtin_prefetch(src + idx[i + MG_PF] * e, 0, 0);
+                    memcpy(dst + i * e, src + idx[i] * e, e);
+                }
+            }
+        }
+    }
+#undef MG_PF
+#undef MG_BATCH
+}
+
+/* Parallel index gather — single column with prefetching.
+ * raw is a gather_ctx_t*; wid is unused.  For i in [start, end) element i of
+ * dst_col receives element idx[i] of src_col.  When the context is flagged
+ * nullable, a negative index writes an all-zero element instead. */
+void gather_fn(void* raw, uint32_t wid, int64_t start, int64_t end) {
+    (void)wid;
+    gather_ctx_t* c = (gather_ctx_t*)raw;
+    char* restrict src = (char*)ray_data(c->src_col);
+    char* restrict dst = (char*)ray_data(c->dst_col);
+    uint8_t esz = c->esz;
+    const int64_t* restrict idx = c->idx;
+#define GATHER_PF 16   /* prefetch distance in rows */
+
+    if (c->nullable) {
+        for (int64_t i = start; i < end; i++) {
+            if (i + GATHER_PF < end) {
+                int64_t pf = idx[i + GATHER_PF];
+                /* Negative entries have no source row to prefetch. */
+                if (pf >= 0) __builtin_prefetch(src + pf * esz, 0, 0);
+            }
+            int64_t r = idx[i];
+            if (r >= 0)
+                memcpy(dst + i * esz, src + r * esz, esz);
+            else
+                memset(dst + i * esz, 0, esz);
+        }
+    } else {
+        for (int64_t i = start; i < end; i++) {
+            if (i + GATHER_PF < end)
+                __builtin_prefetch(src + idx[i + GATHER_PF] * esz, 0, 0);
+            memcpy(dst + i * esz, src + idx[i] * esz, esz);
+        }
+    }
+#undef GATHER_PF
+}
+
+/* 
============================================================================ + * Partitioned gather — cache-conscious column rearrangement + * + * Standard gather: dst[i] = src[idx[i]] — sequential writes, random reads. + * With 10M rows the source data (~hundreds of MB) far exceeds L2 cache, so + * every read is a main-memory miss (~60ns even with prefetching). + * + * Partitioned gather groups work by source ranges: for each 16K-row source + * block, process all indices that point into it. The block fits in L2, so + * reads become L2 hits (~5ns). Output writes become random but the CPU's + * store buffer absorbs them without stalling (~20ns effective). + * + * Three phases: + * 1. Histogram — count indices per source block (parallel) + * 2. Route — scatter (dest, src) pairs into buckets (parallel) + * 3. Block-gather — per block, source in L2 → fast reads (parallel) + * ============================================================================ */ + +/* Block = 16K source rows. 16K × 16 cols × 8B = 2MB ≈ L2 cache per core. */ +#define PG_BSHIFT 14 +#define PG_BSIZE (1 << PG_BSHIFT) /* 16384 */ +#define PG_MIN (PG_BSIZE * 8) /* 131072 — below this, routing overhead > benefit */ + +/* Phase 1+2 use dispatch_n with explicit task-to-range mapping so that + * histogram and scatter have consistent per-task assignments regardless + * of which worker picks up each task (work-stealing is non-deterministic). 
*/
+
+/* Context for the histogram phase. */
+typedef struct {
+    const int64_t* idx;
+    int64_t*       hist;     /* n_tasks × n_parts, row-major */
+    int64_t        n_parts;
+    int64_t        n;        /* total rows */
+    uint32_t       n_tasks;
+} pg_hist_ctx_t;
+
+/* Histogram task.  `start` is the task number (`end` and `wid` are unused).
+ * The task owns the index range [task*chunk, min((task+1)*chunk, n)) with
+ * chunk = ceil(n / n_tasks), clears its own row of the histogram, and counts
+ * how many of its indices fall into each source block (idx >> PG_BSHIFT). */
+static void pg_hist_fn(void* arg, uint32_t wid, int64_t start, int64_t end) {
+    (void)wid; (void)end;
+    pg_hist_ctx_t* c = (pg_hist_ctx_t*)arg;
+    int64_t task = start;
+
+    int64_t chunk = (c->n + c->n_tasks - 1) / c->n_tasks;
+    int64_t lo = task * chunk;
+    int64_t hi = lo + chunk;
+    if (hi > c->n) hi = c->n;
+    /* A task with an empty range still has to zero its histogram row,
+     * because the prefix sum reads every row. */
+    if (lo >= hi) { memset(c->hist + task * c->n_parts, 0,
+                            (size_t)c->n_parts * sizeof(int64_t)); return; }
+
+    int64_t* h = c->hist + task * c->n_parts;
+    memset(h, 0, (size_t)c->n_parts * sizeof(int64_t));
+    const int64_t* idx = c->idx;
+    for (int64_t i = lo; i < hi; i++)
+        h[idx[i] >> PG_BSHIFT]++;
+}
+
+/* Context for the route phase. */
+typedef struct {
+    const int64_t* idx;
+    int32_t*       rdest;    /* routed destination rows */
+    int32_t*       rsrc;     /* routed source rows */
+    int64_t*       offsets;  /* n_tasks × n_parts write cursors */
+    int64_t        n_parts;
+    int64_t        n;
+    uint32_t       n_tasks;
+} pg_route_ctx_t;
+
+/* Route task.  `start` is the task number; the index range is computed
+ * exactly as in pg_hist_fn.  Each (destination row, source row) pair is
+ * written at the task's cursor for the source block, and the cursor is
+ * advanced.  Both values are narrowed to int32. */
+static void pg_route_fn(void* arg, uint32_t wid, int64_t start, int64_t end) {
+    (void)wid; (void)end;
+    pg_route_ctx_t* c = (pg_route_ctx_t*)arg;
+    int64_t task = start;
+
+    int64_t chunk = (c->n + c->n_tasks - 1) / c->n_tasks;
+    int64_t lo = task * chunk;
+    int64_t hi = lo + chunk;
+    if (hi > c->n) hi = c->n;
+    if (lo >= hi) return;
+
+    int64_t* off = c->offsets + task * c->n_parts;
+    const int64_t* idx = c->idx;
+    int32_t* rd = c->rdest;
+    int32_t* rs = c->rsrc;
+    for (int64_t i = lo; i < hi; i++) {
+        int64_t src = idx[i];
+        int64_t pos = off[src >> PG_BSHIFT]++;
+        rd[pos] = (int32_t)i;
+        rs[pos] = (int32_t)src;
+    }
+}
+
+/* Phase 3: per-block gather — one task per source block */
+typedef struct {
+    const int32_t* rdest;
+    const int32_t* rsrc;
+    const int64_t* part_off;   /* partition start offsets (n_parts + 1) */
+    char**         srcs;
+    char**         dsts;
+    const uint8_t* esz;
+    int64_t        ncols;
+} pg_block_ctx_t;
+
+/* Block task.  `start` is the source-block number.  The routed pairs of the
+ * block are rdest/rsrc[part_off[blk] .. part_off[blk + 1]); for every column
+ * the task copies dst[rdest[j]] = src[rsrc[j]], with typed copies for
+ * element sizes 8, 4, 2 and 1 and memcpy for anything else. */
+static void pg_block_fn(void* arg, uint32_t wid, int64_t start, int64_t end) {
+    (void)wid; (void)end;
+    pg_block_ctx_t* c = (pg_block_ctx_t*)arg;
+    int64_t blk = start;  /* dispatch_n: one task per call */
+
+    int64_t lo = c->part_off[blk];
+    int64_t hi = c->part_off[blk + 1];
+    if (lo >= hi) return;
+
+    const int32_t* rd = c->rdest + lo;
+    const int32_t* rs = c->rsrc + lo;
+    int64_t cnt = hi - lo;
+
+    /* Column-at-a-time: keeps the source block hot in L2.
+     * After the first few reads, the entire 16K-row source slice
+     * is cache-resident, so subsequent reads are L2 hits. */
+    for (int64_t col = 0; col < c->ncols; col++) {
+        uint8_t e = c->esz[col];
+        const char* src = c->srcs[col];
+        char* dst = c->dsts[col];
+        if (e == 8) {
+            const uint64_t* s8 = (const uint64_t*)src;
+            uint64_t* d8 = (uint64_t*)dst;
+            for (int64_t j = 0; j < cnt; j++)
+                d8[rd[j]] = s8[rs[j]];
+        } else if (e == 4) {
+            const uint32_t* s4 = (const uint32_t*)src;
+            uint32_t* d4 = (uint32_t*)dst;
+            for (int64_t j = 0; j < cnt; j++)
+                d4[rd[j]] = s4[rs[j]];
+        } else if (e == 2) {
+            const uint16_t* s2 = (const uint16_t*)src;
+            uint16_t* d2 = (uint16_t*)dst;
+            for (int64_t j = 0; j < cnt; j++)
+                d2[rd[j]] = s2[rs[j]];
+        } else if (e == 1) {
+            for (int64_t j = 0; j < cnt; j++)
+                dst[rd[j]] = src[rs[j]];
+        } else {
+            for (int64_t j = 0; j < cnt; j++)
+                memcpy(dst + (int64_t)rd[j] * e,
+                       src + (int64_t)rs[j] * e, e);
+        }
+    }
+}
+
+/* Public entry point: partitioned gather for n >= PG_MIN, fallback otherwise.
+ * n: number of index entries (output rows) + * src_rows: number of rows in the source columns (indices may reference [0, src_rows)) */ +void partitioned_gather(ray_pool_t* pool, const int64_t* idx, int64_t n, + int64_t src_rows, char** srcs, char** dsts, + const uint8_t* esz, int64_t ncols) { + /* Fallback for small arrays or no pool */ + if (!pool || n < PG_MIN || n > INT32_MAX || src_rows > INT32_MAX) { + multi_gather_ctx_t mg = { .idx = idx, .ncols = 0 }; + for (int64_t c = 0; c < ncols && c < MGATHER_MAX_COLS; c++) { + mg.srcs[c] = srcs[c]; mg.dsts[c] = dsts[c]; mg.esz[c] = esz[c]; + mg.ncols++; + } + if (pool) ray_pool_dispatch(pool, multi_gather_fn, &mg, n); + else multi_gather_fn(&mg, 0, 0, n); + return; + } + + /* Partition by SOURCE range — indices can reference any row in [0, src_rows) */ + int64_t n_parts = (src_rows + PG_BSIZE - 1) >> PG_BSHIFT; + uint32_t nw = ray_pool_total_workers(pool); + + /* Allocate routing buffers */ + ray_t *hist_hdr = NULL, *off_hdr = NULL; + ray_t *rdest_hdr = NULL, *rsrc_hdr = NULL, *poff_hdr = NULL; + + int64_t* hist = (int64_t*)scratch_alloc(&hist_hdr, + (size_t)nw * (size_t)n_parts * sizeof(int64_t)); + int64_t* offsets = (int64_t*)scratch_alloc(&off_hdr, + (size_t)nw * (size_t)n_parts * sizeof(int64_t)); + int32_t* rdest = (int32_t*)scratch_alloc(&rdest_hdr, + (size_t)n * sizeof(int32_t)); + int32_t* rsrc = (int32_t*)scratch_alloc(&rsrc_hdr, + (size_t)n * sizeof(int32_t)); + int64_t* part_off = (int64_t*)scratch_alloc(&poff_hdr, + (size_t)(n_parts + 1) * sizeof(int64_t)); + + if (!hist || !offsets || !rdest || !rsrc || !part_off) { + scratch_free(hist_hdr); scratch_free(off_hdr); + scratch_free(rdest_hdr); scratch_free(rsrc_hdr); + scratch_free(poff_hdr); + /* Fallback to regular gather on allocation failure */ + multi_gather_ctx_t mg = { .idx = idx, .ncols = 0 }; + for (int64_t c = 0; c < ncols && c < MGATHER_MAX_COLS; c++) { + mg.srcs[c] = srcs[c]; mg.dsts[c] = dsts[c]; mg.esz[c] = esz[c]; + mg.ncols++; + } + 
ray_pool_dispatch(pool, multi_gather_fn, &mg, n); + return; + } + + /* Phase 1: parallel histogram (dispatch_n for deterministic task→range) */ + pg_hist_ctx_t hctx = { + .idx = idx, .hist = hist, .n_parts = n_parts, + .n = n, .n_tasks = nw, + }; + ray_pool_dispatch_n(pool, pg_hist_fn, &hctx, nw); + + /* Phase 2: prefix sum → per-task scatter offsets + partition boundaries */ + int64_t running = 0; + for (int64_t p = 0; p < n_parts; p++) { + part_off[p] = running; + for (uint32_t t = 0; t < nw; t++) { + offsets[t * n_parts + p] = running; + running += hist[t * n_parts + p]; + } + } + part_off[n_parts] = running; + + /* Phase 3: parallel route (same task→range mapping as histogram) */ + pg_route_ctx_t rctx = { + .idx = idx, .rdest = rdest, .rsrc = rsrc, + .offsets = offsets, .n_parts = n_parts, + .n = n, .n_tasks = nw, + }; + ray_pool_dispatch_n(pool, pg_route_fn, &rctx, nw); + + /* Phase 4: parallel per-block gather */ + pg_block_ctx_t bctx = { + .rdest = rdest, .rsrc = rsrc, .part_off = part_off, + .srcs = srcs, .dsts = dsts, .esz = esz, .ncols = ncols, + }; + ray_pool_dispatch_n(pool, pg_block_fn, &bctx, (uint32_t)n_parts); + + scratch_free(hist_hdr); + scratch_free(off_hdr); + scratch_free(rdest_hdr); + scratch_free(rsrc_hdr); + scratch_free(poff_hdr); +} + +/* (filter execution moved to filter.c) */ + + +/* ============================================================================ + * Sort execution (simple insertion sort) + * ============================================================================ */ + +/* Forward declarations — exec_node wraps exec_node_inner with profiling */ +/* exec_node declared extern in exec_internal.h */ +static ray_t* exec_node_inner(ray_graph_t* g, ray_op_t* op); + + + +/* Broadcast a scalar atom to a column vector of nrows elements. + * Returns a new vector (caller owns). On failure returns ray_error(). 
*/
+ray_t* broadcast_scalar(ray_t* atom, int64_t nrows) {
+    if (!atom) return ray_error("domain", NULL);
+
+    /* An atom's type tag is the negated vector type; only these five
+     * kinds can be broadcast, everything else is a type error. */
+    int8_t vec_type;
+    switch (-(int)atom->type) {
+    case RAY_STR:  vec_type = RAY_STR;  break;
+    case RAY_I64:  vec_type = RAY_I64;  break;
+    case RAY_F64:  vec_type = RAY_F64;  break;
+    case RAY_BOOL: vec_type = RAY_BOOL; break;
+    case RAY_SYM:  vec_type = RAY_SYM;  break;
+    default:
+        return ray_error("type", NULL);
+    }
+
+    /* Empty table: return an empty vector of the matching type */
+    if (nrows <= 0)
+        return ray_vec_new(vec_type, 0);
+
+    if (vec_type == RAY_STR) {
+        /* String atom: append the same bytes nrows times */
+        const char* text = ray_str_ptr(atom);
+        size_t text_len = ray_str_len(atom);
+        ray_t* out = ray_vec_new(RAY_STR, nrows);
+        if (!out || RAY_IS_ERR(out)) return out;
+        int64_t row = 0;
+        while (row < nrows) {
+            out = ray_str_vec_append(out, text, text_len);
+            if (RAY_IS_ERR(out)) return out;
+            row++;
+        }
+        return out;
+    }
+
+    /* Numeric / bool / sym scalars: BOOL elements are 1 byte, the rest 8.
+     * The value is staged in a zeroed 8-byte buffer taken from atom->i64. */
+    size_t width = (vec_type == RAY_BOOL) ? 1 : 8;
+    ray_t* out = ray_vec_new(vec_type, nrows);
+    if (!out || RAY_IS_ERR(out)) return out;
+
+    uint8_t value[8] = {0};
+    memcpy(value, &atom->i64, width);
+    int64_t row = 0;
+    while (row < nrows) {
+        out = ray_vec_append(out, value);
+        if (RAY_IS_ERR(out)) return out;
+        row++;
+    }
+    return out;
+}
+
+/* OP_IN worker — process [start, end) of the BOOL output buffer.
+ * Disjoint slices, no synchronization.
*/
+/* Shared, read-only state for exec_in_worker. Filled in once by exec_in
+ * and handed to every worker invocation. */
+typedef struct {
+    ray_t* col;            /* column (vector or atom) being tested */
+    const double* svf;     /* probe values as double (read when use_double) */
+    const int64_t* svi;    /* probe values as int64 (read otherwise) */
+    int64_t sv_len;        /* number of probe values */
+    uint8_t* ob;           /* output bytes, one per column row */
+    int8_t ct;             /* column element type used to decode rows */
+    bool col_has_nulls;    /* column vector carries the has-nulls attribute */
+    bool col_atom_null;    /* column is an atom and that atom is null */
+    bool col_is_atom;      /* column is an atom rather than a vector */
+    bool use_double;       /* compare as double instead of int64 */
+    bool negate;           /* invert the match result for non-null rows */
+} in_worker_ctx_t;
+
+/* Membership test for rows [start, end): a null row always writes 0;
+ * any other row is compared against each probe value in turn and writes
+ * found ^ negate. Has the (arg, worker id, start, end) shape that
+ * ray_pool_dispatch is given in exec_in; worker_id is unused. */
+static void exec_in_worker(void* vctx, uint32_t worker_id,
+                           int64_t start, int64_t end) {
+    (void)worker_id;
+    in_worker_ctx_t* c = (in_worker_ctx_t*)vctx;
+    ray_t* col = c->col;
+    /* Atoms are read from col->i64 / col->f64 below, never through cd */
+    const void* cd = c->col_is_atom ? NULL : ray_data(col);
+    int8_t ct = c->ct;
+    uint8_t cattrs = c->col_is_atom ? 0 : col->attrs;
+    uint8_t* ob = c->ob;
+    int64_t sv_len = c->sv_len;
+    int negate = c->negate ? 1 : 0;
+
+    /* Read column row `idx` as int64 according to ct; types not listed
+     * (including floats) read as 0. */
+    #define IN_READ_I64(dst, idx) do { \
+        switch (ct) { \
+        case RAY_BOOL: case RAY_U8: (dst) = ((const uint8_t*)cd)[idx]; break; \
+        case RAY_I16: (dst) = ((const int16_t*)cd)[idx]; break; \
+        case RAY_I32: case RAY_DATE: case RAY_TIME: \
+            (dst) = ((const int32_t*)cd)[idx]; break; \
+        case RAY_I64: case RAY_TIMESTAMP: \
+            (dst) = ((const int64_t*)cd)[idx]; break; \
+        case RAY_SYM: (dst) = ray_read_sym(cd, (idx), ct, cattrs); break; \
+        default: (dst) = 0; break; \
+        } \
+    } while (0)
+
+    /* Read column row `idx` as double according to ct; types not listed
+     * (including SYM) read as 0.0. */
+    #define IN_READ_F64(dst, idx) do { \
+        switch (ct) { \
+        case RAY_BOOL: case RAY_U8: (dst) = (double)((const uint8_t*)cd)[idx]; break; \
+        case RAY_I16: (dst) = (double)((const int16_t*)cd)[idx]; break; \
+        case RAY_I32: case RAY_DATE: case RAY_TIME: \
+            (dst) = (double)((const int32_t*)cd)[idx]; break; \
+        case RAY_I64: case RAY_TIMESTAMP: \
+            (dst) = (double)((const int64_t*)cd)[idx]; break; \
+        case RAY_F32: (dst) = (double)((const float*)cd)[idx]; break; \
+        case RAY_F64: (dst) = ((const double*)cd)[idx]; break; \
+        default: (dst) = 0.0; break; \
+        } \
+    } while (0)
+
+    if (c->use_double) {
+        const double* svf = c->svf;
+        for (int64_t i = start; i < end; i++) {
+            bool row_null = c->col_atom_null ||
+                (c->col_has_nulls && !c->col_is_atom &&
+                 ray_vec_is_null(col, i));
+            /* Null rows yield 0 for both IN and NOT-IN */
+            if (row_null) { ob[i] = 0; continue; }
+            double cv;
+            if (c->col_is_atom) cv = (ct == RAY_F64) ? col->f64 : (double)col->i64;
+            else IN_READ_F64(cv, i);
+            /* Linear scan of the probe values */
+            int found = 0;
+            for (int64_t j = 0; j < sv_len; j++)
+                if (cv == svf[j]) { found = 1; break; }
+            ob[i] = (uint8_t)(found ^ negate);
+        }
+    } else {
+        const int64_t* svi = c->svi;
+        for (int64_t i = start; i < end; i++) {
+            bool row_null = c->col_atom_null ||
+                (c->col_has_nulls && !c->col_is_atom &&
+                 ray_vec_is_null(col, i));
+            /* Null rows yield 0 for both IN and NOT-IN */
+            if (row_null) { ob[i] = 0; continue; }
+            int64_t cv;
+            if (c->col_is_atom) cv = col->i64;
+            else IN_READ_I64(cv, i);
+            /* Linear scan of the probe values */
+            int found = 0;
+            for (int64_t j = 0; j < sv_len; j++)
+                if (cv == svi[j]) { found = 1; break; }
+            ob[i] = (uint8_t)(found ^ negate);
+        }
+    }
+    #undef IN_READ_I64
+    #undef IN_READ_F64
+}
+
+/* ============================================================================
+ * exec_in — membership test (col IN set_vec)
+ *
+ * Evaluates each element of `col` against `set`. Returns a RAY_BOOL
+ * vector of col->len (length 1 when `col` is an atom). For OP_NOT_IN
+ * the output is inverted for non-null rows; null rows are 0 either way.
+ *
+ * Type handling:
+ *   - SYM ∈ SYM → compare interned sym IDs as i64
+ *   - Integer-family (BOOL/U8/I16/I32/I64/DATE/TIME/TIMESTAMP) on both
+ *     sides → compare values as signed int64 (I16/I32-width types are
+ *     sign-extended during read; BOOL/U8 are read as unsigned bytes).
+ *   - Any float on either side, mixed with each other or with
+ *     integer family → promote both sides to double and compare with
+ *     `==`. This covers the common case `(in price [1 2 3])` where
+ *     price is F64 and the set literal parses as I64.
+ *   - SYM mixed with anything else → no matches (type-mismatch; we
+ *     don't error because it's a legal Rayfall comparison that
+ *     simply produces false — and therefore true for non-null rows
+ *     under OP_NOT_IN).
+ *   - RAY_STR: deferred (returns nyi).
+ *
+ * Returns the result vector, or whatever ray_vec_new returned if it is
+ * NULL or an error, an "nyi" error for RAY_STR operands, or an "oom"
+ * error when the probe buffer cannot be allocated. `col` and `set` are
+ * not released here.
+ * ============================================================================ */
+static ray_t* exec_in(ray_graph_t* g, ray_op_t* op, ray_t* col, ray_t* set) {
+    (void)g;
+    bool negate = (op->opcode == OP_NOT_IN);
+
+    /* An atom on either side counts as a single element */
+    int64_t col_len = ray_is_atom(col) ? 1 : col->len;
+    int64_t set_len = ray_is_atom(set) ? 1 : set->len;
+
+    /* Empty col: the main loop produces an empty BOOL result
+     * correctly, but there's nothing to iterate, so short-circuit. */
+    if (col_len == 0) {
+        ray_t* out = ray_vec_new(RAY_BOOL, 0);
+        if (!out || RAY_IS_ERR(out)) return out;
+        out->len = 0;
+        return out;
+    }
+
+    /* NOTE: we intentionally do NOT short-circuit on set_len == 0.
+     * Even for an empty probe, the main loop still needs to check
+     * each col row's null flag so null rows never leak through as
+     * true for `not-in` (the old memset bypass did exactly that). */
+
+    /* Normalise to element types: atoms store the negated type tag,
+     * parted types are reduced to their base type. */
+    int8_t ct = ray_is_atom(col) ? (int8_t)(-col->type) : col->type;
+    int8_t st = ray_is_atom(set) ? (int8_t)(-set->type) : set->type;
+    if (RAY_IS_PARTED(ct)) ct = (int8_t)RAY_PARTED_BASETYPE(ct);
+    if (RAY_IS_PARTED(st)) st = (int8_t)RAY_PARTED_BASETYPE(st);
+
+    if (ct == RAY_STR || st == RAY_STR)
+        return ray_error("nyi", "OP_IN on RAY_STR not yet implemented");
+
+    /* Classify each side: 0=int-family, 1=float-family, 2=sym. */
+    #define CLASSIFY(t) \
+        ((t) == RAY_SYM ? 2 : \
+         ((t) == RAY_F32 || (t) == RAY_F64) ? 1 : 0)
+
+    int col_class = CLASSIFY(ct);
+    int set_class = CLASSIFY(st);
+
+    /* Mixed SYM vs non-SYM → treat as an empty probe. A SYM set
+     * containing resolved sym IDs has no meaning when compared to a
+     * raw integer column, so nothing can match — but we still drop
+     * through to the main loop so null rows are handled consistently
+     * (they emit 0 regardless of negate). */
+    if ((col_class == 2) != (set_class == 2)) {
+        set_len = 0;
+    }
+
+    /* Float-promoted path: at least one side is float. Read both as
+     * double and compare. */
+    int use_double = (col_class == 1 || set_class == 1);
+
+    ray_t* out = ray_vec_new(RAY_BOOL, col_len);
+    if (!out || RAY_IS_ERR(out)) return out;
+    out->len = col_len;
+    uint8_t* ob = (uint8_t*)ray_data(out);
+
+    /* Null-aware: null rows in the column never pass either `in` or
+     * `not-in`. Mirrors SQL-style semantics where NULL IN (…) and
+     * NULL NOT IN (…) both yield UNKNOWN / false in a boolean
+     * context. Also skip null elements when building the probe
+     * buffer so a non-null col row doesn't accidentally match the
+     * sentinel value of a null set element. */
+    bool col_has_nulls = !ray_is_atom(col) && (col->attrs & RAY_ATTR_HAS_NULLS);
+    bool col_atom_null = ray_is_atom(col) && RAY_ATOM_IS_NULL(col);
+    bool set_has_nulls = !ray_is_atom(set) && (set->attrs & RAY_ATTR_HAS_NULLS);
+
+    /* Read element `idx` of `vec` as int64 according to `type`; types
+     * not listed read as 0. */
+    #define READ_I64(dst, vec, type, idx) do { \
+        const void* _d = ray_data(vec); \
+        switch (type) { \
+        case RAY_BOOL: case RAY_U8: (dst) = ((const uint8_t*)_d)[idx]; break; \
+        case RAY_I16: (dst) = ((const int16_t*)_d)[idx]; break; \
+        case RAY_I32: case RAY_DATE: case RAY_TIME: \
+            (dst) = ((const int32_t*)_d)[idx]; break; \
+        case RAY_I64: case RAY_TIMESTAMP: \
+            (dst) = ((const int64_t*)_d)[idx]; break; \
+        case RAY_SYM: (dst) = ray_read_sym(_d, (idx), (type), \
+                                          (vec)->attrs); break; \
+        default: (dst) = 0; break; \
+        } \
+    } while (0)
+
+    /* Read element `idx` of `vec` as double according to `type`; types
+     * not listed read as 0.0. */
+    #define READ_F64(dst, vec, type, idx) do { \
+        const void* _d = ray_data(vec); \
+        switch (type) { \
+        case RAY_BOOL: case RAY_U8: (dst) = (double)((const uint8_t*)_d)[idx]; break; \
+        case RAY_I16: (dst) = (double)((const int16_t*)_d)[idx]; break; \
+        case RAY_I32: case RAY_DATE: case RAY_TIME: \
+            (dst) = (double)((const int32_t*)_d)[idx]; break; \
+        case RAY_I64: case RAY_TIMESTAMP: \
+            (dst) = (double)((const int64_t*)_d)[idx]; break; \
+        case RAY_F32: (dst) = (double)((const float*)_d)[idx]; break; \
+        case RAY_F64: (dst) = ((const double*)_d)[idx]; break; \
+        default: (dst) = 0.0; break; \
+        } \
+    } while (0)
+
+    /* Compact probe buffer: drop null set elements up front so the
+     * inner loop doesn't special-case them. Up to 32 probe values live
+     * on the stack; larger sets use a ray_alloc'd buffer that is freed
+     * before returning. */
+    int64_t sv_len = 0;
+    double svf_stack[32];
+    int64_t svi_stack[32];
+    double* svf = svf_stack;
+    int64_t* svi = svi_stack;
+    ray_t* sv_hdr = NULL;
+    if (set_len > 32) {
+        size_t bytes = (size_t)set_len * (use_double ? sizeof(double) : sizeof(int64_t));
+        sv_hdr = ray_alloc(bytes);
+        if (!sv_hdr) { ray_release(out); return ray_error("oom", NULL); }
+        if (use_double) svf = (double*)ray_data(sv_hdr);
+        else svi = (int64_t*)ray_data(sv_hdr);
+    }
+
+    /* set_len is 0 when we want to suppress the set entirely
+     * (SYM-vs-non-SYM type mismatch). Respect it in BOTH the
+     * atom and vec branches so the probe stays empty. */
+    if (use_double) {
+        if (set_len > 0 && ray_is_atom(set)) {
+            if (!RAY_ATOM_IS_NULL(set)) {
+                svf[0] = (st == RAY_F64) ? set->f64 : (double)set->i64;
+                sv_len = 1;
+            }
+        } else if (set_len > 0) {
+            for (int64_t i = 0; i < set_len; i++) {
+                if (set_has_nulls && ray_vec_is_null(set, i)) continue;
+                READ_F64(svf[sv_len], set, st, i);
+                sv_len++;
+            }
+        }
+    } else {
+        if (set_len > 0 && ray_is_atom(set)) {
+            if (!RAY_ATOM_IS_NULL(set)) { svi[0] = set->i64; sv_len = 1; }
+        } else if (set_len > 0) {
+            for (int64_t i = 0; i < set_len; i++) {
+                if (set_has_nulls && ray_vec_is_null(set, i)) continue;
+                READ_I64(svi[sv_len], set, st, i);
+                sv_len++;
+            }
+        }
+    }
+
+    in_worker_ctx_t in_ctx = {
+        .col = col,
+        .svf = svf, .svi = svi, .sv_len = sv_len,
+        .ob = ob, .ct = ct,
+        .col_has_nulls = col_has_nulls,
+        .col_atom_null = col_atom_null,
+        .col_is_atom = ray_is_atom(col),
+        .use_double = use_double,
+        .negate = negate,
+    };
+
+    /* Go parallel only for vector columns at or above the threshold;
+     * otherwise run the worker inline over the whole range. */
+    ray_pool_t* pool = ray_pool_get();
+    if (pool && col_len >= RAY_PARALLEL_THRESHOLD && !ray_is_atom(col))
+        ray_pool_dispatch(pool, exec_in_worker, &in_ctx, col_len);
+    else
+        exec_in_worker(&in_ctx, 0, 0, col_len);
+
+    if (sv_hdr) ray_free(sv_hdr);
+
+    #undef READ_I64
+    #undef READ_F64
+    #undef CLASSIFY
+    return out;
+}
+
+/* ============================================================================
+ * Recursive executor
+ * 
============================================================================ */
+
+/* Is this opcode a "heavy" pipeline breaker worth profiling?
+ * True for the listed relational ops and for every opcode in the numeric
+ * range OP_EXPAND..OP_KNN_RERANK (inclusive). exec_node uses the answer
+ * both to relabel progress and to decide whether to open a profile span. */
+static inline bool op_is_heavy(uint16_t opc) {
+    return opc == OP_FILTER || opc == OP_SORT || opc == OP_GROUP ||
+           opc == OP_JOIN || opc == OP_WINDOW_JOIN || opc == OP_SELECT ||
+           opc == OP_HEAD || opc == OP_TAIL || opc == OP_WINDOW ||
+           opc == OP_PIVOT ||
+           (opc >= OP_EXPAND && opc <= OP_KNN_RERANK);
+}
+
+/* Execute one graph node: wraps exec_node_inner with a cancellation check,
+ * a progress relabel for heavy ops, and an optional profiling span.
+ * Returns exec_node_inner's result unchanged, an "nyi" error when op is
+ * NULL, or a "cancel" error when an interrupt is pending. */
+ray_t* exec_node(ray_graph_t* g, ray_op_t* op) {
+    if (!op) return ray_error("nyi", NULL);
+
+    /* Per-op cancellation checkpoint. Long fused pipelines iterate
+     * exec_node many times; this catches Ctrl-C between operators
+     * without adding cost to the per-row hot path. */
+    if (ray_interrupted()) return ray_error("cancel", "interrupted");
+
+    bool heavy = op_is_heavy(op->opcode);
+    /* A span is recorded only when profiling is active AND the op is heavy */
+    bool profiling = g_ray_profile.active && heavy;
+    const char* oname = NULL;
+    if (heavy) {
+        oname = ray_opcode_name(op->opcode);
+        /* Relabel progress without touching counters — leaf ops that
+         * drive their own rows_done/rows_total still work; ops that
+         * don't get a spinner-style indeterminate bar until they
+         * either finish or emit their own update. */
+        ray_progress_label(oname, NULL);
+        if (profiling) ray_profile_span_start(oname);
+    }
+
+    ray_t* _prof_result = exec_node_inner(g, op);
+
+    /* profiling implies heavy, so oname is the name the span was opened with */
+    if (profiling)
+        ray_profile_span_end(oname);
+
+    return _prof_result;
+}
+
+static ray_t* exec_node_inner(ray_graph_t* g, ray_op_t* op) {
+    if (!op) return ray_error("nyi", NULL);
+
+    switch (op->opcode) {
+        case OP_SCAN: {
+            ray_op_ext_t* ext = find_ext(g, op->id);
+            if (!ext) return ray_error("nyi", NULL);
+
+            /* Resolve table: pad[0..1] stores table_id+1 (0 = default g->table) */
+            uint16_t stored_table_id = 0;
+            memcpy(&stored_table_id, ext->base.pad, sizeof(uint16_t));
+            ray_t* scan_tbl;
+            if (stored_table_id > 0 && g->tables && (stored_table_id - 1) < g->n_tables) {
+                scan_tbl = g->tables[stored_table_id - 1];
+            } else {
+                scan_tbl = g->table;
+            }
+            if (!scan_tbl) return ray_error("schema", NULL);
+            ray_t* col = ray_table_get_col(scan_tbl, ext->sym);
+            if (!col) return ray_error("schema", NULL);
+            if (col->type == RAY_MAPCOMMON)
+                return materialize_mapcommon(col);
+            if (RAY_IS_PARTED(col->type)) {
+                /* Concat parted segments into flat vector (cold path) */
+                int8_t base = (int8_t)RAY_PARTED_BASETYPE(col->type);
+                ray_t** sps = (ray_t**)ray_data(col);
+                int64_t total = ray_parted_nrows(col);
+
+                /* RAY_STR: deep-copy to handle multi-pool segments */
+                if (base == RAY_STR)
+                    return parted_flatten_str(sps, col->len, total);
+
+                uint8_t sba = (base == RAY_SYM)
+                            ? 
parted_first_attrs(sps, col->len) : 0; + ray_t* flat = typed_vec_new(base, sba, total); + if (!flat || RAY_IS_ERR(flat)) return ray_error("oom", NULL); + flat->len = total; + ray_t** segs = sps; + size_t esz = (size_t)ray_sym_elem_size(base, sba); + int64_t off = 0; + for (int64_t s = 0; s < col->len; s++) { + if (segs[s] && segs[s]->len > 0 && + parted_seg_esz_ok(segs[s], base, (uint8_t)esz)) { + memcpy((char*)ray_data(flat) + off * esz, + ray_data(segs[s]), (size_t)segs[s]->len * esz); + off += segs[s]->len; + } else if (segs[s] && segs[s]->len > 0) { + memset((char*)ray_data(flat) + off * esz, 0, + (size_t)segs[s]->len * esz); + off += segs[s]->len; + } + } + return flat; + } + ray_retain(col); + return col; + } + + case OP_CONST: { + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext || !ext->literal) return ray_error("nyi", NULL); + ray_retain(ext->literal); + return ext->literal; + } + + case OP_TIL: { + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext || !ext->literal) return ray_error("nyi", NULL); + int64_t n = ext->literal->i64; + if (n <= 0) return ray_vec_new(RAY_I64, 0); + ray_t* vec = ray_vec_new(RAY_I64, n); + if (!vec || RAY_IS_ERR(vec)) return vec; + vec->len = n; + int64_t* d = (int64_t*)ray_data(vec); + for (int64_t i = 0; i < n; i++) d[i] = i; + return vec; + } + + /* Membership: col IN set_vec */ + case OP_IN: case OP_NOT_IN: { + ray_t* col = exec_node(g, op->inputs[0]); + if (!col || RAY_IS_ERR(col)) return col; + ray_t* set = exec_node(g, op->inputs[1]); + if (!set || RAY_IS_ERR(set)) { ray_release(col); return set; } + ray_t* result = exec_in(g, op, col, set); + ray_release(col); + ray_release(set); + return result; + } + + /* Unary element-wise */ + case OP_NEG: case OP_ABS: case OP_NOT: case OP_SQRT: + case OP_LOG: case OP_EXP: case OP_CEIL: case OP_FLOOR: case OP_ROUND: + case OP_ISNULL: case OP_CAST: + /* Binary element-wise */ + case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD: + case OP_EQ: case OP_NE: case OP_LT: 
case OP_LE: + case OP_GT: case OP_GE: case OP_AND: case OP_OR: + case OP_MIN2: case OP_MAX2: { + /* Try compiled expression first (fuses entire subtree) */ + if (g->table) { + int64_t nr = ray_table_nrows(g->table); + if (nr > 0) { + ray_expr_t ex; + if (expr_compile(g, g->table, op, &ex)) { + ray_t* vec = expr_eval_full(&ex, nr); + if (vec && !RAY_IS_ERR(vec)) return vec; + } + } + } + /* Fallback: recursive per-node evaluation */ + if (op->arity == 1) { + ray_t* input = exec_node(g, op->inputs[0]); + if (!input || RAY_IS_ERR(input)) return input; + ray_t* result = exec_elementwise_unary(g, op, input); + ray_release(input); + return result; + } else { + ray_t* lhs = exec_node(g, op->inputs[0]); + ray_t* rhs = exec_node(g, op->inputs[1]); + if (!lhs || RAY_IS_ERR(lhs)) { if (rhs && !RAY_IS_ERR(rhs)) ray_release(rhs); return lhs; } + if (!rhs || RAY_IS_ERR(rhs)) { ray_release(lhs); return rhs; } + ray_t* result = exec_elementwise_binary(g, op, lhs, rhs); + ray_release(lhs); + ray_release(rhs); + return result; + } + } + + /* Reductions */ + case OP_SUM: case OP_PROD: case OP_MIN: case OP_MAX: + case OP_COUNT: case OP_AVG: case OP_FIRST: case OP_LAST: + case OP_STDDEV: case OP_STDDEV_POP: case OP_VAR: case OP_VAR_POP: { + ray_t* input = exec_node(g, op->inputs[0]); + if (!input || RAY_IS_ERR(input)) return input; + /* Compact lazy selection before reducing — filters may have + * set g->selection without materializing a compacted table. 
*/ + bool own_input = (input != g->table); + if (g->selection && input->type == RAY_TABLE) { + ray_t* compacted = sel_compact(g, input, g->selection); + if (own_input) ray_release(input); + ray_release(g->selection); + g->selection = NULL; + input = compacted; + own_input = true; + } + ray_t* result = exec_reduction(g, op, input); + if (own_input) ray_release(input); + return result; + } + + case OP_COUNT_DISTINCT: { + ray_t* input = exec_node(g, op->inputs[0]); + if (!input || RAY_IS_ERR(input)) return input; + ray_t* result = exec_count_distinct(g, op, input); + ray_release(input); + return result; + } + + case OP_FILTER: { + /* HAVING fusion: FILTER(GROUP) — evaluate the predicate against + * the GROUP result rather than the original input table. + * SCAN nodes in the predicate tree resolve column names via + * g->table, so we temporarily swap it to the GROUP output. */ + ray_op_t* filter_child = op->inputs[0]; + if (filter_child && filter_child->opcode == OP_GROUP) { + ray_t* group_result = exec_node(g, filter_child); + if (!group_result || RAY_IS_ERR(group_result)) + return group_result; + + ray_t* saved_table = g->table; + ray_t* saved_sel = g->selection; + g->table = group_result; + g->selection = NULL; + + ray_t* pred = exec_node(g, op->inputs[1]); + + g->table = saved_table; + g->selection = saved_sel; + + if (!pred || RAY_IS_ERR(pred)) { + ray_release(group_result); + return pred; + } + + ray_t* result = exec_filter(g, op, group_result, pred); + ray_release(pred); + ray_release(group_result); + return result; + } + + ray_t* input = exec_node(g, op->inputs[0]); + ray_t* pred = exec_node(g, op->inputs[1]); + if (!input || RAY_IS_ERR(input)) { if (pred && !RAY_IS_ERR(pred)) ray_release(pred); return input; } + if (!pred || RAY_IS_ERR(pred)) { ray_release(input); return pred; } + + /* Lazy filter: convert predicate to a rowsel (morsel-local + * index list) and install on g->selection instead of + * materializing a compacted table. 
Only for TABLE inputs — + * downstream ops (group-by) walk the rowsel directly, + * boundary ops (sort/join/window) compact on demand via + * sel_compact. Vector inputs must still materialize + * immediately since downstream ops like COUNT rely on + * compacted length. */ + if (pred->type == RAY_BOOL && input->type == RAY_TABLE) { + if (g->selection) { + /* Chained filter: refine the existing selection + * with this predicate in one walk. */ + ray_t* merged = ray_rowsel_refine(g->selection, pred); + ray_release(pred); + ray_release(g->selection); + g->selection = merged; /* may be NULL if all-pass */ + } else { + ray_t* new_sel = ray_rowsel_from_pred(pred); + ray_release(pred); + g->selection = new_sel; /* may be NULL if all-pass */ + } + return input; /* original table, not compacted */ + } + + /* Eager filter for vector inputs and non-BOOL predicates */ + ray_t* result = exec_filter(g, op, input, pred); + ray_release(input); + ray_release(pred); + return result; + } + + case OP_SORT: { + ray_t* input = exec_node(g, op->inputs[0]); + if (!input || RAY_IS_ERR(input)) return input; + ray_t* tbl = (input->type == RAY_TABLE) ? input : g->table; + /* Compact lazy selection before sort (needs dense data) */ + if (g->selection && tbl && !RAY_IS_ERR(tbl) && tbl->type == RAY_TABLE) { + ray_t* compacted = sel_compact(g, tbl, g->selection); + if (input != g->table) ray_release(input); + ray_release(g->selection); + g->selection = NULL; + input = compacted; + tbl = compacted; + } + ray_t* result = exec_sort(g, op, tbl, 0); + if (input != g->table) ray_release(input); + return result; + } + + case OP_GROUP: { + ray_t* tbl = g->table; + ray_t* owned_tbl = NULL; + + /* Factorized pipeline: detect OP_EXPAND (factorized) → OP_GROUP. + * When the group key is _src and there's a factorized expand node + * in the graph, execute the expand first and pipe its output as + * the group input table. 
This connects the expand→group pipeline + * that would otherwise disconnect since GROUP reads g->table. */ + { + ray_op_ext_t* gext = find_ext(g, op->id); + if (gext && gext->n_keys == 1) { + ray_op_ext_t* kx = find_ext(g, gext->keys[0]->id); + int64_t src_sym = ray_sym_intern("_src", 4); + if (kx && kx->base.opcode == OP_SCAN && kx->sym == src_sym) { + /* Find the factorized OP_EXPAND connected to this GROUP. + * The expand must be the one whose output the GROUP + * is scanning (connected via OP_SCAN inputs). */ + for (uint32_t ei = 0; ei < g->ext_count; ei++) { + ray_op_ext_t* ex = g->ext_nodes[ei]; + if (ex && ex->base.id < g->node_count + && g->nodes[ex->base.id].opcode == OP_EXPAND + && ex->graph.factorized) { + ray_op_t* expand_op = &g->nodes[ex->base.id]; + ray_t* expand_result = exec_node(g, expand_op); + if (!expand_result || RAY_IS_ERR(expand_result)) + return expand_result; + if (expand_result->type == RAY_TABLE) { + ray_t* saved = g->table; + g->table = expand_result; + ray_t* result = exec_group(g, op, expand_result, 0); + g->table = saved; + ray_release(expand_result); + return result; + } + ray_release(expand_result); + break; + } + } + } + } + } + + /* Lazy selection is consumed by exec_group itself — all + * paths (sequential, DA, radix-parallel) honour the + * bitmap via group_rows_range / radix scan loops. We + * must still clear g->selection *after* group runs so + * downstream ops (SORT etc.) don't try to sel_compact the + * aggregated output with a mismatched-length bitmap. 
*/ + ray_t* result = exec_group(g, op, tbl, 0); + if (owned_tbl) ray_release(owned_tbl); + if (g->selection) { + ray_release(g->selection); + g->selection = NULL; + } + return result; + } + + case OP_PIVOT: { + ray_t* tbl = g->table; + ray_t* owned_tbl = NULL; + if (g->selection) { + ray_t* compacted = sel_compact(g, tbl, g->selection); + if (!compacted || RAY_IS_ERR(compacted)) return compacted; + ray_release(g->selection); + g->selection = NULL; + owned_tbl = compacted; + tbl = compacted; + } + ray_t* result = exec_pivot(g, op, tbl); + if (owned_tbl) ray_release(owned_tbl); + return result; + } + + case OP_JOIN: { + ray_t* left = exec_node(g, op->inputs[0]); + ray_t* right = exec_node(g, op->inputs[1]); + if (!left || RAY_IS_ERR(left)) { if (right && !RAY_IS_ERR(right)) ray_release(right); return left; } + if (!right || RAY_IS_ERR(right)) { ray_release(left); return right; } + /* Compact lazy selection before join (needs dense data) */ + if (g->selection && left && !RAY_IS_ERR(left) && left->type == RAY_TABLE) { + ray_t* compacted = sel_compact(g, left, g->selection); + ray_release(left); + ray_release(g->selection); + g->selection = NULL; + left = compacted; + } + ray_t* result = exec_join(g, op, left, right); + ray_release(left); + ray_release(right); + return result; + } + + case OP_ANTIJOIN: { + ray_t* left = exec_node(g, op->inputs[0]); + ray_t* right = exec_node(g, op->inputs[1]); + if (!left || RAY_IS_ERR(left)) { if (right && !RAY_IS_ERR(right)) ray_release(right); return left; } + if (!right || RAY_IS_ERR(right)) { ray_release(left); return right; } + if (g->selection && left && !RAY_IS_ERR(left) && left->type == RAY_TABLE) { + ray_t* compacted = sel_compact(g, left, g->selection); + ray_release(left); + ray_release(g->selection); + g->selection = NULL; + left = compacted; + } + ray_t* result = exec_antijoin(g, op, left, right); + ray_release(left); + ray_release(right); + return result; + } + + case OP_WINDOW_JOIN: { + ray_t* left = exec_node(g, 
op->inputs[0]); + ray_t* right = exec_node(g, op->inputs[1]); + if (!left || RAY_IS_ERR(left)) { if (right && !RAY_IS_ERR(right)) ray_release(right); return left; } + if (!right || RAY_IS_ERR(right)) { ray_release(left); return right; } + if (g->selection && left && !RAY_IS_ERR(left) && left->type == RAY_TABLE) { + ray_t* compacted = sel_compact(g, left, g->selection); + ray_release(left); + ray_release(g->selection); + g->selection = NULL; + left = compacted; + } + ray_t* result = exec_window_join(g, op, left, right); + ray_release(left); + ray_release(right); + return result; + } + + case OP_WINDOW: { + ray_t* input = exec_node(g, op->inputs[0]); + if (!input || RAY_IS_ERR(input)) return input; + ray_t* wdf = (input->type == RAY_TABLE) ? input : g->table; + /* Compact lazy selection before window (needs dense data) */ + if (g->selection && wdf && !RAY_IS_ERR(wdf) && wdf->type == RAY_TABLE) { + ray_t* compacted = sel_compact(g, wdf, g->selection); + if (input != g->table) ray_release(input); + ray_release(g->selection); + g->selection = NULL; + input = compacted; + wdf = compacted; + } + ray_t* result = exec_window(g, op, wdf); + if (input != g->table) ray_release(input); + return result; + } + + case OP_HEAD: { + ray_op_ext_t* ext = find_ext(g, op->id); + int64_t n = ext ? ext->sym : 10; + + /* Fused sort+limit: detect SORT child → only gather N rows */ + ray_op_t* child_op = op->inputs[0]; + if (child_op && child_op->opcode == OP_SORT) { + ray_t* sort_input = exec_node(g, child_op->inputs[0]); + if (!sort_input || RAY_IS_ERR(sort_input)) return sort_input; + ray_t* tbl = (sort_input->type == RAY_TABLE) ? 
sort_input : g->table; + /* Compact lazy selection before sort */ + if (g->selection && tbl && !RAY_IS_ERR(tbl) && tbl->type == RAY_TABLE) { + ray_t* compacted = sel_compact(g, tbl, g->selection); + if (sort_input != g->table) ray_release(sort_input); + ray_release(g->selection); + g->selection = NULL; + sort_input = compacted; + tbl = compacted; + } + ray_t* result = exec_sort(g, child_op, tbl, n); + if (sort_input != g->table) ray_release(sort_input); + return result; + } + + /* HEAD(GROUP) optimization: pass limit hint to exec_group + * so it can short-circuit the per-partition loop when all + * GROUP BY keys are MAPCOMMON. The normal HEAD logic below + * still trims the result to N rows regardless. */ + ray_t* input; + if (child_op && child_op->opcode == OP_GROUP) { + ray_t* tbl = g->table; + if (!tbl || RAY_IS_ERR(tbl)) return tbl; + ray_t* owned_tbl = NULL; + if (g->selection && tbl->type == RAY_TABLE) { + int needs = 0; + int64_t nc = ray_table_ncols(tbl); + for (int64_t c = 0; c < nc; c++) { + ray_t* col = ray_table_get_col_idx(tbl, c); + if (col && !RAY_IS_PARTED(col->type) + && col->type != RAY_MAPCOMMON) { + needs = 1; break; + } + } + if (needs) { + ray_t* compacted = sel_compact(g, tbl, g->selection); + if (!compacted || RAY_IS_ERR(compacted)) return compacted; + ray_release(g->selection); + g->selection = NULL; + owned_tbl = compacted; + tbl = compacted; + } + } + input = exec_group(g, child_op, tbl, n); + if (owned_tbl) ray_release(owned_tbl); + } else if (child_op && child_op->opcode == OP_FILTER) { + /* HEAD(FILTER): early-termination filter — gather only + * the first N matching rows instead of all matches. */ + ray_t* filter_input = exec_node(g, child_op->inputs[0]); + if (!filter_input || RAY_IS_ERR(filter_input)) + return filter_input; + + /* Compact lazy selection before filter evaluation */ + ray_t* ftbl = (filter_input->type == RAY_TABLE) + ? 
filter_input : g->table; + if (g->selection && ftbl && ftbl->type == RAY_TABLE) { + ray_t* compacted = sel_compact(g, ftbl, g->selection); + if (filter_input != g->table) ray_release(filter_input); + ray_release(g->selection); + g->selection = NULL; + filter_input = compacted; + ftbl = compacted; + } + + /* Swap table for predicate evaluation */ + ray_t* saved_table = g->table; + g->table = ftbl; + ray_t* pred = exec_node(g, child_op->inputs[1]); + g->table = saved_table; + + if (!pred || RAY_IS_ERR(pred)) { + if (filter_input != saved_table) + ray_release(filter_input); + return pred; + } + + ray_t* result = exec_filter_head(ftbl, pred, n); + ray_release(pred); + if (filter_input != saved_table) + ray_release(filter_input); + return result; + } else { + input = exec_node(g, op->inputs[0]); + } + if (!input || RAY_IS_ERR(input)) return input; + if (input->type == RAY_TABLE) { + int64_t ncols = ray_table_ncols(input); + int64_t nrows = ray_table_nrows(input); + if (n > nrows) n = nrows; + ray_t* result = ray_table_new(ncols); + for (int64_t c = 0; c < ncols; c++) { + ray_t* col = ray_table_get_col_idx(input, c); + int64_t name_id = ray_table_col_name(input, c); + if (!col) continue; + if (col->type == RAY_MAPCOMMON) { + ray_t* mc_head = materialize_mapcommon_head(col, n); + if (mc_head && !RAY_IS_ERR(mc_head)) { + result = ray_table_add_col(result, name_id, mc_head); + ray_release(mc_head); + } + continue; + } + if (RAY_IS_PARTED(col->type)) { + /* Copy first n rows from parted segments */ + int8_t base = (int8_t)RAY_PARTED_BASETYPE(col->type); + ray_t** sp = (ray_t**)ray_data(col); + ray_t* head_vec; + if (base == RAY_STR) { + head_vec = parted_head_str(sp, col->len, n); + } else { + uint8_t ba = (base == RAY_SYM) + ? 
parted_first_attrs(sp, col->len) : 0; + uint8_t esz = ray_sym_elem_size(base, ba); + head_vec = typed_vec_new(base, ba, n); + if (head_vec && !RAY_IS_ERR(head_vec)) { + head_vec->len = n; + ray_t** segs = (ray_t**)ray_data(col); + int64_t remaining = n; + int64_t dst_off = 0; + for (int64_t s = 0; s < col->len && remaining > 0; s++) { + if (!segs[s]) continue; + int64_t take = segs[s]->len; + if (take > remaining) take = remaining; + if (parted_seg_esz_ok(segs[s], base, esz)) { + memcpy((char*)ray_data(head_vec) + dst_off * esz, + ray_data(segs[s]), (size_t)take * esz); + } else { + memset((char*)ray_data(head_vec) + dst_off * esz, + 0, (size_t)take * esz); + } + dst_off += take; + remaining -= take; + } + } + } + result = ray_table_add_col(result, name_id, head_vec); + ray_release(head_vec); + } else { + /* Flat column: direct copy */ + uint8_t esz = col_esz(col); + ray_t* head_vec = col_vec_new(col, n); + if (head_vec && !RAY_IS_ERR(head_vec)) { + head_vec->len = n; + memcpy(ray_data(head_vec), ray_data(col), + (size_t)n * esz); + col_propagate_nulls_range(head_vec, 0, col, 0, n); + } + result = ray_table_add_col(result, name_id, head_vec); + ray_release(head_vec); + } + } + ray_release(input); + return result; + } + if (n > input->len) n = input->len; + /* Materialized copy for vector head */ + uint8_t esz = col_esz(input); + ray_t* result = col_vec_new(input, n); + if (result && !RAY_IS_ERR(result)) { + result->len = n; + memcpy(ray_data(result), ray_data(input), (size_t)n * esz); + col_propagate_nulls_range(result, 0, input, 0, n); + } + ray_release(input); + return result; + } + + case OP_TAIL: { + ray_op_ext_t* ext = find_ext(g, op->id); + ray_t* input = exec_node(g, op->inputs[0]); + if (!input || RAY_IS_ERR(input)) return input; + int64_t n = ext ? 
ext->sym : 10; + if (input->type == RAY_TABLE) { + int64_t ncols = ray_table_ncols(input); + int64_t nrows = ray_table_nrows(input); + if (n > nrows) n = nrows; + int64_t skip = nrows - n; + ray_t* result = ray_table_new(ncols); + for (int64_t c = 0; c < ncols; c++) { + ray_t* col = ray_table_get_col_idx(input, c); + int64_t name_id = ray_table_col_name(input, c); + if (!col) continue; + if (col->type == RAY_MAPCOMMON) { + /* Materialize last N rows from MAPCOMMON partitions */ + ray_t** mc_ptrs = (ray_t**)ray_data(col); + ray_t* kv = mc_ptrs[0]; + ray_t* rc = mc_ptrs[1]; + int64_t n_parts = kv->len; + size_t esz = (size_t)col_esz(kv); + const char* kdata = (const char*)ray_data(kv); + const int64_t* counts = (const int64_t*)ray_data(rc); + ray_t* flat = col_vec_new(kv, n); + if (flat && !RAY_IS_ERR(flat)) { + flat->len = n; + char* out = (char*)ray_data(flat); + /* Walk partitions from end, fill output from end */ + int64_t remaining = n; + int64_t dst = n; + for (int64_t p = n_parts - 1; p >= 0 && remaining > 0; p--) { + int64_t take = counts[p]; + if (take > remaining) take = remaining; + dst -= take; + for (int64_t r = 0; r < take; r++) + memcpy(out + (dst + r) * esz, kdata + (size_t)p * esz, esz); + remaining -= take; + } + } + result = ray_table_add_col(result, name_id, flat); + ray_release(flat); + continue; + } + if (RAY_IS_PARTED(col->type)) { + /* Copy last N rows from parted segments */ + int8_t base = (int8_t)RAY_PARTED_BASETYPE(col->type); + ray_t** tsp = (ray_t**)ray_data(col); + ray_t* tail_vec; + if (base == RAY_STR) { + tail_vec = parted_tail_str(tsp, col->len, n); + } else { + uint8_t tba = (base == RAY_SYM) + ? 
parted_first_attrs(tsp, col->len) : 0; + uint8_t esz = ray_sym_elem_size(base, tba); + tail_vec = typed_vec_new(base, tba, n); + if (tail_vec && !RAY_IS_ERR(tail_vec)) { + tail_vec->len = n; + ray_t** segs = (ray_t**)ray_data(col); + int64_t remaining = n; + int64_t dst = n; + for (int64_t s = col->len - 1; s >= 0 && remaining > 0; s--) { + if (!segs[s]) continue; + int64_t take = segs[s]->len; + if (take > remaining) take = remaining; + dst -= take; + if (parted_seg_esz_ok(segs[s], base, esz)) { + memcpy((char*)ray_data(tail_vec) + (size_t)dst * esz, + (char*)ray_data(segs[s]) + (size_t)(segs[s]->len - take) * esz, + (size_t)take * esz); + } else { + memset((char*)ray_data(tail_vec) + (size_t)dst * esz, + 0, (size_t)take * esz); + } + remaining -= take; + } + } + } + result = ray_table_add_col(result, name_id, tail_vec); + ray_release(tail_vec); + } else { + /* Flat column: direct copy */ + uint8_t esz = col_esz(col); + ray_t* tail_vec = col_vec_new(col, n); + if (tail_vec && !RAY_IS_ERR(tail_vec)) { + tail_vec->len = n; + memcpy(ray_data(tail_vec), + (char*)ray_data(col) + (size_t)skip * esz, + (size_t)n * esz); + col_propagate_nulls_range(tail_vec, 0, col, skip, n); + } + result = ray_table_add_col(result, name_id, tail_vec); + ray_release(tail_vec); + } + } + ray_release(input); + return result; + } + if (n > input->len) n = input->len; + int64_t skip = input->len - n; + uint8_t esz = col_esz(input); + ray_t* result = col_vec_new(input, n); + if (result && !RAY_IS_ERR(result)) { + result->len = n; + memcpy(ray_data(result), + (char*)ray_data(input) + (size_t)skip * esz, + (size_t)n * esz); + col_propagate_nulls_range(result, 0, input, skip, n); + } + ray_release(input); + return result; + } + + case OP_IF: { + return exec_if(g, op); + } + + case OP_LIKE: { + return exec_like(g, op); + } + + case OP_ILIKE: { + return exec_ilike(g, op); + } + + case OP_UPPER: case OP_LOWER: case OP_TRIM: { + return exec_string_unary(g, op); + } + case OP_STRLEN: { + return 
exec_strlen(g, op); + } + case OP_SUBSTR: { + return exec_substr(g, op); + } + case OP_REPLACE: { + return exec_replace(g, op); + } + case OP_CONCAT: { + return exec_concat(g, op); + } + + case OP_EXTRACT: { + return exec_extract(g, op); + } + + case OP_DATE_TRUNC: { + return exec_date_trunc(g, op); + } + + case OP_ALIAS: { + return exec_node(g, op->inputs[0]); + } + + case OP_MATERIALIZE: { + return exec_node(g, op->inputs[0]); + } + + case OP_SELECT: { + /* Column projection: select/compute columns from input table */ + ray_t* input = exec_node(g, op->inputs[0]); + if (!input || RAY_IS_ERR(input)) return input; + if (input->type != RAY_TABLE) { + ray_release(input); + return ray_error("nyi", NULL); + } + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) { ray_release(input); return ray_error("nyi", NULL); } + uint8_t n_cols = ext->sort.n_cols; + ray_op_t** columns = ext->sort.columns; + ray_t* result = ray_table_new(n_cols); + + /* Set g->table so SCAN nodes inside expressions resolve correctly */ + ray_t* saved_table = g->table; + g->table = input; + + for (uint8_t c = 0; c < n_cols; c++) { + if (columns[c]->opcode == OP_SCAN) { + /* Direct column reference — copy from input table */ + ray_op_ext_t* col_ext = find_ext(g, columns[c]->id); + if (!col_ext) continue; + int64_t name_id = col_ext->sym; + ray_t* src_col = ray_table_get_col(input, name_id); + if (src_col) { + ray_retain(src_col); + result = ray_table_add_col(result, name_id, src_col); + ray_release(src_col); + } + } else { + /* Expression column — evaluate against input table */ + ray_t* vec = exec_node(g, columns[c]); + if (!vec || RAY_IS_ERR(vec)) { + ray_release(result); + g->table = saved_table; + ray_release(input); + return vec ? 
vec : ray_error("nyi", NULL); + } + /* Broadcast scalar atoms to full column vectors */ + if (vec->type < 0) { + int64_t nr = ray_table_nrows(input); + ray_t* col = broadcast_scalar(vec, nr); + ray_release(vec); + vec = col; + if (!vec || RAY_IS_ERR(vec)) { + ray_release(result); + g->table = saved_table; + ray_release(input); + return vec ? vec : ray_error("nyi", NULL); + } + } + /* Synthetic name: _expr_0, _expr_1, ... */ + char name_buf[16]; + int n = 0; + name_buf[n++] = '_'; name_buf[n++] = 'e'; + if (c >= 100) name_buf[n++] = '0' + (c / 100); + if (c >= 10) name_buf[n++] = '0' + ((c / 10) % 10); + name_buf[n++] = '0' + (c % 10); + int64_t name_id = ray_sym_intern(name_buf, (size_t)n); + result = ray_table_add_col(result, name_id, vec); + ray_release(vec); + } + } + + g->table = saved_table; + ray_release(input); + return result; + } + + case OP_EXPAND: { + ray_t* src = exec_node(g, op->inputs[0]); + if (!src || RAY_IS_ERR(src)) return src; + ray_t* result = exec_expand(g, op, src); + ray_release(src); + return result; + } + + case OP_VAR_EXPAND: { + ray_t* start = exec_node(g, op->inputs[0]); + if (!start || RAY_IS_ERR(start)) return start; + ray_t* result = exec_var_expand(g, op, start); + ray_release(start); + return result; + } + + case OP_SHORTEST_PATH: { + ray_t* src = exec_node(g, op->inputs[0]); + ray_t* dst = exec_node(g, op->inputs[1]); + if (!src || RAY_IS_ERR(src)) { + if (dst && !RAY_IS_ERR(dst)) ray_release(dst); + return src; + } + if (!dst || RAY_IS_ERR(dst)) { ray_release(src); return dst; } + ray_t* result = exec_shortest_path(g, op, src, dst); + ray_release(src); + ray_release(dst); + return result; + } + + case OP_WCO_JOIN: { + return exec_wco_join(g, op); + } + + case OP_PAGERANK: { + return exec_pagerank(g, op); + } + + case OP_CONNECTED_COMP: { + return exec_connected_comp(g, op); + } + + case OP_DIJKSTRA: { + ray_t* src = exec_node(g, op->inputs[0]); + if (!src || RAY_IS_ERR(src)) return src; + ray_t* dst = op->inputs[1] ? 
exec_node(g, op->inputs[1]) : NULL; + if (dst && RAY_IS_ERR(dst)) { ray_release(src); return dst; } + ray_t* result = exec_dijkstra(g, op, src, dst); + ray_release(src); + if (dst) ray_release(dst); + return result; + } + + case OP_LOUVAIN: { + return exec_louvain(g, op); + } + + case OP_DEGREE_CENT: { + return exec_degree_cent(g, op); + } + + case OP_TOPSORT: { + return exec_topsort(g, op); + } + + case OP_DFS: { + ray_t* src = exec_node(g, op->inputs[0]); + if (!src || RAY_IS_ERR(src)) return src; + ray_t* result = exec_dfs(g, op, src); + ray_release(src); + return result; + } + + case OP_CLUSTER_COEFF: { + return exec_cluster_coeff(g, op); + } + + case OP_BETWEENNESS: { + return exec_betweenness(g, op); + } + + case OP_CLOSENESS: { + return exec_closeness(g, op); + } + + case OP_MST: { + return exec_mst(g, op); + } + + case OP_RANDOM_WALK: { + ray_t* src = exec_node(g, op->inputs[0]); + if (!src || RAY_IS_ERR(src)) return src; + ray_t* result = exec_random_walk(g, op, src); + ray_release(src); + return result; + } + + case OP_ASTAR: { + ray_t* src = exec_node(g, op->inputs[0]); + if (!src || RAY_IS_ERR(src)) return src; + ray_t* dst = exec_node(g, op->inputs[1]); + if (!dst || RAY_IS_ERR(dst)) { ray_release(src); return dst; } + ray_t* result = exec_astar(g, op, src, dst); + ray_release(src); ray_release(dst); + return result; + } + + case OP_K_SHORTEST: { + ray_t* src = exec_node(g, op->inputs[0]); + if (!src || RAY_IS_ERR(src)) return src; + ray_t* dst = exec_node(g, op->inputs[1]); + if (!dst || RAY_IS_ERR(dst)) { ray_release(src); return dst; } + ray_t* result = exec_k_shortest(g, op, src, dst); + ray_release(src); ray_release(dst); + return result; + } + + case OP_COSINE_SIM: { + ray_t* emb = exec_node(g, op->inputs[0]); + if (!emb || RAY_IS_ERR(emb)) return emb; + ray_t* result = exec_cosine_sim(g, op, emb); + ray_release(emb); + return result; + } + case OP_EUCLIDEAN_DIST: { + ray_t* emb = exec_node(g, op->inputs[0]); + if (!emb || RAY_IS_ERR(emb)) return 
emb; + ray_t* result = exec_euclidean_dist(g, op, emb); + ray_release(emb); + return result; + } + case OP_KNN: { + ray_t* emb = exec_node(g, op->inputs[0]); + if (!emb || RAY_IS_ERR(emb)) return emb; + ray_t* result = exec_knn(g, op, emb); + ray_release(emb); + return result; + } + case OP_HNSW_KNN: { + return exec_hnsw_knn(g, op); + } + case OP_ANN_RERANK: { + ray_t* src = exec_node(g, op->inputs[0]); + if (!src || RAY_IS_ERR(src)) return src; + ray_t* result = exec_ann_rerank(g, op, src); + ray_release(src); + return result; + } + case OP_KNN_RERANK: { + ray_t* src = exec_node(g, op->inputs[0]); + if (!src || RAY_IS_ERR(src)) return src; + ray_t* result = exec_knn_rerank(g, op, src); + ray_release(src); + return result; + } + + default: + return ray_error("nyi", NULL); + } +} + +/* ============================================================================ + * ray_execute -- top-level entry point (lazy pool init) + * ============================================================================ */ + +/* Merge two partial results from partition-streamed execution. + * Concatenates table columns or vectors across segments. 
*/
+/* Neither argument is consumed.  When `accum` is NULL or an error, `partial`
+ * is returned (retained first if it is a valid object); when `partial` is
+ * NULL or an error, `accum` is retained and returned.  Otherwise the result
+ * is the merged table / concatenated vector, or an error:
+ *   "schema" - the two tables do not pair up column-for-column;
+ *   "type"   - a table is being merged with a non-table;
+ *   whatever ray_table_new() / ray_vec_concat() reported on failure. */
+static ray_t* ray_result_merge(ray_t* accum, ray_t* partial) {
+    if (!accum || RAY_IS_ERR(accum)) {
+        if (partial && !RAY_IS_ERR(partial)) ray_retain(partial);
+        return partial;
+    }
+    if (!partial || RAY_IS_ERR(partial)) {
+        ray_retain(accum);
+        return accum;
+    }
+
+    /* Table merge: concatenate each column */
+    if (accum->type == RAY_TABLE && partial->type == RAY_TABLE) {
+        int64_t ncols = ray_table_ncols(accum);
+        /* Columns are paired by position, so a wider `partial` would lose
+         * its trailing columns without any error being reported. */
+        if (ray_table_ncols(partial) != ncols)
+            return ray_error("schema", NULL);
+        ray_t* merged = ray_table_new(ncols);
+        /* Same allocation check build_segment_table() performs. */
+        if (!merged || RAY_IS_ERR(merged)) return merged;
+        for (int64_t c = 0; c < ncols; c++) {
+            int64_t name_id = ray_table_col_name(accum, c);
+            ray_t* a_col = ray_table_get_col_idx(accum, c);
+            ray_t* p_col = ray_table_get_col_idx(partial, c);
+            if (!a_col || !p_col) {
+                ray_release(merged);
+                return ray_error("schema", NULL);
+            }
+            ray_t* combined = ray_vec_concat(a_col, p_col);
+            if (!combined || RAY_IS_ERR(combined)) {
+                ray_release(merged);
+                return combined;
+            }
+            /* The table keeps the column; drop the local reference. */
+            merged = ray_table_add_col(merged, name_id, combined);
+            ray_release(combined);
+        }
+        return merged;
+    }
+
+    /* Vector merge: concatenate directly */
+    if (accum->type != RAY_TABLE && partial->type != RAY_TABLE) {
+        return ray_vec_concat(accum, partial);
+    }
+
+    return ray_error("type", NULL);
+}
+
+/* Build a flat table containing one segment's columns from a parted table.
+ * For each parted column, extracts segs[seg_idx] as a flat vector.
+ * MAPCOMMON columns are materialized for segment seg_idx: the partition key
+ * value is broadcast to fill seg_rows elements.
+ * Non-parted, non-MAPCOMMON columns are rejected with a "schema" error.
*/
+/* Returns a freshly built table, or an error object:
+ *   "schema" - seg_idx is negative or out of range for a parted/MAPCOMMON
+ *              column, a segment pointer is NULL, a MAPCOMMON column has
+ *              fewer than two entries, or a column is neither parted nor
+ *              MAPCOMMON;
+ *   "type"   - the MAPCOMMON key vector reports an element size of 0;
+ *   "oom"    - the broadcast key vector could not be allocated. */
+static ray_t* build_segment_table(ray_t* parted_tbl, int32_t seg_idx) {
+    /* A negative index would pass every `seg_idx < len` test below and
+     * index before the start of the segment arrays. */
+    if (seg_idx < 0) return ray_error("schema", NULL);
+
+    int64_t ncols = ray_table_ncols(parted_tbl);
+    ray_t* seg_tbl = ray_table_new(ncols);
+    if (!seg_tbl || RAY_IS_ERR(seg_tbl)) return seg_tbl;
+
+    /* Find segment row count from first parted column (stays 0 when that
+     * column has no segment at seg_idx) */
+    int64_t seg_rows = 0;
+    for (int64_t c = 0; c < ncols; c++) {
+        ray_t* col = ray_table_get_col_idx(parted_tbl, c);
+        if (col && RAY_IS_PARTED(col->type)) {
+            ray_t** segs = (ray_t**)ray_data(col);
+            if (seg_idx < col->len && segs[seg_idx])
+                seg_rows = segs[seg_idx]->len;
+            break;
+        }
+    }
+
+    for (int64_t c = 0; c < ncols; c++) {
+        int64_t name_id = ray_table_col_name(parted_tbl, c);
+        ray_t* col = ray_table_get_col_idx(parted_tbl, c);
+        if (!col) continue;
+
+        if (col->type == RAY_MAPCOMMON) {
+            /* Materialize partition key for this segment: broadcast key
+             * value across seg_rows elements. */
+            if (col->len < 2) {
+                ray_release(seg_tbl);
+                return ray_error("schema", NULL);
+            }
+            ray_t** mc_ptrs = (ray_t**)ray_data(col);
+            ray_t* kv = mc_ptrs[0];  /* key_values */
+            if (!kv || seg_idx >= kv->len) {
+                ray_release(seg_tbl);
+                return ray_error("schema", NULL);
+            }
+            int8_t kv_type = kv->type;
+            size_t esz = (size_t)ray_sym_elem_size(kv_type, kv->attrs);
+            if (esz == 0) {
+                ray_release(seg_tbl);
+                return ray_error("type", NULL);
+            }
+            /* Allocate from the key vector itself, as the OP_TAIL MAPCOMMON
+             * path does, so the new vector's element width follows
+             * kv->attrs just like `esz` does.  ray_vec_new(kv_type, ...) is
+             * only given the type, not the attrs.
+             * NOTE(review): assumes col_vec_new() mirrors kv's width — confirm. */
+            ray_t* flat = col_vec_new(kv, seg_rows);
+            if (!flat || RAY_IS_ERR(flat)) {
+                ray_release(seg_tbl);
+                return ray_error("oom", NULL);
+            }
+            flat->len = seg_rows;
+            const char* src = (const char*)ray_data(kv) + (size_t)seg_idx * esz;
+            char* dst = (char*)ray_data(flat);
+            /* Word-sized keys are loaded once and stored directly; other
+             * widths fall back to a per-row memcpy. */
+            if (esz == 8) {
+                uint64_t v; memcpy(&v, src, 8);
+                for (int64_t r = 0; r < seg_rows; r++)
+                    ((uint64_t*)dst)[r] = v;
+            } else if (esz == 4) {
+                uint32_t v; memcpy(&v, src, 4);
+                for (int64_t r = 0; r < seg_rows; r++)
+                    ((uint32_t*)dst)[r] = v;
+            } else {
+                for (int64_t r = 0; r < seg_rows; r++)
+                    memcpy(dst + r * esz, src, esz);
+            }
+            seg_tbl = ray_table_add_col(seg_tbl, name_id, flat);
+            ray_release(flat);
+        } else if (RAY_IS_PARTED(col->type)) {
+            ray_t** segs = (ray_t**)ray_data(col);
+            if (seg_idx >= col->len || !segs[seg_idx]) {
+                ray_release(seg_tbl);
+                return ray_error("schema", NULL);
+            }
+            ray_retain(segs[seg_idx]);
+            seg_tbl = ray_table_add_col(seg_tbl, name_id, segs[seg_idx]);
+            ray_release(segs[seg_idx]);
+        } else {
+            /* Non-parted, non-MAPCOMMON column in a parted table:
+             * streaming should have been rejected by ray_execute().
+             * Error here as defense-in-depth to avoid silent duplication. */
+            ray_release(seg_tbl);
+            return ray_error("schema", NULL);
+        }
+    }
+    return seg_tbl;
+}
+
+/* Is this opcode safe for segment streaming with concatenation merge?
+ * Only element-wise, scan, filter, project, and alias ops produce
+ * results that can be correctly concatenated across segments.
+ * Everything else (joins, aggregations, sorts, graph ops, etc.)
+ * requires specialized merge or global state. */
+static bool op_streamable(uint16_t opc) {
+    switch (opc) {
+        /* Data access (OP_CONST excluded: vector constants have total-row
+         * length and produce length mismatches with per-segment data.
+         * Scalar constants are checked separately in dag_can_stream.)
*/ + case OP_SCAN: + /* Element-wise unary */ + case OP_NEG: case OP_ABS: case OP_NOT: case OP_SQRT: + case OP_LOG: case OP_EXP: case OP_CEIL: case OP_FLOOR: case OP_ROUND: + case OP_ISNULL: case OP_CAST: + /* Element-wise binary */ + case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD: + case OP_EQ: case OP_NE: case OP_LT: case OP_LE: + case OP_GT: case OP_GE: case OP_AND: case OP_OR: + case OP_MIN2: case OP_MAX2: case OP_IF: case OP_IN: case OP_NOT_IN: + /* String element-wise */ + case OP_LIKE: case OP_ILIKE: case OP_UPPER: case OP_LOWER: + case OP_STRLEN: case OP_SUBSTR: case OP_REPLACE: case OP_TRIM: + case OP_CONCAT: + /* Temporal element-wise */ + case OP_EXTRACT: case OP_DATE_TRUNC: + /* Structure */ + case OP_FILTER: case OP_SELECT: case OP_ALIAS: + case OP_MATERIALIZE: + return true; + default: + return false; + } +} + +/* Walk the root's input subtree to check if it reaches a default-table + * OP_SCAN. Returns true if found, false otherwise. Also rejects the + * subtree (sets *ok = false) on vector constants or secondary-table scans. + * + * Several streamable ops store extra operands in ext nodes rather than in + * the standard inputs[] array. These hidden children must be walked too: + * OP_SELECT — ext->sort.columns[0..n_cols-1] + * OP_IF — else branch: g->nodes[(uint32_t)(uintptr_t)ext->literal] + * OP_SUBSTR — length arg: g->nodes[(uint32_t)(uintptr_t)ext->literal] + * OP_REPLACE — replacement: g->nodes[(uint32_t)(uintptr_t)ext->literal] + * OP_CONCAT — args 2+: g->nodes[trail[i-2]] (uint32_t[] after ext) */ +static bool subtree_has_default_scan(ray_graph_t* g, ray_op_t* op, bool* ok, + uint64_t* visited) { + if (!op || !*ok) return false; + /* Skip already-visited nodes (DAGs may share subexpressions). 
*/ + uint32_t nid = op->id; + if (nid < g->node_count) { + if (visited[nid / 64] & (1ULL << (nid % 64))) return false; + visited[nid / 64] |= (1ULL << (nid % 64)); + } + uint16_t opc = op->opcode; + if (opc == OP_CONST) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (ext && ext->literal && !ray_is_atom(ext->literal)) + *ok = false; /* vector constant — can't stream */ + return false; + } + if (opc == OP_SCAN) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (ext) { + uint16_t stored_id = 0; + memcpy(&stored_id, ext->base.pad, sizeof(uint16_t)); + if (stored_id > 0) { *ok = false; return false; } + return true; /* default-table scan */ + } + return false; + } + if (!op_streamable(opc)) { *ok = false; return false; } + bool found = false; + for (uint8_t i = 0; i < op->arity && i < 2; i++) + found |= subtree_has_default_scan(g, op->inputs[i], ok, visited); + + /* Walk hidden operands stored in ext nodes */ + if (opc == OP_SELECT) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (ext) { + for (uint8_t c = 0; c < ext->sort.n_cols && *ok; c++) + found |= subtree_has_default_scan(g, ext->sort.columns[c], ok, visited); + } + } else if (opc == OP_IF || opc == OP_SUBSTR || opc == OP_REPLACE) { + /* 3rd operand stored as node index in ext->literal */ + ray_op_ext_t* ext = find_ext(g, op->id); + if (ext) { + uint32_t child_id = (uint32_t)(uintptr_t)ext->literal; + if (child_id < g->node_count) + found |= subtree_has_default_scan(g, &g->nodes[child_id], ok, visited); + } + } else if (opc == OP_CONCAT) { + /* n_args in ext->sym, args 2+ as uint32_t[] trailing after ext */ + ray_op_ext_t* ext = find_ext(g, op->id); + if (ext) { + int n_args = (int)ext->sym; + uint32_t* trail = (uint32_t*)((char*)(ext + 1)); + for (int i = 2; i < n_args && *ok; i++) { + if (trail[i - 2] < g->node_count) + found |= subtree_has_default_scan(g, &g->nodes[trail[i - 2]], ok, visited); + } + } + } + return found; +} + +/* Check whether a DAG rooted at `root` can be correctly executed via + * 
segment streaming with simple concatenation merge. + * Every node in the root's subtree must be streamable, and at least one + * OP_SCAN must read from the default table (stored_table_id == 0). + * OP_CONST is allowed only for scalar (atom) literals — vector constants + * have total-row length and would mismatch per-segment data. + * OP_SCAN nodes referencing secondary tables (stored_table_id > 0) + * disqualify streaming, since the loop only swaps g->table. + * DAGs that never scan the default table (e.g. a bare OP_CONST behind + * passthrough ops) are rejected to avoid duplicating table-independent + * results across partitions. */ +static bool dag_can_stream(ray_graph_t* g, ray_op_t* root) { + uint32_t n_words = (g->node_count + 63) / 64; + uint64_t stack_buf[16]; /* covers DAGs up to 1024 nodes */ + ray_t* visited_hdr = NULL; + uint64_t* visited; + if (n_words <= 16) { + visited = stack_buf; + } else { + visited = (uint64_t*)scratch_alloc(&visited_hdr, n_words * 8); + if (!visited) return false; + } + memset(visited, 0, n_words * 8); + bool ok = true; + bool has_default_scan = subtree_has_default_scan(g, root, &ok, visited); + if (visited_hdr) scratch_free(visited_hdr); + return ok && has_default_scan; +} + +static ray_t* ray_execute_inner(ray_graph_t* g, ray_op_t* root); + +ray_t* ray_execute(ray_graph_t* g, ray_op_t* root) { + ray_t* r = ray_execute_inner(g, root); + /* End the current progress tracking session. A no-op when no + * callback is registered; otherwise emits the final "100% done" + * tick (only if the bar was actually shown). 
*/
+    ray_progress_end();
+    return r;
+}
+
+/* Core of ray_execute(): evaluates `root` against g->table.
+ *
+ * Two paths:
+ *   - flat: a single exec_node() call, used when the table has no parted
+ *     columns, mixes parted and flat columns, or dag_can_stream() says no;
+ *   - streaming: one exec_node() call per segment on a table built by
+ *     build_segment_table(), with the partial results concatenated by
+ *     ray_result_merge().
+ *
+ * On the streaming path g->table is put back to the caller's table before
+ * every return.
+ *
+ * Errors produced here: "nyi" (NULL g or root), "schema" (parted columns
+ * disagree on segment count, or the pruning mask size does not match),
+ * "cancel" (pool cancellation flag observed between segments), "oom" (no
+ * result could be produced at all).  Errors from the callees are passed
+ * through unchanged. */
+static ray_t* ray_execute_inner(ray_graph_t* g, ray_op_t* root) {
+    if (!g || !root) return ray_error("nyi", NULL);
+
+    /* Lazy-init the global thread pool on first call */
+    ray_pool_t* pool = ray_pool_get();
+
+    /* Reset cancellation flag at the start of each query */
+    if (pool)
+        atomic_store_explicit(&pool->cancelled, 0, memory_order_relaxed);
+
+    /* Detect streaming mode: check if g->table has parted columns.
+     * All non-MAPCOMMON columns must be parted; a flat (non-parted)
+     * column would be duplicated across every segment table, producing
+     * wrong results after concatenation merge.
+     * All parted columns must agree on segment count — a mismatch is
+     * a malformed table and is rejected upfront. */
+    int32_t seg_count = 0;
+    if (g->table) {
+        bool has_flat = false;
+        for (int64_t c = 0; c < ray_table_ncols(g->table); c++) {
+            ray_t* col = ray_table_get_col_idx(g->table, c);
+            if (!col) continue;
+            if (RAY_IS_PARTED(col->type)) {
+                /* The first parted column with a non-zero length fixes
+                 * the expected segment count. */
+                if (seg_count == 0)
+                    seg_count = (int32_t)col->len;
+                else if ((int32_t)col->len != seg_count)
+                    return ray_error("schema", NULL);
+            } else if (col->type != RAY_MAPCOMMON) {
+                has_flat = true;
+            }
+        }
+        if (has_flat)
+            seg_count = 0;  /* fall back to flat materialization */
+    }
+
+    if (seg_count == 0 || !dag_can_stream(g, root)) {
+        /* Non-parted table or DAG contains ops that need specialized merge:
+         * use existing flat-materialization path. */
+        ray_t* result = exec_node(g, root);
+        /* A selection left behind by the DAG is applied to a table result
+         * here, then dropped. */
+        if (g->selection && result && !RAY_IS_ERR(result)
+            && result->type == RAY_TABLE) {
+            ray_t* compacted = sel_compact(g, result, g->selection);
+            ray_release(result);
+            ray_release(g->selection);
+            g->selection = NULL;
+            result = compacted;
+        }
+        return result;
+    }
+
+    /* Streaming mode: find seg_mask from optimizer (if any).  The first
+     * ext node that carries a mask is the one used. */
+    uint64_t* seg_mask = NULL;
+    int64_t seg_mask_count = 0;
+    for (uint32_t e = 0; e < g->ext_count; e++) {
+        if (g->ext_nodes[e] && g->ext_nodes[e]->seg_mask) {
+            seg_mask = g->ext_nodes[e]->seg_mask;
+            seg_mask_count = g->ext_nodes[e]->seg_mask_count;
+            break;
+        }
+    }
+
+    /* Validate mask covers all segments — a mismatch means the
+     * MAPCOMMON key count disagrees with the parted column segment
+     * count, which is a schema error. Surface it rather than
+     * silently dropping data. */
+    if (seg_mask && seg_mask_count != (int64_t)seg_count)
+        return ray_error("schema", NULL);
+
+    ray_t* saved_table = g->table;
+    ray_t* result = NULL;
+
+    for (int32_t s = 0; s < seg_count; s++) {
+        /* Check pruning mask (a clear bit means the segment is skipped) */
+        if (seg_mask && !(seg_mask[s / 64] & (1ULL << (s % 64))))
+            continue;
+
+        /* Check cancellation */
+        if (pool && atomic_load_explicit(&pool->cancelled, memory_order_relaxed)) {
+            g->table = saved_table;
+            if (g->selection) { ray_release(g->selection); g->selection = NULL; }
+            /* NOTE(review): result is still NULL until a segment has been
+             * merged — this relies on ray_release() accepting NULL; confirm. */
+            ray_release(result);
+            return ray_error("cancel", NULL);
+        }
+
+        /* Build flat table for this segment and swap g->table.
+         * All operators (OP_SCAN, GROUP, expr_compile, etc.) see flat
+         * columns via g->table, so no special-casing is needed. */
+        ray_t* seg_tbl = build_segment_table(saved_table, s);
+        if (!seg_tbl || RAY_IS_ERR(seg_tbl)) {
+            g->table = saved_table;
+            if (g->selection) { ray_release(g->selection); g->selection = NULL; }
+            ray_release(result);
+            return seg_tbl;
+        }
+        g->table = seg_tbl;
+        /* Each segment starts without a selection. */
+        if (g->selection) ray_release(g->selection);
+        g->selection = NULL;
+
+        ray_t* partial = exec_node(g, root);
+
+        /* Compact lazy selection for this segment */
+        if (g->selection && partial && !RAY_IS_ERR(partial)
+            && partial->type == RAY_TABLE) {
+            ray_t* compacted = sel_compact(g, partial, g->selection);
+            ray_release(partial);
+            ray_release(g->selection);
+            g->selection = NULL;
+            partial = compacted;
+        }
+
+        /* Restore the caller's table before looking at the outcome. */
+        g->table = saved_table;
+        ray_release(seg_tbl);
+
+        if (!partial || RAY_IS_ERR(partial)) {
+            if (g->selection) { ray_release(g->selection); g->selection = NULL; }
+            ray_release(result);
+            return partial;
+        }
+
+        /* Merge partial into accumulator.  The old accumulator and the
+         * partial are both released afterwards; `merged` takes over. */
+        ray_t* merged = ray_result_merge(result, partial);
+        ray_release(result);
+        ray_release(partial);
+        if (!merged || RAY_IS_ERR(merged)) {
+            if (g->selection) { ray_release(g->selection); g->selection = NULL; }
+            return merged;
+        }
+        result = merged;
+    }
+
+    /* Clean up any lingering selection from the last segment iteration */
+    if (g->selection) { ray_release(g->selection); g->selection = NULL; }
+
+    /* All segments pruned: execute DAG on empty table to get correct
+     * output schema (handles SELECT/PROJECT that reshape columns).
+     * Build a fresh 0-row table — do not mutate shared source vectors. */
+    if (!result) {
+        int64_t ncols = ray_table_ncols(saved_table);
+        ray_t* empty_tbl = ray_table_new(ncols);
+        if (empty_tbl && !RAY_IS_ERR(empty_tbl)) {
+            for (int64_t c = 0; c < ncols; c++) {
+                int64_t name_id = ray_table_col_name(saved_table, c);
+                ray_t* col = ray_table_get_col_idx(saved_table, c);
+                if (!col) continue;
+                /* Pick the element type the column would have once
+                 * flattened: key-vector type for MAPCOMMON (RAY_I64 when
+                 * the key vector is missing), base type for parted. */
+                int8_t base = col->type;
+                if (col->type == RAY_MAPCOMMON) {
+                    ray_t** mc = (ray_t**)ray_data(col);
+                    base = mc[0] ? mc[0]->type : RAY_I64;
+                } else if (RAY_IS_PARTED(col->type)) {
+                    base = (int8_t)RAY_PARTED_BASETYPE(col->type);
+                }
+                ray_t* ecol = ray_vec_new(base, 0);
+                if (!ecol || RAY_IS_ERR(ecol)) {
+                    /* ray_vec_new rejects RAY_LIST (type 0) and other
+                     * non-standard types; fall back to a raw 0-length
+                     * block with the correct type tag. */
+                    ecol = ray_alloc(0);
+                    if (!ecol || RAY_IS_ERR(ecol)) continue;
+                    ecol->type = base;
+                    ecol->len = 0;
+                }
+                empty_tbl = ray_table_add_col(empty_tbl, name_id, ecol);
+                ray_release(ecol);
+            }
+            g->table = empty_tbl;
+            if (g->selection) ray_release(g->selection);
+            g->selection = NULL;
+            result = exec_node(g, root);
+            /* Nothing to compact on a 0-row input; just drop any selection. */
+            if (g->selection) {
+                ray_release(g->selection);
+                g->selection = NULL;
+            }
+            g->table = saved_table;
+            ray_release(empty_tbl);
+        }
+    }
+
+    if (!result) return ray_error("oom", NULL);
+    return result;
+}
diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/exec.h b/crates/rayforce-sys/vendor/rayforce/src/ops/exec.h
new file mode 100644
index 0000000..396677e
--- /dev/null
+++ b/crates/rayforce-sys/vendor/rayforce/src/ops/exec.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2025-2026 Anton Kundenko
+ * All rights reserved.
+
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_EXEC_H +#define RAY_EXEC_H + +#include "ops.h" + +#endif /* RAY_EXEC_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/expr.c b/crates/rayforce-sys/vendor/rayforce/src/ops/expr.c new file mode 100644 index 0000000..b0f2da6 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/expr.c @@ -0,0 +1,1776 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ops/internal.h" + +static bool atom_to_numeric(ray_t* atom, double* out_f, int64_t* out_i, bool* out_is_f64) { + if (!atom || !ray_is_atom(atom)) return false; + switch (atom->type) { + case -RAY_F64: + *out_f = atom->f64; + *out_i = (int64_t)atom->f64; + *out_is_f64 = true; + return true; + case -RAY_I64: + case -RAY_SYM: + case -RAY_DATE: + case -RAY_TIME: + case -RAY_TIMESTAMP: + *out_i = atom->i64; + *out_f = (double)atom->i64; + *out_is_f64 = false; + return true; + case -RAY_I32: + *out_i = (int64_t)atom->i32; + *out_f = (double)atom->i32; + *out_is_f64 = false; + return true; + case -RAY_I16: + *out_i = (int64_t)atom->i16; + *out_f = (double)atom->i16; + *out_is_f64 = false; + return true; + case -RAY_U8: + case -RAY_BOOL: + *out_i = (int64_t)atom->u8; + *out_f = (double)atom->u8; + *out_is_f64 = false; + return true; + default: + return false; + } +} + +/* Evaluate a numeric constant sub-expression from op graph. + * Supports CONST and arithmetic trees over constant children. */ +static bool eval_const_numeric_expr(ray_graph_t* g, ray_op_t* op, + double* out_f, int64_t* out_i, bool* out_is_f64) { + if (!g || !op || !out_f || !out_i || !out_is_f64) return false; + + if (op->opcode == OP_CONST) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext || !ext->literal) return false; + return atom_to_numeric(ext->literal, out_f, out_i, out_is_f64); + } + + if ((op->opcode == OP_NEG || op->opcode == OP_ABS) && op->arity == 1 && op->inputs[0]) { + double af = 0.0; + int64_t ai = 0; + bool a_is_f64 = false; + if (!eval_const_numeric_expr(g, op->inputs[0], &af, &ai, &a_is_f64)) return false; + if (a_is_f64 || op->out_type == RAY_F64) { + double v = a_is_f64 ? af : (double)ai; + double r = (op->opcode == OP_NEG) ? -v : fabs(v); + *out_f = r; + *out_i = (int64_t)r; + *out_is_f64 = true; + return true; + } + int64_t v = ai; + /* Unsigned negation avoids UB on INT64_MIN */ + int64_t r = (op->opcode == OP_NEG) + ? (int64_t)(-(uint64_t)v) + : (v < 0 ? 
(int64_t)(-(uint64_t)v) : v); + *out_i = r; + *out_f = (double)r; + *out_is_f64 = false; + return true; + } + + if (op->arity != 2 || !op->inputs[0] || !op->inputs[1]) return false; + if (op->opcode < OP_ADD || op->opcode > OP_MAX2) return false; + + double lf = 0.0, rf = 0.0; + int64_t li = 0, ri = 0; + bool l_is_f64 = false, r_is_f64 = false; + if (!eval_const_numeric_expr(g, op->inputs[0], &lf, &li, &l_is_f64)) return false; + if (!eval_const_numeric_expr(g, op->inputs[1], &rf, &ri, &r_is_f64)) return false; + + if (op->out_type == RAY_F64 || l_is_f64 || r_is_f64 || op->opcode == OP_DIV) { + double lv = l_is_f64 ? lf : (double)li; + double rv = r_is_f64 ? rf : (double)ri; + double r = 0.0; + switch (op->opcode) { + case OP_ADD: r = lv + rv; break; + case OP_SUB: r = lv - rv; break; + case OP_MUL: r = lv * rv; break; + case OP_DIV: r = rv != 0.0 ? lv / rv : NAN; break; + case OP_MOD: { if (rv != 0.0) { r = fmod(lv, rv); if (r && ((r > 0) != (rv > 0))) r += rv; } else { r = NAN; } } break; + case OP_MIN2: r = lv < rv ? lv : rv; break; + case OP_MAX2: r = lv > rv ? lv : rv; break; + default: return false; + } + *out_f = r; + *out_i = (int64_t)r; + *out_is_f64 = true; + return true; + } + + int64_t r = 0; + switch (op->opcode) { + case OP_ADD: r = (int64_t)((uint64_t)li + (uint64_t)ri); break; + case OP_SUB: r = (int64_t)((uint64_t)li - (uint64_t)ri); break; + case OP_MUL: r = (int64_t)((uint64_t)li * (uint64_t)ri); break; + case OP_DIV: + if (ri==0) return false; + r = li/ri; if ((li^ri)<0 && r*ri!=li) r--; + break; + case OP_MOD: + if (ri==0) return false; + r = li%ri; if (r && (r^ri)<0) r+=ri; + break; + case OP_MIN2: r = li < ri ? li : ri; break; + case OP_MAX2: r = li > ri ? 
li : ri; break; + default: return false; + } + *out_i = r; + *out_f = (double)r; + *out_is_f64 = false; + return true; +} + +static bool const_expr_to_i64(ray_graph_t* g, ray_op_t* op, int64_t* out) { + if (!g || !op || !out) return false; + double c_f = 0.0; + int64_t c_i = 0; + bool c_is_f64 = false; + if (!eval_const_numeric_expr(g, op, &c_f, &c_i, &c_is_f64)) return false; + if (!c_is_f64) { + *out = c_i; + return true; + } + if (!isfinite(c_f)) return false; + double ip = 0.0; + if (modf(c_f, &ip) != 0.0) return false; + if (ip > (double)INT64_MAX || ip < (double)INT64_MIN) return false; + *out = (int64_t)ip; + return true; +} + +static inline bool type_is_linear_i64_col(int8_t t) { + return t == RAY_I64 || t == RAY_TIMESTAMP || + t == RAY_I32 || t == RAY_DATE || t == RAY_TIME || t == RAY_I16 || + t == RAY_U8 || t == RAY_BOOL || RAY_IS_SYM(t); +} + +static bool linear_expr_add_term(linear_expr_i64_t* e, int64_t sym, int64_t coeff) { + if (!e) return false; + if (coeff == 0) return true; + for (uint8_t i = 0; i < e->n_terms; i++) { + if (e->syms[i] != sym) continue; + int64_t next = e->coeff_i64[i] + coeff; + if (next != 0) { + e->coeff_i64[i] = next; + return true; + } + for (uint8_t j = i + 1; j < e->n_terms; j++) { + e->syms[j - 1] = e->syms[j]; + e->coeff_i64[j - 1] = e->coeff_i64[j]; + } + e->n_terms--; + return true; + } + if (e->n_terms >= AGG_LINEAR_MAX_TERMS) return false; + e->syms[e->n_terms] = sym; + e->coeff_i64[e->n_terms] = coeff; + e->n_terms++; + return true; +} + +static void linear_expr_scale(linear_expr_i64_t* e, int64_t k) { + if (!e || k == 1) return; + e->bias_i64 *= k; + for (uint8_t i = 0; i < e->n_terms; i++) + e->coeff_i64[i] *= k; +} + +static bool linear_expr_add_scaled(linear_expr_i64_t* dst, const linear_expr_i64_t* src, int64_t scale) { + if (!dst || !src) return false; + dst->bias_i64 += src->bias_i64 * scale; + for (uint8_t i = 0; i < src->n_terms; i++) { + if (!linear_expr_add_term(dst, src->syms[i], src->coeff_i64[i] * 
scale)) + return false; + } + return true; +} + +/* Parse an expression tree into integer linear form: + * sum(coeff[i] * scan(sym[i])) + bias + * Supports +, -, unary -, and multiplication by integer constants. */ +static bool parse_linear_i64_expr(ray_graph_t* g, ray_op_t* op, linear_expr_i64_t* out) { + if (!g || !op || !out) return false; + memset(out, 0, sizeof(*out)); + + int64_t c = 0; + if (const_expr_to_i64(g, op, &c)) { + out->bias_i64 = c; + return true; + } + + if (op->opcode == OP_SCAN) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext || ext->base.opcode != OP_SCAN) return false; + out->n_terms = 1; + out->syms[0] = ext->sym; + out->coeff_i64[0] = 1; + return true; + } + + if (op->opcode == OP_NEG && op->arity == 1 && op->inputs[0]) { + linear_expr_i64_t inner; + if (!parse_linear_i64_expr(g, op->inputs[0], &inner)) return false; + linear_expr_scale(&inner, -1); + *out = inner; + return true; + } + + if ((op->opcode == OP_ADD || op->opcode == OP_SUB) && + op->arity == 2 && op->inputs[0] && op->inputs[1]) { + linear_expr_i64_t lhs; + linear_expr_i64_t rhs; + if (!parse_linear_i64_expr(g, op->inputs[0], &lhs)) return false; + if (!parse_linear_i64_expr(g, op->inputs[1], &rhs)) return false; + *out = lhs; + return linear_expr_add_scaled(out, &rhs, op->opcode == OP_ADD ? 1 : -1); + } + + if (op->opcode == OP_MUL && op->arity == 2 && op->inputs[0] && op->inputs[1]) { + int64_t k = 0; + linear_expr_i64_t side; + if (const_expr_to_i64(g, op->inputs[0], &k) && + parse_linear_i64_expr(g, op->inputs[1], &side)) { + linear_expr_scale(&side, k); + *out = side; + return true; + } + if (const_expr_to_i64(g, op->inputs[1], &k) && + parse_linear_i64_expr(g, op->inputs[0], &side)) { + linear_expr_scale(&side, k); + *out = side; + return true; + } + } + + return false; +} + +/* Detect SUM/AVG integer-linear inputs for scalar aggregate fast path. 
+ * Example: (v1 + 1) * 2, v1 + v2 + 1 */ +bool try_linear_sumavg_input_i64(ray_graph_t* g, ray_t* tbl, ray_op_t* input_op, + agg_linear_t* out_plan) { + if (!g || !tbl || !input_op || !out_plan) return false; + linear_expr_i64_t lin; + if (!parse_linear_i64_expr(g, input_op, &lin)) return false; + + memset(out_plan, 0, sizeof(*out_plan)); + out_plan->n_terms = lin.n_terms; + out_plan->bias_i64 = lin.bias_i64; + for (uint8_t i = 0; i < lin.n_terms; i++) { + ray_t* col = ray_table_get_col(tbl, lin.syms[i]); + if (!col || !type_is_linear_i64_col(col->type)) return false; + out_plan->term_ptrs[i] = ray_data(col); + out_plan->term_types[i] = col->type; + out_plan->coeff_i64[i] = lin.coeff_i64[i]; + } + out_plan->enabled = true; + return true; +} + +/* Detect SUM/AVG affine inputs of form (scan +/- const) and return scan vector + * plus the additive bias so we can adjust results from (sum,count) directly. */ +bool try_affine_sumavg_input(ray_graph_t* g, ray_t* tbl, ray_op_t* input_op, + ray_t** out_vec, agg_affine_t* out_affine) { + if (!g || !tbl || !input_op || !out_vec || !out_affine) return false; + if (input_op->opcode != OP_ADD && input_op->opcode != OP_SUB) return false; + if (input_op->arity != 2 || !input_op->inputs[0] || !input_op->inputs[1]) return false; + + ray_op_t* lhs = input_op->inputs[0]; + ray_op_t* rhs = input_op->inputs[1]; + ray_op_t* base_op = NULL; + int sign = 1; + double c_f = 0.0; + int64_t c_i = 0; + bool c_is_f64 = false; + + double lhs_f = 0.0, rhs_f = 0.0; + int64_t lhs_i = 0, rhs_i = 0; + bool lhs_is_f64 = false, rhs_is_f64 = false; + bool lhs_const = eval_const_numeric_expr(g, lhs, &lhs_f, &lhs_i, &lhs_is_f64); + bool rhs_const = eval_const_numeric_expr(g, rhs, &rhs_f, &rhs_i, &rhs_is_f64); + + if (input_op->opcode == OP_ADD) { + if (lhs_const) { + base_op = rhs; + sign = 1; + c_f = lhs_f; + c_i = lhs_i; + c_is_f64 = lhs_is_f64; + } else if (rhs_const) { + base_op = lhs; + sign = 1; + c_f = rhs_f; + c_i = rhs_i; + c_is_f64 = rhs_is_f64; 
+ } + } else { /* OP_SUB */ + if (rhs_const) { + base_op = lhs; + sign = -1; + c_f = rhs_f; + c_i = rhs_i; + c_is_f64 = rhs_is_f64; + } + } + if (!base_op) return false; + + ray_op_ext_t* base_ext = find_ext(g, base_op->id); + if (!base_ext || base_ext->base.opcode != OP_SCAN) return false; + ray_t* base_vec = ray_table_get_col(tbl, base_ext->sym); + if (!base_vec) return false; + + int8_t bt = base_vec->type; + if (bt == RAY_F64) { + out_affine->enabled = true; + out_affine->bias_f64 = (double)sign * (c_is_f64 ? c_f : (double)c_i); + out_affine->bias_i64 = (int64_t)out_affine->bias_f64; + *out_vec = base_vec; + return true; + } + + if (bt == RAY_I64 || bt == RAY_TIMESTAMP || + bt == RAY_I32 || bt == RAY_I16 || bt == RAY_U8 || bt == RAY_BOOL || + RAY_IS_SYM(bt)) { + int64_t c = 0; + if (c_is_f64) { + if (!isfinite(c_f)) return false; + double ip = 0.0; + if (modf(c_f, &ip) != 0.0) return false; + if (ip > (double)INT64_MAX || ip < (double)INT64_MIN) return false; + c = (int64_t)ip; + } else { + c = c_i; + } + out_affine->enabled = true; + out_affine->bias_i64 = sign > 0 ? c : -c; + out_affine->bias_f64 = (double)out_affine->bias_i64; + *out_vec = base_vec; + return true; + } + + return false; +} + +/* ============================================================================ + * Expression Compiler: morsel-batched fused evaluation + * + * Compiles an expression DAG (e.g. v1 + v2 * 3) into a flat instruction + * array. Evaluates in morsel-sized chunks (1024 elements) with scratch + * registers — never allocates full-length intermediate vectors. + * ============================================================================ */ + +/* Is this opcode an element-wise op suitable for expression compilation? 
*/ +static inline bool expr_is_elementwise(uint16_t op) { + return (op >= OP_NEG && op <= OP_CAST) || (op >= OP_ADD && op <= OP_MAX2); +} + +/* Insert CAST instruction to promote register to target type */ +static uint8_t expr_ensure_type(ray_expr_t* out, uint8_t src, int8_t target) { + if (out->regs[src].type == target) return src; + if (out->n_regs >= EXPR_MAX_REGS || out->n_ins >= EXPR_MAX_INS) return src; + uint8_t r = out->n_regs; + out->regs[r].kind = REG_SCRATCH; + out->regs[r].type = target; + out->n_regs++; + out->n_scratch++; + out->ins[out->n_ins++] = (expr_ins_t){ + .opcode = OP_CAST, .dst = r, .src1 = src, .src2 = 0xFF, + }; + return r; +} + +/* Compile expression DAG into flat instruction array. + * Returns true on success. Only compiles element-wise subtrees. */ +bool expr_compile(ray_graph_t* g, ray_t* tbl, ray_op_t* root, ray_expr_t* out) { + memset(out, 0, sizeof(*out)); + if (!root || !g || !tbl) return false; + if (root->opcode == OP_SCAN || root->opcode == OP_CONST) return false; + if (!expr_is_elementwise(root->opcode)) return false; + + uint32_t nc = g->node_count; + if (nc > 4096) return false; /* guard against stack overflow from VLA */ + uint8_t node_reg[nc]; + memset(node_reg, 0xFF, nc * sizeof(uint8_t)); + + /* Post-order DFS with explicit stack */ + /* Depth limit 64 — expressions deeper than 64 levels fall back to non-fused path. 
*/ + typedef struct { ray_op_t* node; uint8_t phase; } dfs_t; + dfs_t dfs[64]; + int sp = 0; + dfs[sp++] = (dfs_t){root, 0}; + + while (sp > 0) { + dfs_t* top = &dfs[sp - 1]; + ray_op_t* node = top->node; + + if (node->id < nc && node_reg[node->id] != 0xFF) { sp--; continue; } + + if (top->phase == 0) { + top->phase = 1; + for (int i = node->arity - 1; i >= 0; i--) { + ray_op_t* ch = node->inputs[i]; + if (!ch) continue; + if (ch->id < nc && node_reg[ch->id] != 0xFF) continue; + if (sp >= 64) return false; + dfs[sp++] = (dfs_t){ch, 0}; + } + } else { + sp--; + uint8_t r = out->n_regs; + if (r >= EXPR_MAX_REGS) return false; + + if (node->opcode == OP_SCAN) { + ray_op_ext_t* ext = find_ext(g, node->id); + if (!ext) return false; + ray_t* col = ray_table_get_col(tbl, ext->sym); + if (!col) return false; + if (col->type == RAY_MAPCOMMON) return false; + if (col->type == RAY_STR) return false; /* RAY_STR needs string comparison path */ + if (col->attrs & (RAY_ATTR_HAS_NULLS | RAY_ATTR_SLICE)) return false; /* nullable cols need bitmap-aware path */ + out->regs[r].kind = REG_SCAN; + if (RAY_IS_PARTED(col->type)) { + int8_t base = (int8_t)RAY_PARTED_BASETYPE(col->type); + out->regs[r].col_type = base; + out->regs[r].data = NULL; /* resolved per-segment */ + out->regs[r].is_parted = true; + out->regs[r].parted_col = col; + out->regs[r].type = (base == RAY_F64) ? RAY_F64 : RAY_I64; + out->has_parted = true; + } else { + out->regs[r].col_type = col->type; + out->regs[r].col_attrs = col->attrs; + out->regs[r].data = ray_data(col); + out->regs[r].is_parted = false; + out->regs[r].parted_col = NULL; + out->regs[r].type = (col->type == RAY_F64) ? 
RAY_F64 : RAY_I64; + } + } else if (node->opcode == OP_CONST) { + ray_op_ext_t* ext = find_ext(g, node->id); + if (!ext || !ext->literal) return false; + if (RAY_ATOM_IS_NULL(ext->literal)) return false; /* null constants need bitmap-aware path */ + double cf; int64_t ci; bool is_f64; + if (!atom_to_numeric(ext->literal, &cf, &ci, &is_f64)) { + /* Try resolving string constant to symbol intern ID — + * enables fused evaluation of SYM column comparisons + * (e.g. id2 = 'id080' compiles to integer EQ). */ + if (ext->literal->type == -RAY_STR) { + const char* s = ray_str_ptr(ext->literal); + size_t slen = ray_str_len(ext->literal); + int64_t sid = ray_sym_find(s, slen); + if (sid < 0) return false; + ci = sid; + cf = (double)sid; + is_f64 = false; + } else { + return false; + } + } + out->regs[r].kind = REG_CONST; + out->regs[r].type = is_f64 ? RAY_F64 : RAY_I64; + out->regs[r].const_f64 = cf; + out->regs[r].const_i64 = ci; + } else if (expr_is_elementwise(node->opcode)) { + if (!node->inputs[0]) return false; + uint8_t s1 = node_reg[node->inputs[0]->id]; + if (s1 == 0xFF) return false; + uint8_t s2 = 0xFF; + if (node->arity >= 2 && node->inputs[1]) { + s2 = node_reg[node->inputs[1]->id]; + if (s2 == 0xFF) return false; + } + + int8_t t1 = out->regs[s1].type; + int8_t t2 = (s2 != 0xFF) ? out->regs[s2].type : t1; + uint16_t op = node->opcode; + int8_t ot; + + /* Determine output type */ + if (op == OP_CAST) + ot = node->out_type; + else if ((op >= OP_EQ && op <= OP_GE) || + op == OP_AND || op == OP_OR || op == OP_NOT) + ot = RAY_BOOL; + else if (t1 == RAY_F64 || t2 == RAY_F64 || op == OP_DIV || + op == OP_SQRT || op == OP_LOG || op == OP_EXP) + ot = RAY_F64; + else + ot = RAY_I64; + + /* Type promotion: ensure both sources match for the operation. + * Skip for OP_CAST — the instruction itself IS the conversion. 
*/ + if (op == OP_CAST) { + /* No promotion needed; CAST handles the conversion */ + r = out->n_regs; + if (r >= EXPR_MAX_REGS) return false; + } else if (ot == RAY_F64 && s2 != 0xFF) { + /* Arithmetic with f64 output — promote i64 inputs to f64 */ + s1 = expr_ensure_type(out, s1, RAY_F64); + s2 = expr_ensure_type(out, s2, RAY_F64); + r = out->n_regs; /* re-read after possible CAST inserts */ + if (r >= EXPR_MAX_REGS) return false; + } else if (ot == RAY_F64 && s2 == 0xFF) { + /* Unary f64 — promote input */ + s1 = expr_ensure_type(out, s1, RAY_F64); + r = out->n_regs; + if (r >= EXPR_MAX_REGS) return false; + } else if (ot == RAY_BOOL && s2 != 0xFF && t1 != t2) { + /* Comparison with mixed types — promote both to f64 */ + int8_t pt = (t1 == RAY_F64 || t2 == RAY_F64) ? RAY_F64 : RAY_I64; + s1 = expr_ensure_type(out, s1, pt); + s2 = expr_ensure_type(out, s2, pt); + r = out->n_regs; + if (r >= EXPR_MAX_REGS) return false; + } + + out->regs[r].kind = REG_SCRATCH; + out->regs[r].type = ot; + out->n_scratch++; + + if (out->n_ins >= EXPR_MAX_INS) return false; + out->ins[out->n_ins++] = (expr_ins_t){ + .opcode = (uint8_t)op, .dst = r, .src1 = s1, .src2 = s2, + }; + } else { + return false; + } + + out->n_regs++; + if (node->id < nc) node_reg[node->id] = r; + } + } + + if (out->n_regs == 0 || out->n_ins == 0) return false; + out->out_reg = out->n_regs - 1; + out->out_type = out->regs[out->out_reg].type; + return true; +} + +/* ---- Morsel-batched expression evaluator ---- */ + +/* Load SCAN column data into i64 scratch buffer with type conversion */ +static void expr_load_i64(int64_t* dst, const void* data, int8_t col_type, + uint8_t col_attrs, int64_t start, int64_t n) { + switch (col_type) { + case RAY_I64: case RAY_TIMESTAMP: + memcpy(dst, (const int64_t*)data + start, (size_t)n * 8); + break; + case RAY_SYM: { + for (int64_t j = 0; j < n; j++) + dst[j] = ray_read_sym(data, start + j, col_type, col_attrs); + } break; + case RAY_I32: case RAY_DATE: case RAY_TIME: { + 
const int32_t* s = (const int32_t*)data + start; + for (int64_t j = 0; j < n; j++) dst[j] = s[j]; + } break; + case RAY_U8: case RAY_BOOL: { + const uint8_t* s = (const uint8_t*)data + start; + for (int64_t j = 0; j < n; j++) dst[j] = s[j]; + } break; + case RAY_I16: { + const int16_t* s = (const int16_t*)data + start; + for (int64_t j = 0; j < n; j++) dst[j] = s[j]; + } break; + default: memset(dst, 0, (size_t)n * 8); break; + } +} + +/* Load SCAN column data into f64 scratch buffer with type conversion */ +static void expr_load_f64(double* dst, const void* data, int8_t col_type, + uint8_t col_attrs, int64_t start, int64_t n) { + switch (col_type) { + case RAY_F64: + memcpy(dst, (const double*)data + start, (size_t)n * 8); + break; + case RAY_I64: case RAY_TIMESTAMP: { + const int64_t* s = (const int64_t*)data + start; + for (int64_t j = 0; j < n; j++) dst[j] = (double)s[j]; + } break; + case RAY_SYM: { + for (int64_t j = 0; j < n; j++) + dst[j] = (double)ray_read_sym(data, start + j, col_type, col_attrs); + } break; + case RAY_I32: case RAY_DATE: case RAY_TIME: { + const int32_t* s = (const int32_t*)data + start; + for (int64_t j = 0; j < n; j++) dst[j] = (double)s[j]; + } break; + case RAY_U8: case RAY_BOOL: { + const uint8_t* s = (const uint8_t*)data + start; + for (int64_t j = 0; j < n; j++) dst[j] = (double)s[j]; + } break; + case RAY_I16: { + const int16_t* s = (const int16_t*)data + start; + for (int64_t j = 0; j < n; j++) dst[j] = (double)s[j]; + } break; + default: memset(dst, 0, (size_t)n * 8); break; + } +} + +/* Execute a binary instruction over n elements. + * Switch is OUTSIDE the loop so each case auto-vectorizes. 
*/ +static void expr_exec_binary(uint8_t opcode, int8_t dt, void* dp, + int8_t t1, const void* ap, + int8_t t2, const void* bp, int64_t n) { + (void)t2; + if (dt == RAY_F64) { + double* d = (double*)dp; + const double* a = (const double*)ap; + const double* b = (const double*)bp; + switch (opcode) { + case OP_ADD: for (int64_t j = 0; j < n; j++) d[j] = a[j] + b[j]; break; + case OP_SUB: for (int64_t j = 0; j < n; j++) d[j] = a[j] - b[j]; break; + case OP_MUL: for (int64_t j = 0; j < n; j++) d[j] = a[j] * b[j]; break; + case OP_DIV: for (int64_t j = 0; j < n; j++) d[j] = b[j] != 0.0 ? a[j] / b[j] : NAN; break; + case OP_MOD: for (int64_t j = 0; j < n; j++) { + if (b[j] == 0.0) { d[j] = NAN; continue; } + double m = fmod(a[j], b[j]); + d[j] = (m && ((m > 0) != (b[j] > 0))) ? m + b[j] : m; + } break; + case OP_MIN2: for (int64_t j = 0; j < n; j++) d[j] = a[j] < b[j] ? a[j] : b[j]; break; + case OP_MAX2: for (int64_t j = 0; j < n; j++) d[j] = a[j] > b[j] ? a[j] : b[j]; break; + default: break; + } + } else if (dt == RAY_I64 || dt == RAY_TIMESTAMP) { + int64_t* d = (int64_t*)dp; + const int64_t* a = (const int64_t*)ap; + const int64_t* b = (const int64_t*)bp; + switch (opcode) { + case OP_ADD: for (int64_t j = 0; j < n; j++) d[j] = (int64_t)((uint64_t)a[j] + (uint64_t)b[j]); break; + case OP_SUB: for (int64_t j = 0; j < n; j++) d[j] = (int64_t)((uint64_t)a[j] - (uint64_t)b[j]); break; + case OP_MUL: for (int64_t j = 0; j < n; j++) d[j] = (int64_t)((uint64_t)a[j] * (uint64_t)b[j]); break; + case OP_DIV: for (int64_t j = 0; j < n; j++) { + if (b[j]==0 || (b[j]==-1 && a[j]==((int64_t)1<<63))) { d[j]=0; continue; } + int64_t q = a[j]/b[j]; + if ((a[j]^b[j])<0 && q*b[j]!=a[j]) q--; + d[j] = q; + } break; + case OP_MOD: for (int64_t j = 0; j < n; j++) { + if (b[j]==0 || (b[j]==-1 && a[j]==((int64_t)1<<63))) { d[j]=0; continue; } + int64_t m = a[j]%b[j]; + if (m && (m^b[j])<0) m+=b[j]; + d[j] = m; + } break; + case OP_MIN2: for (int64_t j = 0; j < n; j++) d[j] = a[j] < b[j] ? 
a[j] : b[j]; break; + case OP_MAX2: for (int64_t j = 0; j < n; j++) d[j] = a[j] > b[j] ? a[j] : b[j]; break; + default: break; + } + } else if (dt == RAY_I32 || dt == RAY_DATE || dt == RAY_TIME) { + int32_t* d = (int32_t*)dp; + const int32_t* a = (const int32_t*)ap; + const int32_t* b = (const int32_t*)bp; + switch (opcode) { + case OP_ADD: for (int64_t j = 0; j < n; j++) d[j] = (int32_t)((uint32_t)a[j] + (uint32_t)b[j]); break; + case OP_SUB: for (int64_t j = 0; j < n; j++) d[j] = (int32_t)((uint32_t)a[j] - (uint32_t)b[j]); break; + case OP_MUL: for (int64_t j = 0; j < n; j++) d[j] = (int32_t)((uint32_t)a[j] * (uint32_t)b[j]); break; + case OP_DIV: for (int64_t j = 0; j < n; j++) { + if (b[j]==0 || (b[j]==-1 && a[j]==((int32_t)1<<31))) { d[j]=0; continue; } + int32_t q = a[j]/b[j]; + if ((a[j]^b[j])<0 && q*b[j]!=a[j]) q--; + d[j] = q; + } break; + case OP_MOD: for (int64_t j = 0; j < n; j++) { + if (b[j]==0 || (b[j]==-1 && a[j]==((int32_t)1<<31))) { d[j]=0; continue; } + int32_t m = a[j]%b[j]; + if (m && (m^b[j])<0) m+=b[j]; + d[j] = m; + } break; + case OP_MIN2: for (int64_t j = 0; j < n; j++) d[j] = a[j] < b[j] ? a[j] : b[j]; break; + case OP_MAX2: for (int64_t j = 0; j < n; j++) d[j] = a[j] > b[j] ? a[j] : b[j]; break; + default: break; + } + } else if (dt == RAY_I16) { + int16_t* d = (int16_t*)dp; + const int16_t* a = (const int16_t*)ap; + const int16_t* b = (const int16_t*)bp; + switch (opcode) { + case OP_ADD: for (int64_t j = 0; j < n; j++) d[j] = (int16_t)((uint16_t)a[j] + (uint16_t)b[j]); break; + case OP_SUB: for (int64_t j = 0; j < n; j++) d[j] = (int16_t)((uint16_t)a[j] - (uint16_t)b[j]); break; + case OP_MUL: for (int64_t j = 0; j < n; j++) d[j] = (int16_t)((uint16_t)a[j] * (uint16_t)b[j]); break; + case OP_DIV: for (int64_t j = 0; j < n; j++) { d[j] = b[j] ? a[j] / b[j] : 0; } break; + case OP_MOD: for (int64_t j = 0; j < n; j++) { d[j] = b[j] ? a[j] % b[j] : 0; } break; + case OP_MIN2: for (int64_t j = 0; j < n; j++) d[j] = a[j] < b[j] ? 
a[j] : b[j]; break; + case OP_MAX2: for (int64_t j = 0; j < n; j++) d[j] = a[j] > b[j] ? a[j] : b[j]; break; + default: break; + } + } else if (dt == RAY_U8) { + uint8_t* d2 = (uint8_t*)dp; + const uint8_t* a2 = (const uint8_t*)ap; + const uint8_t* b2 = (const uint8_t*)bp; + switch (opcode) { + case OP_ADD: for (int64_t j = 0; j < n; j++) d2[j] = a2[j] + b2[j]; break; + case OP_SUB: for (int64_t j = 0; j < n; j++) d2[j] = a2[j] - b2[j]; break; + case OP_MUL: for (int64_t j = 0; j < n; j++) d2[j] = a2[j] * b2[j]; break; + case OP_DIV: for (int64_t j = 0; j < n; j++) { d2[j] = b2[j] ? a2[j] / b2[j] : 0; } break; + case OP_MOD: for (int64_t j = 0; j < n; j++) { d2[j] = b2[j] ? a2[j] % b2[j] : 0; } break; + case OP_MIN2: for (int64_t j = 0; j < n; j++) d2[j] = a2[j] < b2[j] ? a2[j] : b2[j]; break; + case OP_MAX2: for (int64_t j = 0; j < n; j++) d2[j] = a2[j] > b2[j] ? a2[j] : b2[j]; break; + default: break; + } + } else if (dt == RAY_BOOL) { + uint8_t* d = (uint8_t*)dp; + if (t1 == RAY_F64) { + const double* a = (const double*)ap; + const double* b = (const double*)bp; + /* Null-aware F64 comparisons: NaN is null sentinel. + * null == null → true, null < non-null → true, non-null > null → true */ + #define F64_ISNAN(x) ((x) != (x)) + switch (opcode) { + case OP_EQ: for (int64_t j = 0; j < n; j++) d[j] = (F64_ISNAN(a[j])&&F64_ISNAN(b[j])) ? 1 : (F64_ISNAN(a[j])||F64_ISNAN(b[j])) ? 0 : a[j]==b[j]; break; + case OP_NE: for (int64_t j = 0; j < n; j++) d[j] = (F64_ISNAN(a[j])&&F64_ISNAN(b[j])) ? 0 : (F64_ISNAN(a[j])||F64_ISNAN(b[j])) ? 1 : a[j]!=b[j]; break; + case OP_LT: for (int64_t j = 0; j < n; j++) d[j] = (F64_ISNAN(a[j])&&F64_ISNAN(b[j])) ? 0 : F64_ISNAN(a[j]) ? 1 : F64_ISNAN(b[j]) ? 0 : a[j]b[j]; break; + case OP_GE: for (int64_t j = 0; j < n; j++) d[j] = (F64_ISNAN(a[j])&&F64_ISNAN(b[j])) ? 1 : F64_ISNAN(b[j]) ? 1 : F64_ISNAN(a[j]) ? 
0 : a[j]>=b[j]; break; + default: break; + } + #undef F64_ISNAN + } else if (t1 == RAY_I64) { + const int64_t* a = (const int64_t*)ap; + const int64_t* b = (const int64_t*)bp; + /* Plain comparison — null handling via bitmap post-pass. + * Values at null positions are zero (from vector init), which + * compares correctly for null-as-minimum semantics when both + * input null bitmaps are propagated to the result. */ + switch (opcode) { + case OP_EQ: for (int64_t j = 0; j < n; j++) d[j] = a[j]==b[j]; break; + case OP_NE: for (int64_t j = 0; j < n; j++) d[j] = a[j]!=b[j]; break; + case OP_LT: for (int64_t j = 0; j < n; j++) d[j] = a[j]b[j]; break; + case OP_GE: for (int64_t j = 0; j < n; j++) d[j] = a[j]>=b[j]; break; + default: break; + } + } else { /* both bool */ + const uint8_t* a = (const uint8_t*)ap; + const uint8_t* b = (const uint8_t*)bp; + switch (opcode) { + case OP_AND: for (int64_t j = 0; j < n; j++) d[j] = a[j] && b[j]; break; + case OP_OR: for (int64_t j = 0; j < n; j++) d[j] = a[j] || b[j]; break; + default: break; + } + } + } +} + +/* Execute a unary instruction over n elements */ +static void expr_exec_unary(uint8_t opcode, int8_t dt, void* dp, + int8_t t1, const void* ap, int64_t n) { + if (dt == RAY_F64) { + double* d = (double*)dp; + if (t1 == RAY_F64) { + const double* a = (const double*)ap; + switch (opcode) { + case OP_NEG: for (int64_t j = 0; j < n; j++) d[j] = -a[j]; break; + case OP_ABS: for (int64_t j = 0; j < n; j++) d[j] = fabs(a[j]); break; + case OP_SQRT: for (int64_t j = 0; j < n; j++) d[j] = sqrt(a[j]); break; + case OP_LOG: for (int64_t j = 0; j < n; j++) d[j] = log(a[j]); break; + case OP_EXP: for (int64_t j = 0; j < n; j++) d[j] = exp(a[j]); break; + case OP_CEIL: for (int64_t j = 0; j < n; j++) d[j] = ceil(a[j]); break; + case OP_FLOOR: for (int64_t j = 0; j < n; j++) d[j] = floor(a[j]); break; + case OP_ROUND: for (int64_t j = 0; j < n; j++) d[j] = round(a[j]); break; + default: break; + } + } else { /* CAST i64→f64 */ + const 
int64_t* a = (const int64_t*)ap; + for (int64_t j = 0; j < n; j++) d[j] = (double)a[j]; + } + } else if (dt == RAY_I64) { + int64_t* d = (int64_t*)dp; + if (t1 == RAY_I64) { + const int64_t* a = (const int64_t*)ap; + switch (opcode) { + /* Unsigned negation avoids UB on INT64_MIN */ + case OP_NEG: for (int64_t j = 0; j < n; j++) d[j] = (int64_t)(-(uint64_t)a[j]); break; + case OP_ABS: for (int64_t j = 0; j < n; j++) d[j] = a[j] < 0 ? (int64_t)(-(uint64_t)a[j]) : a[j]; break; + default: break; + } + } else { /* CAST f64→i64 — clamp to avoid out-of-range UB */ + const double* a = (const double*)ap; + for (int64_t j = 0; j < n; j++) + d[j] = (a[j] >= (double)INT64_MAX) ? INT64_MAX + : (a[j] <= (double)INT64_MIN) ? INT64_MIN + : (int64_t)a[j]; + } + } else if (dt == RAY_BOOL) { + uint8_t* d = (uint8_t*)dp; + const uint8_t* a = (const uint8_t*)ap; + switch (opcode) { + case OP_NOT: for (int64_t j = 0; j < n; j++) d[j] = !a[j]; break; + default: break; + } + } +} + +/* Evaluate compiled expression for morsel [start, end). + * scratch: array of EXPR_MAX_REGS buffers, each EXPR_MORSEL*8 bytes. + * Returns pointer to output data (morsel-relative indexing). 
*/ +static void* expr_eval_morsel(const ray_expr_t* expr, void** scratch, + int64_t start, int64_t end) { + int64_t n = end - start; + if (n <= 0) return NULL; + + void* rptrs[EXPR_MAX_REGS]; + for (uint8_t r = 0; r < expr->n_regs; r++) { + int8_t rt = expr->regs[r].type; + int8_t ct = expr->regs[r].col_type; + switch (expr->regs[r].kind) { + case REG_SCAN: { + /* Direct pointer if native type matches, else convert */ + uint8_t ca = expr->regs[r].col_attrs; + if (rt == RAY_F64 && ct == RAY_F64) { + rptrs[r] = (double*)expr->regs[r].data + start; + } else if (rt == RAY_I64 && (ct == RAY_I64 || ct == RAY_TIMESTAMP)) { + rptrs[r] = (int64_t*)expr->regs[r].data + start; + } else if (rt == RAY_I64 && ct == RAY_SYM && + (ca & RAY_SYM_W_MASK) == RAY_SYM_W64) { + rptrs[r] = (int64_t*)expr->regs[r].data + start; + } else { + rptrs[r] = scratch[r]; + if (rt == RAY_F64) + expr_load_f64(scratch[r], expr->regs[r].data, ct, ca, start, n); + else + expr_load_i64(scratch[r], expr->regs[r].data, ct, ca, start, n); + } + } + break; + case REG_CONST: + rptrs[r] = scratch[r]; + if (rt == RAY_F64) { + double v = expr->regs[r].const_f64; + double* d = (double*)scratch[r]; + for (int64_t j = 0; j < n; j++) d[j] = v; + } else { + int64_t v = expr->regs[r].const_i64; + int64_t* d = (int64_t*)scratch[r]; + for (int64_t j = 0; j < n; j++) d[j] = v; + } + break; + default: /* REG_SCRATCH */ + rptrs[r] = scratch[r]; + break; + } + } + + for (uint8_t i = 0; i < expr->n_ins; i++) { + const expr_ins_t* ins = &expr->ins[i]; + int8_t dt = expr->regs[ins->dst].type; + if (ins->src2 != 0xFF) { + expr_exec_binary(ins->opcode, dt, rptrs[ins->dst], + expr->regs[ins->src1].type, rptrs[ins->src1], + expr->regs[ins->src2].type, rptrs[ins->src2], n); + } else { + expr_exec_unary(ins->opcode, dt, rptrs[ins->dst], + expr->regs[ins->src1].type, rptrs[ins->src1], n); + } + } + + return rptrs[expr->out_reg]; +} + +/* Context for parallel full-vector expression evaluation */ +typedef struct { + const ray_expr_t* 
expr; + void* out_data; + int8_t out_type; +} expr_full_ctx_t; + +static void expr_full_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t end) { + (void)worker_id; + expr_full_ctx_t* c = (expr_full_ctx_t*)ctx; + const ray_expr_t* expr = c->expr; + uint8_t esz = ray_elem_size(c->out_type); + + /* Per-worker scratch buffers (heap-allocated via arena, morsel-sized) */ + ray_t* scratch_hdr = NULL; + char* scratch_mem = (char*)scratch_alloc(&scratch_hdr, + (size_t)EXPR_MAX_REGS * EXPR_MORSEL * 8); + if (!scratch_mem) return; + void* scratch[EXPR_MAX_REGS]; + for (uint8_t r = 0; r < expr->n_regs; r++) + scratch[r] = scratch_mem + (size_t)r * EXPR_MORSEL * 8; + + for (int64_t ms = start; ms < end; ms += EXPR_MORSEL) { + int64_t me = (ms + EXPR_MORSEL < end) ? ms + EXPR_MORSEL : end; + void* result = expr_eval_morsel(expr, scratch, ms, me); + if (result) + memcpy((char*)c->out_data + ms * esz, result, (size_t)(me - ms) * esz); + } + scratch_free(scratch_hdr); +} + +/* Post-pass for the fused unary path: |INT64_MIN| and -INT64_MIN don't fit in + * i64 (signed-overflow; k/q convention surfaces this as typed null). The + * element-wise loop uses unsigned wrap, so any overflow position lands as + * INT64_MIN in data. Convert each such position to typed-null: zero data[i] + * (preserve "null position is 0" invariant) and set the null bit. Caller + * must invoke single-threaded — after pool dispatch joins. */ +static void mark_i64_overflow_as_null(ray_t* result, int64_t off, int64_t len) { + int64_t* d = (int64_t*)ray_data(result) + off; + for (int64_t i = 0; i < len; i++) { + if (RAY_UNLIKELY(d[i] == INT64_MIN)) { + d[i] = 0; + ray_vec_set_null(result, off + i, true); + } + } +} + +/* The fused unary path may produce INT64_MIN via signed-overflow only for + * OP_NEG and OP_ABS over an i64 source (output type i64). Detect those + * shapes from the last instruction in the compiled expression. 
*/
+static bool expr_last_op_overflows_i64(const ray_expr_t* expr) {
+    if (expr->out_type != RAY_I64 || expr->n_ins == 0) return false;
+    const expr_ins_t* last = &expr->ins[expr->n_ins - 1];
+    if (last->opcode != OP_NEG && last->opcode != OP_ABS) return false;
+    if (last->src2 != 0xFF) return false; /* unary only */
+    if (expr->regs[last->src1].type != RAY_I64) return false;
+    if (expr->regs[last->dst].type != RAY_I64) return false;
+    return true;
+}
+
+/* Evaluate compiled expression over parted (segmented) columns.
+ * Iterates segments as outer loop, rebinds data pointers per segment,
+ * then dispatches the existing morsel evaluator per segment. Zero copy.
+ *
+ * Returns a new vector of expr->out_type with len == nrows, the allocation
+ * error from ray_vec_new, or ray_error("nyi") when no register is parted.
+ * NOTE(review): only parted registers are rebound; a non-parted scan
+ * register would be read at segment-relative offsets. Confirm such mixes
+ * cannot reach this function.
+ * NOTE(review): global_off is never checked against nrows; this relies on
+ * the segment lengths summing to at most nrows. */
+static ray_t* expr_eval_full_parted(const ray_expr_t* expr, int64_t nrows) {
+    ray_t* out = ray_vec_new(expr->out_type, nrows);
+    if (!out || RAY_IS_ERR(out)) {
+        return out;
+    }
+    out->len = nrows;
+
+    /* Find first parted register to get segment structure */
+    ray_t* ref_parted = NULL;
+    for (uint8_t r = 0; r < expr->n_regs; r++) {
+        if (expr->regs[r].is_parted) {
+            ref_parted = expr->regs[r].parted_col;
+            break;
+        }
+    }
+    if (!ref_parted) { ray_release(out); return ray_error("nyi", NULL); }
+
+    int64_t n_segs = ref_parted->len;             /* segment count comes from the first parted column */
+    uint8_t esz = ray_elem_size(expr->out_type);
+    ray_pool_t* pool = ray_pool_get();
+    int64_t global_off = 0;                       /* output row where the current segment starts */
+
+    for (int64_t s = 0; s < n_segs; s++) {
+        /* Determine segment length from any non-NULL parted register */
+        int64_t seg_len = 0;
+        for (uint8_t r = 0; r < expr->n_regs; r++) {
+            if (expr->regs[r].is_parted) {
+                ray_t** segs = (ray_t**)ray_data(expr->regs[r].parted_col);
+                if (segs[s]) { seg_len = segs[s]->len; break; }
+            }
+        }
+        if (seg_len <= 0) continue;               /* empty or all-NULL segment contributes no rows */
+
+        /* Stack-copy expr, rebind parted registers to this segment's data */
+        ray_expr_t seg_expr = *expr;
+        bool seg_ok = true;
+        for (uint8_t r = 0; r < seg_expr.n_regs; r++) {
+            if (seg_expr.regs[r].is_parted) {
+                ray_t** segs = (ray_t**)ray_data(seg_expr.regs[r].parted_col);
+                if (!segs[s]) { seg_ok = false; break; }
+                seg_expr.regs[r].data = ray_data(segs[s]);
+            }
+        }
+        if (!seg_ok) {                            /* some parted input lacks this segment: emit zeros for its rows */
+            memset((char*)ray_data(out) + global_off * esz, 0,
+                   (size_t)seg_len * esz);
+            global_off += seg_len;
+            continue;
+        }
+
+        expr_full_ctx_t ctx = {
+            .expr = &seg_expr,
+            .out_data = (char*)ray_data(out) + global_off * esz,
+            .out_type = expr->out_type,
+        };
+        if (pool && seg_len >= RAY_PARALLEL_THRESHOLD)
+            ray_pool_dispatch(pool, expr_full_fn, &ctx, seg_len);   /* ctx and seg_expr are stack locals: presumably dispatch blocks until workers finish — confirm */
+        else
+            expr_full_fn(&ctx, 0, 0, seg_len);
+
+        global_off += seg_len;
+    }
+    if (expr_last_op_overflows_i64(expr))
+        mark_i64_overflow_as_null(out, 0, nrows);  /* single pass over the whole output once all segments are written */
+    return out;
+}
+
+/* Evaluate compiled expression into a full-length output vector.
+ * Replaces exec_node() for expression subtrees — no intermediate vectors.
+ *
+ * Delegates to expr_eval_full_parted when expr->has_parted is set. Otherwise
+ * allocates an nrows-long vector of expr->out_type, evaluates through the
+ * pool when nrows reaches RAY_PARALLEL_THRESHOLD (inline otherwise), and
+ * finally converts i64 NEG/ABS overflow positions to nulls. Returns the
+ * vector, or whatever ray_vec_new returned on allocation failure. */
+ray_t* expr_eval_full(const ray_expr_t* expr, int64_t nrows) {
+    if (expr->has_parted)
+        return expr_eval_full_parted(expr, nrows);
+
+    ray_t* out = ray_vec_new(expr->out_type, nrows);
+    if (!out || RAY_IS_ERR(out)) return out;
+    out->len = nrows;
+
+    expr_full_ctx_t ctx = {
+        .expr = expr, .out_data = ray_data(out), .out_type = expr->out_type,
+    };
+
+    ray_pool_t* pool = ray_pool_get();
+    if (pool && nrows >= RAY_PARALLEL_THRESHOLD)
+        ray_pool_dispatch(pool, expr_full_fn, &ctx, nrows);
+    else
+        expr_full_fn(&ctx, 0, 0, nrows);
+
+    if (expr_last_op_overflows_i64(expr))
+        mark_i64_overflow_as_null(out, 0, nrows);
+    return out;
+}
+
+/* ============================================================================
+ * Null bitmap propagation for element-wise ops
+ * ============================================================================ */
+
+/* Resolve the raw null bitmap pointer and bit offset for a vector.
+ * Returns NULL if the vector has no null bits, or if the inline nullmap
+ * cannot cover the requested range (prevents overread).
*/
+static const uint8_t* nullmap_bits(ray_t* v, int64_t* bit_offset, int64_t len) {
+    ray_t* target = v;
+    int64_t off = 0;
+    if (v->attrs & RAY_ATTR_SLICE) {   /* a slice borrows its parent's bitmap, shifted by slice_offset */
+        target = v->slice_parent;
+        off = v->slice_offset;
+    }
+    if (!(target->attrs & RAY_ATTR_HAS_NULLS)) return NULL;   /* *bit_offset left untouched on this path */
+    int64_t resolved_off = 0, len_bits = 0;
+    const uint8_t* bits = ray_vec_nullmap_bytes(target, &resolved_off, &len_bits);
+    if (!bits) return NULL;                                   /* *bit_offset left untouched on this path */
+    *bit_offset = off + resolved_off;
+    /* Caller assumes inline buffer means 128-bit coverage; reject ranges
+     * that would overrun it just like the original guard.
+     * Note *bit_offset has already been written when this guard fires, and
+     * the guard compares off + len, without resolved_off. */
+    if (len_bits == 128 && off + len > 128) return NULL;
+    return bits;
+}
+
+/* Writable null bitmap pointer for freshly allocated (non-slice) dst vector.
+ * Returns NULL if inline nullmap cannot cover dst->len (prevents overflow).
+ * Also returns NULL for a RAY_STR vector without an external nullmap. An
+ * external nullmap, when present, is returned regardless of type or length. */
+static uint8_t* nullmap_bits_mut(ray_t* dst) {
+    if (dst->attrs & RAY_ATTR_NULLMAP_EXT)
+        return (uint8_t*)ray_data(dst->ext_nullmap);
+    if (dst->type == RAY_STR) return NULL;
+    if (dst->len > 128) return NULL; /* inline can only cover 128 bits */
+    return dst->nullmap;
+}
+
+/* OR-merge null bitmap from src into dst. Fast byte-level path when possible,
+ * element-level fallback for misaligned slices or RAY_STR without ext nullmap.
*/
+static void propagate_nulls(ray_t* src, ray_t* dst, int64_t len) {
+    int64_t src_off = 0;
+    const uint8_t* sbits = nullmap_bits(src, &src_off, len);
+    if (!sbits) goto slow; /* no accessible bitmap — use element path */
+
+    /* Ensure dst has ext nullmap for large vectors */
+    if (len > 128 && !(dst->attrs & RAY_ATTR_NULLMAP_EXT))
+        ray_vec_set_null(dst, len - 1, false); /* force ext alloc */
+    uint8_t* dbits = nullmap_bits_mut(dst);
+    if (!dbits) goto slow; /* ext alloc failed or RAY_STR */
+
+    /* Bulk OR — both bitmaps are byte-accessible and src is byte-aligned.
+     * dst is written from bit 0, and whole bytes are merged, so up to 7
+     * source bits past `len` can land in dst's last byte. HAS_NULLS is set
+     * on dst even if no source bit in the range was actually set. */
+    if ((src_off % 8) == 0) {
+        int64_t byte_start = src_off / 8;
+        int64_t nbytes = (len + 7) / 8;
+        for (int64_t b = 0; b < nbytes; b++)
+            dbits[b] |= sbits[byte_start + b];
+        dst->attrs |= RAY_ATTR_HAS_NULLS;
+        return;
+    }
+
+slow:
+    for (int64_t i = 0; i < len; i++) {   /* element path: copy null flags one at a time */
+        if (ray_vec_is_null(src, i))
+            ray_vec_set_null(dst, i, true);
+    }
+}
+
+/* Returns true when opc lies outside the [OP_EQ, OP_OR] opcode range, i.e.
+ * for ops whose result should inherit nulls from its inputs. Opcodes inside
+ * that range are presumably the comparisons (EQ..GE) followed by AND/OR —
+ * TODO confirm the enum layout. exec_elementwise_binary sends those to
+ * fix_null_comparisons, which patches the boolean at null positions
+ * (null-as-minimum ordering) instead of marking the result null. */
+static bool op_propagates_null(uint16_t opc) {
+    return opc < OP_EQ || opc > OP_OR;
+}
+
+/* Check if a scalar operand (atom or length-1 vector) is null.
+ * Handles slices correctly via ray_vec_is_null delegation. */
+static bool scalar_is_null(ray_t* x) {
+    if (ray_is_atom(x)) return RAY_ATOM_IS_NULL(x);
+    /* Length-1 vector — use ray_vec_is_null which handles slices */
+    return ray_vec_is_null(x, 0);
+}
+
+/* Check if a vector might contain nulls (accounts for slices).
+ * Conservative: any slice answers true, because only the slice's own attrs
+ * are inspected here, not the parent's. */
+static bool vec_may_have_nulls(ray_t* v) {
+    return (v->attrs & (RAY_ATTR_HAS_NULLS | RAY_ATTR_SLICE)) != 0;
+}
+
+/* Resolve data pointer for a vector, accounting for slices.
+ * For slices, returns the parent's data and adjusts *offset.
*/
+static void* resolve_vec_data(ray_t* v, int64_t* offset) {
+    if (v->attrs & RAY_ATTR_SLICE) {
+        *offset += v->slice_offset;   /* offset is in elements; the caller scales by element size */
+        return ray_data(v->slice_parent);
+    }
+    return ray_data(v);               /* non-slice: *offset is left as passed in */
+}
+
+/* Post-pass for comparison results: rewrites result[i] wherever an input is null. */
+/* Fix comparison results at null positions using null-as-minimum semantics.
+ * null == null → true, null < x → true, x > null → true, etc.
+ *
+ *   both null        : EQ, LE, GE -> 1; everything else -> 0
+ *   left null only   : LT, LE, NE -> 1; everything else -> 0
+ *   right null only  : GT, GE, NE -> 1; everything else -> 0
+ *
+ * Any other opcode passed here gets 0 at every position with a null input.
+ * result is treated as a one-byte-per-element boolean buffer; positions
+ * where neither input is null are left untouched. */
+static void fix_null_comparisons(ray_t* lhs, ray_t* rhs, ray_t* result,
+                                 bool l_scalar, bool r_scalar, int64_t len,
+                                 uint16_t opcode) {
+    uint8_t* dst = (uint8_t*)ray_data(result);
+    bool ln_s = l_scalar && scalar_is_null(lhs);        /* scalar lhs is null: applies to every row */
+    bool rn_s = r_scalar && scalar_is_null(rhs);        /* scalar rhs is null: applies to every row */
+    bool l_has = !l_scalar && vec_may_have_nulls(lhs);  /* per-row null check needed on lhs */
+    bool r_has = !r_scalar && vec_may_have_nulls(rhs);  /* per-row null check needed on rhs */
+    if (!ln_s && !rn_s && !l_has && !r_has) return;     /* no nulls possible: keep computed results */
+
+    for (int64_t i = 0; i < len; i++) {
+        bool ln = ln_s || (l_has && ray_vec_is_null(lhs, i));
+        bool rn = rn_s || (r_has && ray_vec_is_null(rhs, i));
+        if (!ln && !rn) continue;
+        /* Both null */
+        if (ln && rn) {
+            dst[i] = (opcode == OP_EQ || opcode == OP_LE || opcode == OP_GE) ? 1 : 0;
+            continue;
+        }
+        /* Left null only (null = minimum) */
+        if (ln) {
+            dst[i] = (opcode == OP_LT || opcode == OP_LE || opcode == OP_NE) ? 1 : 0;
+            continue;
+        }
+        /* Right null only */
+        dst[i] = (opcode == OP_GT || opcode == OP_GE || opcode == OP_NE) ? 1 : 0;
+    }
+}
+
+/* Set all elements in result as null (scalar null broadcast).
+ * The bitmap path fills whole bytes with 0xFF, so bits past `len` in the
+ * final byte are set as well; the fallback sets exactly `len` positions
+ * through ray_vec_set_null. */
+static void set_all_null(ray_t* result, int64_t len) {
+    if (len > 128 && !(result->attrs & RAY_ATTR_NULLMAP_EXT))
+        ray_vec_set_null(result, len - 1, false); /* force ext alloc */
+    uint8_t* dbits = nullmap_bits_mut(result);
+    if (dbits) {
+        memset(dbits, 0xFF, (size_t)((len + 7) / 8));
+        result->attrs |= RAY_ATTR_HAS_NULLS;
+    } else {
+        for (int64_t i = 0; i < len; i++) ray_vec_set_null(result, i, true);
+    }
+}
+
+/* Propagate null bitmaps for binary ops: null in either operand → null in result.
*/ +static void propagate_nulls_binary(ray_t* lhs, ray_t* rhs, ray_t* result, + bool l_scalar, bool r_scalar, int64_t len) { + if (l_scalar && scalar_is_null(lhs)) { + set_all_null(result, len); + } else if (r_scalar && scalar_is_null(rhs)) { + set_all_null(result, len); + } else { + if (!l_scalar && vec_may_have_nulls(lhs)) propagate_nulls(lhs, result, len); + if (!r_scalar && vec_may_have_nulls(rhs)) propagate_nulls(rhs, result, len); + } +} + +/* ============================================================================ + * Element-wise execution + * ============================================================================ */ + +ray_t* exec_elementwise_unary(ray_graph_t* g, ray_op_t* op, ray_t* input) { + (void)g; + if (!input || RAY_IS_ERR(input)) return input; + int64_t len = input->len; + int8_t in_type = input->type; + int8_t out_type = op->out_type; + + ray_t* result = ray_vec_new(out_type, len); + if (!result || RAY_IS_ERR(result)) return result; + result->len = len; + + ray_morsel_t m; + ray_morsel_init(&m, input); + int64_t out_off = 0; + + while (ray_morsel_next(&m)) { + int64_t n = m.morsel_len; + void* dst = (char*)ray_data(result) + out_off * ray_elem_size(out_type); + + if (in_type == RAY_F64 || in_type == RAY_I64) { + for (int64_t i = 0; i < n; i++) { + if (in_type == RAY_F64) { + double v = ((double*)m.morsel_ptr)[i]; + double r; + switch (op->opcode) { + case OP_NEG: r = -v; break; + case OP_ABS: r = fabs(v); break; + case OP_SQRT: r = sqrt(v); break; + case OP_LOG: r = log(v); break; + case OP_EXP: r = exp(v); break; + case OP_CEIL: r = ceil(v); break; + case OP_FLOOR: r = floor(v); break; + case OP_ROUND: r = round(v); break; + default: r = v; break; + } + if (out_type == RAY_F64) ((double*)dst)[i] = r; + else if (out_type == RAY_I64) ((int64_t*)dst)[i] = (int64_t)r; + } else { + int64_t v = ((int64_t*)m.morsel_ptr)[i]; + if (out_type == RAY_I64) { + int64_t r; + switch (op->opcode) { + /* Unsigned negation avoids UB on INT64_MIN */ + case 
OP_NEG: r = (int64_t)(-(uint64_t)v); break; + case OP_ABS: r = v < 0 ? (int64_t)(-(uint64_t)v) : v; break; + default: r = v; break; + } + ((int64_t*)dst)[i] = r; + } else if (out_type == RAY_F64) { + double r; + switch (op->opcode) { + case OP_NEG: r = -(double)v; break; + case OP_SQRT: r = sqrt((double)v); break; + case OP_LOG: r = log((double)v); break; + case OP_EXP: r = exp((double)v); break; + default: r = (double)v; break; + } + ((double*)dst)[i] = r; + } else if (out_type == RAY_BOOL) { + /* ISNULL: for non-null vecs, always false */ + ((uint8_t*)dst)[i] = 0; + } + } + } + } else if (in_type == RAY_BOOL && op->opcode == OP_NOT) { + for (int64_t i = 0; i < n; i++) { + ((uint8_t*)dst)[i] = !((uint8_t*)m.morsel_ptr)[i]; + } + } else if (op->opcode == OP_CAST) { + /* CAST from narrow integer types (I32/I16/U8/BOOL) to I64/F64 */ + for (int64_t i = 0; i < n; i++) { + int64_t v = 0; + if (in_type == RAY_I32 || in_type == RAY_DATE || in_type == RAY_TIME) + v = (int64_t)((int32_t*)m.morsel_ptr)[i]; + else if (in_type == RAY_I16) + v = (int64_t)((int16_t*)m.morsel_ptr)[i]; + else if (in_type == RAY_U8 || in_type == RAY_BOOL) + v = (int64_t)((uint8_t*)m.morsel_ptr)[i]; + if (out_type == RAY_I64) ((int64_t*)dst)[i] = v; + else if (out_type == RAY_F64) ((double*)dst)[i] = (double)v; + } + } + + out_off += n; + } + + /* Propagate null bitmap from input to result. + * ISNULL is special: set output to 1 for null elements. */ + if (vec_may_have_nulls(input)) { + if (op->opcode == OP_ISNULL) { + for (int64_t i = 0; i < len; i++) { + if (ray_vec_is_null(input, i)) + ((uint8_t*)ray_data(result))[i] = 1; + } + } else { + propagate_nulls(input, result, len); + } + } + + /* OP_NEG/OP_ABS over i64: |INT64_MIN| and -INT64_MIN don't fit — surface + * as typed null (k/q convention). Loop above used unsigned wrap, so + * overflow positions land as INT64_MIN in data; convert them to null. 
*/ + if (out_type == RAY_I64 && in_type == RAY_I64 && + (op->opcode == OP_NEG || op->opcode == OP_ABS)) + mark_i64_overflow_as_null(result, 0, len); + + return result; +} + +/* Inner loop for binary element-wise string comparison over [start, end) */ +static void binary_range_str(ray_op_t* op, ray_t* lhs, ray_t* rhs, ray_t* result, + bool l_scalar, bool r_scalar, + int64_t start, int64_t end) { + uint8_t* dst = (uint8_t*)ray_data(result) + start; + int64_t n = end - start; + uint16_t opc = op->opcode; + + const ray_str_t* l_elems = NULL; + const ray_str_t* r_elems = NULL; + const char* l_pool = NULL; + const char* r_pool = NULL; + if (!l_scalar) { str_resolve(lhs, &l_elems, &l_pool); l_elems += start; } + if (!r_scalar) { str_resolve(rhs, &r_elems, &r_pool); r_elems += start; } + + /* For scalar side, build a single ray_str_t */ + ray_str_t l_scalar_elem = {0}, r_scalar_elem = {0}; + const char* l_scalar_pool = NULL; + const char* r_scalar_pool = NULL; + if (l_scalar) { + atom_to_str_t(lhs, &l_scalar_elem, &l_scalar_pool); + l_elems = &l_scalar_elem; + } + if (r_scalar) { + atom_to_str_t(rhs, &r_scalar_elem, &r_scalar_pool); + r_elems = &r_scalar_elem; + } + + for (int64_t i = 0; i < n; i++) { + const ray_str_t* a = l_scalar ? l_elems : &l_elems[i]; + const ray_str_t* b = r_scalar ? r_elems : &r_elems[i]; + const char* pa = l_scalar ? l_scalar_pool : l_pool; + const char* pb = r_scalar ? 
r_scalar_pool : r_pool; + + switch (opc) { + case OP_EQ: dst[i] = ray_str_t_eq(a, pa, b, pb); break; + case OP_NE: dst[i] = !ray_str_t_eq(a, pa, b, pb); break; + case OP_LT: dst[i] = ray_str_t_cmp(a, pa, b, pb) < 0; break; + case OP_LE: dst[i] = ray_str_t_cmp(a, pa, b, pb) <= 0; break; + case OP_GT: dst[i] = ray_str_t_cmp(a, pa, b, pb) > 0; break; + case OP_GE: dst[i] = ray_str_t_cmp(a, pa, b, pb) >= 0; break; + default: dst[i] = 0; break; + } + } +} + +/* Context for parallel RAY_STR binary dispatch */ +typedef struct { + ray_op_t* op; + ray_t* lhs; + ray_t* rhs; + ray_t* result; + bool l_scalar; + bool r_scalar; +} par_binary_str_ctx_t; + +static void par_binary_str_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t end) { + (void)worker_id; + par_binary_str_ctx_t* c = (par_binary_str_ctx_t*)ctx; + binary_range_str(c->op, c->lhs, c->rhs, c->result, + c->l_scalar, c->r_scalar, start, end); +} + +/* Inner loop for binary element-wise over a range [start, end) */ +static void binary_range(ray_op_t* op, int8_t out_type, + ray_t* lhs, ray_t* rhs, ray_t* result, + bool l_scalar, bool r_scalar, + double l_f64, double r_f64, + int64_t l_i64, int64_t r_i64, + int64_t start, int64_t end) { + uint8_t out_esz = ray_elem_size(out_type); + void* dst = (char*)ray_data(result) + start * out_esz; + int64_t n = end - start; + + /* Pointers into source data at offset start */ + double* lp_f64 = NULL; int64_t* lp_i64 = NULL; uint8_t* lp_bool = NULL; + double* rp_f64 = NULL; int64_t* rp_i64 = NULL; uint8_t* rp_bool = NULL; + + int32_t* lp_i32 = NULL; uint32_t* lp_u32 = NULL; int16_t* lp_i16 = NULL; + int32_t* rp_i32 = NULL; uint32_t* rp_u32 = NULL; int16_t* rp_i16 = NULL; + + /* VLA bound of zero is UB; guarantee >=1 slot. The fill loops below + * are bounded by n so extra slots are harmless. */ + int64_t _sym_buf_n = n ? 
n : 1; + int64_t lsym_buf[_sym_buf_n], rsym_buf[_sym_buf_n]; /* stack VLA for narrow RAY_SYM (n<=1024) */ + if (!l_scalar) { + int64_t l_off = start; + void* l_data = resolve_vec_data(lhs, &l_off); + void* lbase = (char*)l_data + l_off * ray_sym_elem_size(lhs->type, lhs->attrs); + if (lhs->type == RAY_F64) lp_f64 = (double*)lbase; + else if (lhs->type == RAY_I64 || lhs->type == RAY_TIMESTAMP) lp_i64 = (int64_t*)lbase; + else if (RAY_IS_SYM(lhs->type)) { + uint8_t w = lhs->attrs & RAY_SYM_W_MASK; + if (w == RAY_SYM_W64) lp_i64 = (int64_t*)lbase; + else if (w == RAY_SYM_W32) lp_u32 = (uint32_t*)lbase; + else { for (int64_t j = 0; j < n; j++) lsym_buf[j] = ray_read_sym(l_data, l_off+j, lhs->type, lhs->attrs); lp_i64 = lsym_buf; } + } + else if (lhs->type == RAY_I32 || lhs->type == RAY_DATE || lhs->type == RAY_TIME) lp_i32 = (int32_t*)lbase; + else if (lhs->type == RAY_I16) lp_i16 = (int16_t*)lbase; + else if (lhs->type == RAY_BOOL || lhs->type == RAY_U8) lp_bool = (uint8_t*)lbase; + } + if (!r_scalar) { + int64_t r_off = start; + void* r_data = resolve_vec_data(rhs, &r_off); + void* rbase = (char*)r_data + r_off * ray_sym_elem_size(rhs->type, rhs->attrs); + if (rhs->type == RAY_F64) rp_f64 = (double*)rbase; + else if (rhs->type == RAY_I64 || rhs->type == RAY_TIMESTAMP) rp_i64 = (int64_t*)rbase; + else if (RAY_IS_SYM(rhs->type)) { + uint8_t w = rhs->attrs & RAY_SYM_W_MASK; + if (w == RAY_SYM_W64) rp_i64 = (int64_t*)rbase; + else if (w == RAY_SYM_W32) rp_u32 = (uint32_t*)rbase; + else { for (int64_t j = 0; j < n; j++) rsym_buf[j] = ray_read_sym(r_data, r_off+j, rhs->type, rhs->attrs); rp_i64 = rsym_buf; } + } + else if (rhs->type == RAY_I32 || rhs->type == RAY_DATE || rhs->type == RAY_TIME) rp_i32 = (int32_t*)rbase; + else if (rhs->type == RAY_I16) rp_i16 = (int16_t*)rbase; + else if (rhs->type == RAY_BOOL || rhs->type == RAY_U8) rp_bool = (uint8_t*)rbase; + } + + for (int64_t i = 0; i < n; i++) { + double lv, rv; + if (lp_f64) lv = lp_f64[i]; + else if (lp_i64) lv = 
(double)lp_i64[i]; + else if (lp_i32) lv = (double)lp_i32[i]; + else if (lp_u32) lv = (double)lp_u32[i]; + else if (lp_i16) lv = (double)lp_i16[i]; + else if (lp_bool) lv = (double)lp_bool[i]; + else if (l_scalar && (lhs->type == -RAY_F64 || lhs->type == RAY_F64)) lv = l_f64; + else lv = (double)l_i64; + + if (rp_f64) rv = rp_f64[i]; + else if (rp_i64) rv = (double)rp_i64[i]; + else if (rp_i32) rv = (double)rp_i32[i]; + else if (rp_u32) rv = (double)rp_u32[i]; + else if (rp_i16) rv = (double)rp_i16[i]; + else if (rp_bool) rv = (double)rp_bool[i]; + else if (r_scalar && (rhs->type == -RAY_F64 || rhs->type == RAY_F64)) rv = r_f64; + else rv = (double)r_i64; + + if (out_type == RAY_F64) { + double r; + switch (op->opcode) { + case OP_ADD: r = lv + rv; break; + case OP_SUB: r = lv - rv; break; + case OP_MUL: r = lv * rv; break; + case OP_DIV: r = rv != 0.0 ? lv / rv : NAN; break; + case OP_MOD: { if (rv != 0.0) { r = fmod(lv, rv); if (r && ((r > 0) != (rv > 0))) r += rv; } else { r = NAN; } } break; + case OP_MIN2: r = lv < rv ? lv : rv; break; + case OP_MAX2: r = lv > rv ? lv : rv; break; + default: r = 0.0; break; + } + ((double*)dst)[i] = r; + } else if (out_type == RAY_I64 || out_type == RAY_TIMESTAMP) { + int64_t li = (int64_t)lv, ri = (int64_t)rv; + int64_t r; + switch (op->opcode) { + case OP_ADD: r = (int64_t)((uint64_t)li + (uint64_t)ri); break; + case OP_SUB: r = (int64_t)((uint64_t)li - (uint64_t)ri); break; + case OP_MUL: r = (int64_t)((uint64_t)li * (uint64_t)ri); break; + case OP_DIV: + if (ri==0 || (ri==-1 && li==((int64_t)1<<63))) { r = 0; } + else { r = li/ri; if ((li^ri)<0 && r*ri!=li) r--; } + break; + case OP_MOD: + if (ri==0 || (ri==-1 && li==((int64_t)1<<63))) { r = 0; } + else { r = li%ri; if (r && (r^ri)<0) r+=ri; } + break; + case OP_MIN2: r = li < ri ? li : ri; break; + case OP_MAX2: r = li > ri ? 
li : ri; break; + default: r = 0; break; + } + ((int64_t*)dst)[i] = r; + } else if (out_type == RAY_I32 || out_type == RAY_DATE || out_type == RAY_TIME) { + int32_t li = (int32_t)lv, ri = (int32_t)rv; + int32_t r; + switch (op->opcode) { + case OP_ADD: r = (int32_t)((uint32_t)li + (uint32_t)ri); break; + case OP_SUB: r = (int32_t)((uint32_t)li - (uint32_t)ri); break; + case OP_MUL: r = (int32_t)((uint32_t)li * (uint32_t)ri); break; + case OP_DIV: + if (ri==0 || (ri==-1 && li==((int32_t)1<<31))) { r = 0; } + else { r = li/ri; if ((li^ri)<0 && r*ri!=li) r--; } + break; + case OP_MOD: + if (ri==0 || (ri==-1 && li==((int32_t)1<<31))) { r = 0; } + else { r = li%ri; if (r && (r^ri)<0) r+=ri; } + break; + case OP_MIN2: r = li < ri ? li : ri; break; + case OP_MAX2: r = li > ri ? li : ri; break; + default: r = 0; break; + } + ((int32_t*)dst)[i] = r; + } else if (out_type == RAY_I16) { + int16_t li = (int16_t)lv, ri = (int16_t)rv; + int16_t r; + switch (op->opcode) { + case OP_ADD: r = (int16_t)((uint16_t)li + (uint16_t)ri); break; + case OP_SUB: r = (int16_t)((uint16_t)li - (uint16_t)ri); break; + case OP_MUL: r = (int16_t)((uint16_t)li * (uint16_t)ri); break; + case OP_DIV: r = ri ? li / ri : 0; break; + case OP_MOD: r = ri ? li % ri : 0; break; + case OP_MIN2: r = li < ri ? li : ri; break; + case OP_MAX2: r = li > ri ? li : ri; break; + default: r = 0; break; + } + ((int16_t*)dst)[i] = r; + } else if (out_type == RAY_U8) { + uint8_t li = (uint8_t)lv, ri = (uint8_t)rv; + uint8_t r; + switch (op->opcode) { + case OP_ADD: r = li + ri; break; + case OP_SUB: r = li - ri; break; + case OP_MUL: r = li * ri; break; + case OP_DIV: r = ri ? li / ri : 0; break; + case OP_MOD: r = ri ? li % ri : 0; break; + case OP_MIN2: r = li < ri ? li : ri; break; + case OP_MAX2: r = li > ri ? 
li : ri; break; + default: r = 0; break; + } + ((uint8_t*)dst)[i] = r; + } else if (out_type == RAY_BOOL) { + /* Read raw I64 values directly for null-aware comparison + * when both operands are I64/I32-family (not F64). */ + int src_is_i64 = (lp_i64 || lp_i32 || lp_u32 || lp_i16 || + (l_scalar && lhs->type != -RAY_F64 && lhs->type != RAY_F64)) && + (rp_i64 || rp_i32 || rp_u32 || rp_i16 || + (r_scalar && rhs->type != -RAY_F64 && rhs->type != RAY_F64)); + int64_t li64 = (int64_t)lv, ri64 = (int64_t)rv; + uint8_t r; + if (src_is_i64) { + /* No sentinel nulls — fix_null_comparisons handles null positions */ + switch (op->opcode) { + case OP_EQ: r = li64==ri64; break; + case OP_NE: r = li64!=ri64; break; + case OP_LT: r = li64ri64; break; + case OP_GE: r = li64>=ri64; break; + case OP_AND: r = (uint8_t)lv && (uint8_t)rv; break; + case OP_OR: r = (uint8_t)lv || (uint8_t)rv; break; + default: r = 0; break; + } + } else { + /* Null-aware F64 comparisons: NaN is null sentinel */ + int ln = (lv != lv), rn = (rv != rv); /* NaN check */ + switch (op->opcode) { + case OP_EQ: r = (ln&&rn) ? 1 : (ln||rn) ? 0 : lv==rv; break; + case OP_NE: r = (ln&&rn) ? 0 : (ln||rn) ? 1 : lv!=rv; break; + case OP_LT: r = (ln&&rn) ? 0 : ln ? 1 : rn ? 0 : lvrv; break; + case OP_GE: r = (ln&&rn) ? 1 : rn ? 1 : ln ? 
0 : lv>=rv; break; + case OP_AND: r = (uint8_t)lv && (uint8_t)rv; break; + case OP_OR: r = (uint8_t)lv || (uint8_t)rv; break; + default: r = 0; break; + } + } + ((uint8_t*)dst)[i] = r; + } + } +} + +/* Context for parallel binary dispatch */ +typedef struct { + ray_op_t* op; + int8_t out_type; + ray_t* lhs; + ray_t* rhs; + ray_t* result; + bool l_scalar; + bool r_scalar; + double l_f64, r_f64; + int64_t l_i64, r_i64; +} par_binary_ctx_t; + +static void par_binary_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t end) { + (void)worker_id; + par_binary_ctx_t* c = (par_binary_ctx_t*)ctx; + binary_range(c->op, c->out_type, c->lhs, c->rhs, c->result, + c->l_scalar, c->r_scalar, + c->l_f64, c->r_f64, c->l_i64, c->r_i64, + start, end); +} + +ray_t* exec_elementwise_binary(ray_graph_t* g, ray_op_t* op, ray_t* lhs, ray_t* rhs) { + (void)g; + if (!lhs || RAY_IS_ERR(lhs)) return lhs; + if (!rhs || RAY_IS_ERR(rhs)) return rhs; + + bool l_scalar = ray_is_atom(lhs) || (lhs->type > 0 && lhs->len == 1); + bool r_scalar = ray_is_atom(rhs) || (rhs->type > 0 && rhs->len == 1); + + int64_t len = 1; + if (!l_scalar && !r_scalar) { + if (lhs->len != rhs->len) return ray_error("length", NULL); + len = lhs->len; + } else if (l_scalar && !r_scalar) { + len = rhs->len; + } else if (!l_scalar && r_scalar) { + len = lhs->len; + } + + int8_t out_type = op->out_type; + ray_t* result = ray_vec_new(out_type, len); + if (!result || RAY_IS_ERR(result)) return result; + result->len = len; + + /* RAY_STR comparison: use ray_str_t_eq / ray_str_t_cmp directly. + Handles RAY_STR column vs RAY_STR column, or -RAY_STR scalar vs RAY_STR column. 
*/ + { + bool l_is_str = (!l_scalar && lhs->type == RAY_STR); + bool r_is_str = (!r_scalar && rhs->type == RAY_STR); + bool l_atom_str = (l_scalar && (lhs->type == -RAY_STR + || lhs->type == RAY_STR + || (RAY_IS_SYM(lhs->type) && ray_is_atom(lhs)))); + bool r_atom_str = (r_scalar && (rhs->type == -RAY_STR + || rhs->type == RAY_STR + || (RAY_IS_SYM(rhs->type) && ray_is_atom(rhs)))); + + if (l_is_str || r_is_str || (l_atom_str && r_atom_str)) { + /* RAY_STR only supports comparison ops — reject arithmetic */ + uint16_t opc = op->opcode; + if (opc < OP_EQ || opc > OP_GE) { ray_release(result); return ray_error("type", NULL); } + /* At least one side is a RAY_STR column — use string comparison path. + The scalar side (if any) must be -RAY_STR or RAY_SYM atom. + The non-scalar side must be RAY_STR. */ + if (l_scalar && !l_atom_str) { ray_release(result); return ray_error("type", NULL); } + if (r_scalar && !r_atom_str) { ray_release(result); return ray_error("type", NULL); } + if (!l_scalar && !l_is_str) { ray_release(result); return ray_error("type", NULL); } + if (!r_scalar && !r_is_str) { ray_release(result); return ray_error("type", NULL); } + + ray_pool_t* pool = ray_pool_get(); + if (pool && len >= RAY_PARALLEL_THRESHOLD) { + par_binary_str_ctx_t ctx = { + .op = op, .lhs = lhs, .rhs = rhs, .result = result, + .l_scalar = l_scalar, .r_scalar = r_scalar, + }; + ray_pool_dispatch(pool, par_binary_str_fn, &ctx, len); + fix_null_comparisons(lhs, rhs, result, l_scalar, r_scalar, len, op->opcode); + return result; + } + binary_range_str(op, lhs, rhs, result, l_scalar, r_scalar, 0, len); + fix_null_comparisons(lhs, rhs, result, l_scalar, r_scalar, len, op->opcode); + return result; + } + } + + /* SYM vs STR comparison: resolve string constant to intern ID so we + can compare numerically against SYM intern indices. + ray_sym_find returns -1 if string not in table → no match. 
*/ + bool str_resolved = false; + int64_t resolved_sym_id = 0; + if (r_scalar && rhs->type == -RAY_STR && + RAY_IS_SYM(lhs->type)) { + const char* s = ray_str_ptr(rhs); + size_t slen = ray_str_len(rhs); + resolved_sym_id = ray_sym_find(s, slen); + str_resolved = true; + } else if (l_scalar && lhs->type == -RAY_STR && + RAY_IS_SYM(rhs->type)) { + const char* s = ray_str_ptr(lhs); + size_t slen = ray_str_len(lhs); + resolved_sym_id = ray_sym_find(s, slen); + str_resolved = true; + } + + double l_f64_val = 0, r_f64_val = 0; + int64_t l_i64_val = 0, r_i64_val = 0; + if (l_scalar) { + if (str_resolved && lhs->type == -RAY_STR) + l_i64_val = resolved_sym_id; + else if (ray_is_atom(lhs)) { + if (lhs->type == -RAY_F64) l_f64_val = lhs->f64; + else if (lhs->type == -RAY_I32 || lhs->type == -RAY_DATE || lhs->type == -RAY_TIME) + l_i64_val = (int64_t)lhs->i32; + else if (lhs->type == -RAY_I16) l_i64_val = (int64_t)lhs->i16; + else if (lhs->type == -RAY_U8 || lhs->type == -RAY_BOOL) l_i64_val = (int64_t)lhs->u8; + else l_i64_val = lhs->i64; + } else { + int8_t t = lhs->type; + int64_t elem = 0; + void* data = resolve_vec_data(lhs, &elem); + if (t == RAY_F64) l_f64_val = ((double*)data)[elem]; + else l_i64_val = read_col_i64(data, elem, t, lhs->attrs); + } + } + if (r_scalar) { + if (str_resolved && rhs->type == -RAY_STR) + r_i64_val = resolved_sym_id; + else if (ray_is_atom(rhs)) { + if (rhs->type == -RAY_F64) r_f64_val = rhs->f64; + else if (rhs->type == -RAY_I32 || rhs->type == -RAY_DATE || rhs->type == -RAY_TIME) + r_i64_val = (int64_t)rhs->i32; + else if (rhs->type == -RAY_I16) r_i64_val = (int64_t)rhs->i16; + else if (rhs->type == -RAY_U8 || rhs->type == -RAY_BOOL) r_i64_val = (int64_t)rhs->u8; + else r_i64_val = rhs->i64; + } else { + int8_t t = rhs->type; + int64_t elem = 0; + void* data = resolve_vec_data(rhs, &elem); + if (t == RAY_F64) r_f64_val = ((double*)data)[elem]; + else r_i64_val = read_col_i64(data, elem, t, rhs->attrs); + } + } + + ray_pool_t* pool = 
ray_pool_get(); + if (pool && len >= RAY_PARALLEL_THRESHOLD) { + par_binary_ctx_t ctx = { + .op = op, .out_type = out_type, + .lhs = lhs, .rhs = rhs, .result = result, + .l_scalar = l_scalar, .r_scalar = r_scalar, + .l_f64 = l_f64_val, .r_f64 = r_f64_val, + .l_i64 = l_i64_val, .r_i64 = r_i64_val, + }; + ray_pool_dispatch(pool, par_binary_fn, &ctx, len); + } else { + binary_range(op, out_type, lhs, rhs, result, + l_scalar, r_scalar, + l_f64_val, r_f64_val, l_i64_val, r_i64_val, + 0, len); + } + + /* Null propagation from inputs */ + if (op_propagates_null(op->opcode)) + propagate_nulls_binary(lhs, rhs, result, l_scalar, r_scalar, len); + else + fix_null_comparisons(lhs, rhs, result, l_scalar, r_scalar, len, op->opcode); + + /* Div/mod: mark zero-divisor positions as null. + * The morsel loop writes 0 for b==0 but can't set bitmap nulls. */ + uint16_t opc = op->opcode; + if (opc == OP_DIV || opc == OP_MOD) { + if (!r_scalar) { + int8_t rt = rhs->type; + if (rt == RAY_I64 || rt == RAY_TIMESTAMP) { + const int64_t* b = (const int64_t*)ray_data(rhs); + for (int64_t i = 0; i < len; i++) + if (b[i] == 0) ray_vec_set_null(result, i, true); + } else if (rt == RAY_I32 || rt == RAY_DATE || rt == RAY_TIME) { + const int32_t* b = (const int32_t*)ray_data(rhs); + for (int64_t i = 0; i < len; i++) + if (b[i] == 0) ray_vec_set_null(result, i, true); + } + /* F64 div-by-zero produces NaN which is handled by propagate_nulls */ + } else { + /* Scalar divisor: check for zero using the correct type */ + bool is_zero = false; + if (rhs->type == -RAY_F64 || rhs->type == RAY_F64) + is_zero = (r_f64_val == 0.0); + else + is_zero = (r_i64_val == 0); + if (is_zero) { + for (int64_t i = 0; i < len; i++) + ray_vec_set_null(result, i, true); + } + } + } + + return result; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/filter.c b/crates/rayforce-sys/vendor/rayforce/src/ops/filter.c new file mode 100644 index 0000000..c7ba85c --- /dev/null +++ 
b/crates/rayforce-sys/vendor/rayforce/src/ops/filter.c @@ -0,0 +1,685 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ops/internal.h" +#include "ops/rowsel.h" + +/* ============================================================================ + * Filter execution — extracted from exec.c + * ============================================================================ */ + +/* Gather from a parted column using global row indices (sorted ascending). + * Walks match_idx with an advancing segment cursor — O(count + n_segs). 
*/ +static void parted_gather_col(ray_t* parted_col, const int64_t* match_idx, + int64_t count, ray_t* dst_col) { + int64_t n_segs = parted_col->len; + if (n_segs == 0) return; /* zero-length VLA is UB in C17 */ + ray_t** segs = (ray_t**)ray_data(parted_col); + int8_t base = (int8_t)RAY_PARTED_BASETYPE(parted_col->type); + uint8_t base_attrs = (base == RAY_SYM) + ? parted_first_attrs(segs, n_segs) : 0; + uint8_t esz = ray_sym_elem_size(base, base_attrs); + char* dst = (char*)ray_data(dst_col); + memset(dst, 0, (size_t)count * esz); + + /* Build prefix-sum segment end table */ + int64_t seg_ends[n_segs]; + int64_t cumul = 0; + for (int64_t i = 0; i < n_segs; i++) { + cumul += segs[i] ? segs[i]->len : 0; + seg_ends[i] = cumul; + } + + /* Walk match_idx (sorted ascending) with advancing segment cursor */ + int64_t seg = 0; + for (int64_t i = 0; i < count; i++) { + int64_t row = match_idx[i]; + while (seg < n_segs - 1 && row >= seg_ends[seg]) seg++; + if (!segs[seg] || !parted_seg_esz_ok(segs[seg], base, esz)) + continue; /* NULL or width-mismatch — skip (zero-fill from vec_new) */ + int64_t seg_start = (seg > 0) ? seg_ends[seg - 1] : 0; + int64_t local_row = row - seg_start; + char* src = (char*)ray_data(segs[seg]); + memcpy(dst + i * esz, src + local_row * esz, esz); + if ((segs[seg]->attrs & RAY_ATTR_HAS_NULLS) && + ray_vec_is_null(segs[seg], local_row)) + ray_vec_set_null(dst_col, i, true); + } +} + +/* Filter a single vector by boolean predicate. 
*/ +static ray_t* exec_filter_vec(ray_t* input, ray_t* pred, int64_t pass_count) { + uint8_t esz = col_esz(input); + ray_t* result = col_vec_new(input, pass_count); + if (!result || RAY_IS_ERR(result)) return result; + result->len = pass_count; + + ray_morsel_t mi, mf; + ray_morsel_init(&mi, input); + ray_morsel_init(&mf, pred); + int64_t out_idx = 0; + + if (input->len != pred->len) { ray_release(result); return ray_error("length", NULL); } + + while (ray_morsel_next(&mi) && ray_morsel_next(&mf)) { + uint8_t* bits = (uint8_t*)mf.morsel_ptr; + char* src = (char*)mi.morsel_ptr; + char* dst = (char*)ray_data(result); + for (int64_t i = 0; i < mi.morsel_len; i++) { + if (bits[i]) { + memcpy(dst + out_idx * esz, src + i * esz, esz); + out_idx++; + } + } + } + + col_propagate_str_pool(result, input); + col_propagate_nulls_filter(result, input, + (const uint8_t*)ray_data(pred), input->len); + return result; +} + +/* Filter a parted column by boolean predicate (sequential). */ +static ray_t* exec_filter_parted_vec(ray_t* parted_col, ray_t* pred, + int64_t pass_count) { + int8_t base = (int8_t)RAY_PARTED_BASETYPE(parted_col->type); + ray_t** segs = (ray_t**)ray_data(parted_col); + int64_t n_segs = parted_col->len; + uint8_t* pred_data = (uint8_t*)ray_data(pred); + + /* RAY_STR: deep-copy to handle multi-pool segments */ + if (base == RAY_STR) { + ray_t* result = ray_vec_new(RAY_STR, pass_count); + if (!result || RAY_IS_ERR(result)) return result; + int64_t pred_off = 0; + for (int64_t s = 0; s < n_segs; s++) { + if (!segs[s]) continue; + int64_t seg_len = segs[s]->len; + const char* pool_base = segs[s]->str_pool + ? (const char*)ray_data(segs[s]->str_pool) : NULL; + for (int64_t i = 0; i < seg_len; i++) { + if (pred_data[pred_off + i]) { + result = parted_str_append_elem(result, segs[s], i, pool_base); + if (RAY_IS_ERR(result)) return result; + } + } + pred_off += seg_len; + } + return result; + } + + uint8_t base_attrs = (base == RAY_SYM) + ? 
parted_first_attrs(segs, n_segs) : 0; + uint8_t esz = ray_sym_elem_size(base, base_attrs); + ray_t* result = typed_vec_new(base, base_attrs, pass_count); + if (!result || RAY_IS_ERR(result)) return result; + result->len = pass_count; + + int64_t out_idx = 0; + int64_t pred_off = 0; + + for (int64_t s = 0; s < n_segs; s++) { + if (!segs[s]) continue; + int64_t seg_len = segs[s]->len; + if (!parted_seg_esz_ok(segs[s], base, esz)) { + char* dst = (char*)ray_data(result); + for (int64_t i = 0; i < seg_len; i++) { + if (pred_data[pred_off + i]) { + memset(dst + out_idx * esz, 0, esz); + out_idx++; + } + } + pred_off += seg_len; + continue; + } + char* src = (char*)ray_data(segs[s]); + char* dst = (char*)ray_data(result); + bool seg_has_nulls = (segs[s]->attrs & RAY_ATTR_HAS_NULLS) != 0; + for (int64_t i = 0; i < seg_len; i++) { + if (pred_data[pred_off + i]) { + memcpy(dst + out_idx * esz, src + i * esz, esz); + if (seg_has_nulls && ray_vec_is_null(segs[s], i)) + ray_vec_set_null(result, out_idx, true); + out_idx++; + } + } + pred_off += seg_len; + } + return result; +} + +/* Sequential table filter fallback (small tables or alloc failure). 
*/ +static ray_t* exec_filter_seq(ray_t* input, ray_t* pred, int64_t ncols, + int64_t pass_count) { + ray_t* tbl = ray_table_new(ncols); + if (!tbl || RAY_IS_ERR(tbl)) return tbl; + for (int64_t c = 0; c < ncols; c++) { + ray_t* col = ray_table_get_col_idx(input, c); + if (!col || RAY_IS_ERR(col)) continue; + int64_t name_id = ray_table_col_name(input, c); + if (col->type == RAY_MAPCOMMON) { + ray_t* mc_filt = materialize_mapcommon_filter(col, pred, pass_count); + if (!mc_filt || RAY_IS_ERR(mc_filt)) { ray_release(tbl); return mc_filt; } + tbl = ray_table_add_col(tbl, name_id, mc_filt); + ray_release(mc_filt); + continue; + } + ray_t* filtered; + if (RAY_IS_PARTED(col->type)) + filtered = exec_filter_parted_vec(col, pred, pass_count); + else + filtered = exec_filter_vec(col, pred, pass_count); + if (!filtered || RAY_IS_ERR(filtered)) { ray_release(tbl); return filtered; } + tbl = ray_table_add_col(tbl, name_id, filtered); + ray_release(filtered); + } + return tbl; +} + +ray_t* exec_filter(ray_graph_t* g, ray_op_t* op, ray_t* input, ray_t* pred) { + (void)g; + (void)op; + if (!input || RAY_IS_ERR(input)) return input; + if (!pred || RAY_IS_ERR(pred)) return pred; + + /* Count passing elements — single sequential scan over predicate */ + int64_t pass_count = 0; + { + ray_morsel_t mp; + ray_morsel_init(&mp, pred); + while (ray_morsel_next(&mp)) { + uint8_t* bits = (uint8_t*)mp.morsel_ptr; + for (int64_t i = 0; i < mp.morsel_len; i++) + if (bits[i]) pass_count++; + } + } + + /* Vector filter — single column, use sequential path */ + if (input->type != RAY_TABLE) + return exec_filter_vec(input, pred, pass_count); + + /* table filter: parallel gather using compact match index */ + int64_t ncols = ray_table_ncols(input); + int64_t nrows = ray_table_nrows(input); + + /* Fall back to sequential for tiny inputs or degenerate tables */ + if (nrows <= RAY_PARALLEL_THRESHOLD || ncols <= 0) + return exec_filter_seq(input, pred, ncols, pass_count); + + /* VLA guard: cap at 256 
columns for stack safety (256*16 = 4KB). + * Wider tables fall back to sequential filter. */ + if (ncols > 256) return exec_filter_seq(input, pred, ncols, pass_count); + + /* Build match_idx: match_idx[j] = row of j-th matching element */ + ray_t* idx_hdr = NULL; + int64_t* match_idx = (int64_t*)scratch_alloc(&idx_hdr, + (size_t)pass_count * sizeof(int64_t)); + if (!match_idx) + return exec_filter_seq(input, pred, ncols, pass_count); + + { + int64_t j = 0; + ray_morsel_t mp; + ray_morsel_init(&mp, pred); + int64_t row_base = 0; + while (ray_morsel_next(&mp)) { + uint8_t* bits = (uint8_t*)mp.morsel_ptr; + for (int64_t i = 0; i < mp.morsel_len; i++) + if (bits[i]) match_idx[j++] = row_base + i; + row_base += mp.morsel_len; + } + } + + /* Parallel gather — same pattern as sort gather */ + ray_pool_t* pool = ray_pool_get(); + ray_t* tbl = ray_table_new(ncols); + if (!tbl || RAY_IS_ERR(tbl)) { scratch_free(idx_hdr); return tbl; } + + /* Pre-allocate output columns */ + ray_t* new_cols[ncols]; + int64_t col_names[ncols]; + int64_t valid_ncols = 0; + + bool has_parted_cols = false; + for (int64_t c = 0; c < ncols; c++) { + ray_t* col = ray_table_get_col_idx(input, c); + col_names[c] = ray_table_col_name(input, c); + if (!col || RAY_IS_ERR(col)) { new_cols[c] = NULL; continue; } + if (col->type == RAY_MAPCOMMON) { + /* Materialize MAPCOMMON through filter predicate */ + new_cols[c] = materialize_mapcommon_filter(col, pred, pass_count); + if (new_cols[c] && !RAY_IS_ERR(new_cols[c])) valid_ncols++; + else new_cols[c] = NULL; + continue; + } + int8_t out_type = RAY_IS_PARTED(col->type) + ? 
(int8_t)RAY_PARTED_BASETYPE(col->type) + : col->type; + uint8_t out_attrs = 0; + if (out_type == RAY_SYM) { + if (RAY_IS_PARTED(col->type)) { + ray_t** sp = (ray_t**)ray_data(col); + out_attrs = parted_first_attrs(sp, col->len); + } else { + out_attrs = col->attrs; + } + } + if (RAY_IS_PARTED(col->type)) has_parted_cols = true; + ray_t* nc = typed_vec_new(out_type, out_attrs, pass_count); + if (!nc || RAY_IS_ERR(nc)) { new_cols[c] = NULL; continue; } + nc->len = pass_count; + new_cols[c] = nc; + valid_ncols++; + } + + if (has_parted_cols) { + /* Parted-aware gather: use parted_gather_col for parted columns, + * sequential flat gather for non-parted columns */ + for (int64_t c = 0; c < ncols; c++) { + ray_t* col = ray_table_get_col_idx(input, c); + if (!col || !new_cols[c]) continue; + if (col->type == RAY_MAPCOMMON) continue; /* already materialized */ + if (RAY_IS_PARTED(col->type)) { + int8_t pbase = (int8_t)RAY_PARTED_BASETYPE(col->type); + if (pbase == RAY_STR) { + ray_t** psegs = (ray_t**)ray_data(col); + ray_release(new_cols[c]); + new_cols[c] = parted_gather_str_rows(psegs, col->len, + match_idx, pass_count); + } else { + parted_gather_col(col, match_idx, pass_count, new_cols[c]); + } + } else { + uint8_t esz = col_esz(col); + char* src = (char*)ray_data(col); + char* dst = (char*)ray_data(new_cols[c]); + for (int64_t i = 0; i < pass_count; i++) + memcpy(dst + i * esz, src + match_idx[i] * esz, esz); + } + } + } else if (pool && valid_ncols > 0 && valid_ncols <= MGATHER_MAX_COLS) { + /* Fused multi-column gather */ + multi_gather_ctx_t mgctx = { .idx = match_idx, .ncols = 0 }; + for (int64_t c = 0; c < ncols; c++) { + if (!new_cols[c]) continue; + ray_t* col = ray_table_get_col_idx(input, c); + if (col && col->type == RAY_MAPCOMMON) continue; /* already materialized */ + int64_t ci = mgctx.ncols; + mgctx.srcs[ci] = (char*)ray_data(col); + mgctx.dsts[ci] = (char*)ray_data(new_cols[c]); + mgctx.esz[ci] = col_esz(col); + mgctx.ncols++; + } + 
ray_pool_dispatch(pool, multi_gather_fn, &mgctx, pass_count); + } else if (pool) { + /* Per-column parallel gather */ + for (int64_t c = 0; c < ncols; c++) { + ray_t* col = ray_table_get_col_idx(input, c); + if (!col || !new_cols[c]) continue; + gather_ctx_t gctx = { + .idx = match_idx, .src_col = col, .dst_col = new_cols[c], + .esz = col_esz(col), .nullable = false, + }; + ray_pool_dispatch(pool, gather_fn, &gctx, pass_count); + } + } else { + /* Sequential gather with index */ + for (int64_t c = 0; c < ncols; c++) { + ray_t* col = ray_table_get_col_idx(input, c); + if (!col || !new_cols[c]) continue; + uint8_t esz = col_esz(col); + char* src = (char*)ray_data(col); + char* dst = (char*)ray_data(new_cols[c]); + for (int64_t i = 0; i < pass_count; i++) + memcpy(dst + i * esz, src + match_idx[i] * esz, esz); + } + } + + /* Propagate str_pool for any RAY_STR columns gathered by index */ + /* Propagate str_pool for non-STR parted and flat columns. + * STR parted columns were already deep-copied with their own pool. */ + for (int64_t c = 0; c < ncols; c++) { + if (!new_cols[c]) continue; + ray_t* col = ray_table_get_col_idx(input, c); + if (!col) continue; + if (RAY_IS_PARTED(col->type)) { + int8_t pb = (int8_t)RAY_PARTED_BASETYPE(col->type); + if (pb != RAY_STR) { + ray_t** sp = (ray_t**)ray_data(col); + col_propagate_str_pool_parted(new_cols[c], sp, col->len); + } + } else { + col_propagate_str_pool(new_cols[c], col); + } + } + + for (int64_t c = 0; c < ncols; c++) { + if (!new_cols[c]) continue; + tbl = ray_table_add_col(tbl, col_names[c], new_cols[c]); + ray_release(new_cols[c]); + } + + scratch_free(idx_hdr); + return tbl; +} + +/* ============================================================================ + * exec_filter_head — filter table, keeping only the first `limit` matches + * + * Scans the predicate sequentially, collecting matching row indices and + * stopping as soon as `limit` matches are found. 
Only those rows are
+ * gathered into the result table, avoiding full-table gather when the
+ * number of matches far exceeds the limit.
+ *
+ * Returns `input` unchanged when it is not a table or `pred` is not RAY_BOOL,
+ * an empty column-less table when nothing can be returned (limit <= 0, no
+ * rows, no columns), a "length" error when the predicate is longer than the
+ * table, and an "oom" error when the index buffer cannot be allocated.
+ * Columns are gathered one at a time, so there is no column-count limit.
+ * RAY_MAPCOMMON columns are left out of the result.
+ * NOTE(review): exec_filter materializes MAPCOMMON columns instead of
+ * dropping them — confirm whether head should do the same.
+ * ============================================================================ */
+ray_t* exec_filter_head(ray_t* input, ray_t* pred, int64_t limit) {
+    if (!input || RAY_IS_ERR(input)) return input;
+    if (!pred || RAY_IS_ERR(pred)) return pred;
+    if (input->type != RAY_TABLE || pred->type != RAY_BOOL) return input;
+
+    int64_t ncols = ray_table_ncols(input);
+    int64_t nrows = ray_table_nrows(input);
+    /* Clamp before testing for "nothing to do": an empty table then takes the
+     * early return instead of requesting a zero-byte index buffer. */
+    if (limit > nrows) limit = nrows;
+    if (limit <= 0 || ncols <= 0) return ray_table_new(0);
+
+    /* Row indices are used unchecked below; a predicate longer than the
+     * table could yield indices past the end of every column. */
+    if (pred->len > nrows) return ray_error("length", NULL);
+
+    /* Collect up to `limit` matching row indices, stopping early */
+    ray_t* idx_hdr = NULL;
+    int64_t* match_idx = (int64_t*)scratch_alloc(&idx_hdr,
+                                   (size_t)limit * sizeof(int64_t));
+    if (!match_idx) return ray_error("oom", NULL);
+
+    int64_t found = 0;
+    {
+        ray_morsel_t mp;
+        ray_morsel_init(&mp, pred);
+        int64_t row_base = 0;
+        while (ray_morsel_next(&mp) && found < limit) {
+            uint8_t* bits = (uint8_t*)mp.morsel_ptr;
+            for (int64_t i = 0; i < mp.morsel_len && found < limit; i++)
+                if (bits[i]) match_idx[found++] = row_base + i;
+            row_base += mp.morsel_len;
+        }
+    }
+
+    /* Build result table with gathered rows */
+    ray_t* tbl = ray_table_new(ncols);
+    if (!tbl || RAY_IS_ERR(tbl)) { scratch_free(idx_hdr); return tbl; }
+
+    for (int64_t c = 0; c < ncols; c++) {
+        ray_t* col = ray_table_get_col_idx(input, c);
+        int64_t name_id = ray_table_col_name(input, c);
+        if (!col || RAY_IS_ERR(col)) continue;
+        int8_t out_type = RAY_IS_PARTED(col->type)
+                        ? (int8_t)RAY_PARTED_BASETYPE(col->type) : col->type;
+        if (out_type == RAY_MAPCOMMON) continue;
+        uint8_t out_attrs = 0;
+        if (out_type == RAY_SYM) {
+            if (RAY_IS_PARTED(col->type)) {
+                ray_t** sp = (ray_t**)ray_data(col);
+                out_attrs = parted_first_attrs(sp, col->len);
+            } else out_attrs = col->attrs;
+        }
+        uint8_t esz = ray_sym_elem_size(out_type, out_attrs);
+        ray_t* new_col = typed_vec_new(out_type, out_attrs, found);
+        if (!new_col || RAY_IS_ERR(new_col)) continue;
+        new_col->len = found;
+        char* dst = (char*)ray_data(new_col);
+        memset(dst, 0, (size_t)found * esz);
+
+        if (RAY_IS_PARTED(col->type)) {
+            ray_t** segs = (ray_t**)ray_data(col);
+            int64_t n_segs = col->len;
+            if (out_type == RAY_STR) {
+                /* Deep-copy STR to handle multi-pool segments */
+                ray_release(new_col);
+                new_col = parted_gather_str_rows(segs, n_segs, match_idx, found);
+                /* A failed gather drops the column, like a failed allocation
+                 * above; an error object must never become a table column. */
+                if (!new_col || RAY_IS_ERR(new_col)) continue;
+            } else {
+                /* Shared parted gather: same cursor walk as exec_filter, and
+                 * it also carries over the segments' null flags. */
+                parted_gather_col(col, match_idx, found, new_col);
+                col_propagate_str_pool_parted(new_col, segs, n_segs);
+            }
+        } else {
+            char* src = (char*)ray_data(col);
+            for (int64_t j = 0; j < found; j++)
+                memcpy(dst + j * esz, src + match_idx[j] * esz, esz);
+            col_propagate_str_pool(new_col, col);
+            /* The byte copy above does not move null flags. */
+            col_propagate_nulls_gather(new_col, col, match_idx, found);
+        }
+        tbl = ray_table_add_col(tbl, name_id, new_col);
+        ray_release(new_col);
+    }
+
+    scratch_free(idx_hdr);
+    return tbl;
+}
+
+/* ============================================================================
+ * sel_compact — materialize a table by applying a RAY_SEL bitmap
+ *
+ * Used at boundary ops (sort/join/window) that need dense contiguous data.
+ * Reuses the same parallel multi-column gather as exec_filter.
+ *
+ * Returns:
+ *   - `tbl` itself when tbl is NULL/an error or sel is NULL (not retained),
+ *   - `tbl` retained when every row passes or the table has no columns,
+ *   - an empty table with one zero-length column per source column when no
+ *     row passes,
+ *   - a "domain" error when the selection does not describe this table,
+ *   - a "limit" error for tables wider than 256 columns,
+ *   - an "oom" error when the index buffer cannot be allocated,
+ *   - otherwise a new table holding only the selected rows.
+ * RAY_MAPCOMMON columns and columns that fail to allocate or gather are left
+ * out of the compacted table.
+ * NOTE(review): exec_filter materializes MAPCOMMON columns; here they are
+ * dropped — confirm that callers never compact tables that contain one.
+ * ============================================================================ */
+
+ray_t* sel_compact(ray_graph_t* g, ray_t* tbl, ray_t* sel) {
+    (void)g;
+    if (!tbl || RAY_IS_ERR(tbl) || !sel) return tbl;
+
+    int64_t nrows = ray_table_nrows(tbl);
+    ray_rowsel_t* meta = ray_rowsel_meta(sel);
+
+    /* Defensive: the selection must have been built for a table
+     * with this exact row count.  Mismatch means the caller passed
+     * a stale selection — aborting here is strictly safer than
+     * silently gathering via out-of-range indices. */
+    if (meta->nrows != nrows)
+        return ray_error("domain",
+            "sel_compact: selection nrows mismatch (sel=%lld tbl=%lld)",
+            (long long)meta->nrows, (long long)nrows);
+
+    int64_t pass_count = meta->total_pass;
+
+    /* All-pass: nothing to compact.  (In practice this path is
+     * unreachable because ray_rowsel_from_pred returns NULL for
+     * all-pass; the caller skips sel_compact in that case.
+     * Handled here for safety.) */
+    if (pass_count == nrows) { ray_retain(tbl); return tbl; }
+
+    /* None-pass: return empty table with same schema */
+    if (pass_count == 0) {
+        int64_t ncols = ray_table_ncols(tbl);
+        ray_t* empty = ray_table_new(ncols);
+        if (!empty || RAY_IS_ERR(empty)) return empty;
+        for (int64_t c = 0; c < ncols; c++) {
+            ray_t* col = ray_table_get_col_idx(tbl, c);
+            if (!col) continue;
+            int8_t ct = RAY_IS_PARTED(col->type)
+                      ? (int8_t)RAY_PARTED_BASETYPE(col->type) : col->type;
+            ray_t* nc = ray_vec_new(ct, 0);
+            if (nc && !RAY_IS_ERR(nc)) {
+                nc->len = 0;
+                empty = ray_table_add_col(empty, ray_table_col_name(tbl, c), nc);
+                ray_release(nc);
+            }
+        }
+        return empty;
+    }
+
+    int64_t ncols = ray_table_ncols(tbl);
+    if (ncols <= 0) { ray_retain(tbl); return tbl; }
+
+    /* VLA guard: 256 cols max for stack arrays.  Checked before anything is
+     * allocated so the early return has nothing to release. */
+    if (ncols > 256) return ray_error("limit", "table exceeds 256 columns");
+
+    /* Build match_idx from bitmap */
+    ray_t* idx_hdr = NULL;
+    int64_t* match_idx = (int64_t*)scratch_alloc(&idx_hdr,
+                                   (size_t)pass_count * sizeof(int64_t));
+    /* Handing back the uncompacted table would silently ignore the selection,
+     * so allocation failure is reported as an error. */
+    if (!match_idx) return ray_error("oom", NULL);
+
+    /* j counts every selected row; writes stop at pass_count so a selection
+     * whose segments disagree with total_pass cannot overrun the buffer. */
+    int64_t j = 0;
+    {
+        const uint8_t*  flags   = ray_rowsel_flags(sel);
+        const uint32_t* offsets = ray_rowsel_offsets(sel);
+        const uint16_t* idx     = ray_rowsel_idx(sel);
+        uint32_t n_segs = meta->n_segs;
+        for (uint32_t seg = 0; seg < n_segs; seg++) {
+            uint8_t f = flags[seg];
+            if (f == RAY_SEL_NONE) continue;
+            int64_t seg_start = (int64_t)seg * RAY_MORSEL_ELEMS;
+            int64_t seg_end = seg_start + RAY_MORSEL_ELEMS;
+            if (seg_end > nrows) seg_end = nrows;
+            if (f == RAY_SEL_ALL) {
+                for (int64_t r = seg_start; r < seg_end; r++) {
+                    if (j < pass_count) match_idx[j] = r;
+                    j++;
+                }
+            } else {
+                const uint16_t* slice = idx + offsets[seg];
+                uint32_t n = offsets[seg + 1] - offsets[seg];
+                for (uint32_t i = 0; i < n; i++) {
+                    if (j < pass_count) match_idx[j] = seg_start + slice[i];
+                    j++;
+                }
+            }
+        }
+    }
+    if (j != pass_count) {
+        scratch_free(idx_hdr);
+        return ray_error("domain",
+            "sel_compact: selection pass count mismatch (expanded=%lld total_pass=%lld)",
+            (long long)j, (long long)pass_count);
+    }
+
+    /* Parallel multi-column gather (same pattern as exec_filter) */
+    ray_pool_t* pool = ray_pool_get();
+    ray_t* out = ray_table_new(ncols);
+    if (!out || RAY_IS_ERR(out)) { scratch_free(idx_hdr); return out; }
+
+    /* new_cols[c] == NULL marks a column that is left out of the result. */
+    ray_t* new_cols[ncols];
+    int64_t col_names[ncols];
+    int64_t valid_ncols = 0;
+    bool has_parted = false;
+
+    for (int64_t c = 0; c < ncols; c++) {
+        ray_t* col = ray_table_get_col_idx(tbl, c);
+        col_names[c] = ray_table_col_name(tbl, c);
+        if (!col || RAY_IS_ERR(col)) { new_cols[c] = NULL; continue; }
+        if (col->type == RAY_MAPCOMMON) { new_cols[c] = NULL; continue; }
+        int8_t ct = RAY_IS_PARTED(col->type)
+                  ? (int8_t)RAY_PARTED_BASETYPE(col->type) : col->type;
+        uint8_t ca = 0;
+        if (ct == RAY_SYM) {
+            if (RAY_IS_PARTED(col->type)) {
+                ray_t** sp = (ray_t**)ray_data(col);
+                ca = parted_first_attrs(sp, col->len);
+            } else ca = col->attrs;
+        }
+        if (RAY_IS_PARTED(col->type)) has_parted = true;
+        ray_t* nc = typed_vec_new(ct, ca, pass_count);
+        if (!nc || RAY_IS_ERR(nc)) { new_cols[c] = NULL; continue; }
+        nc->len = pass_count;
+        new_cols[c] = nc;
+        valid_ncols++;
+    }
+
+    if (has_parted) {
+        for (int64_t c = 0; c < ncols; c++) {
+            ray_t* col = ray_table_get_col_idx(tbl, c);
+            if (!col || !new_cols[c]) continue;
+            if (RAY_IS_PARTED(col->type)) {
+                int8_t pbase = (int8_t)RAY_PARTED_BASETYPE(col->type);
+                if (pbase == RAY_STR) {
+                    ray_t** psegs = (ray_t**)ray_data(col);
+                    ray_release(new_cols[c]);
+                    ray_t* gathered = parted_gather_str_rows(psegs, col->len,
+                                                             match_idx, pass_count);
+                    /* A failed gather leaves the column out, like a failed
+                     * allocation above; never store an error object. */
+                    new_cols[c] = (gathered && !RAY_IS_ERR(gathered)) ? gathered : NULL;
+                } else {
+                    parted_gather_col(col, match_idx, pass_count, new_cols[c]);
+                }
+            } else {
+                uint8_t esz = col_esz(col);
+                char* src = (char*)ray_data(col);
+                char* dst = (char*)ray_data(new_cols[c]);
+                for (int64_t i = 0; i < pass_count; i++)
+                    memcpy(dst + i * esz, src + match_idx[i] * esz, esz);
+            }
+        }
+    } else if (pool && valid_ncols > 0 && valid_ncols <= MGATHER_MAX_COLS) {
+        multi_gather_ctx_t mgctx = { .idx = match_idx, .ncols = 0 };
+        for (int64_t c = 0; c < ncols; c++) {
+            if (!new_cols[c]) continue;
+            ray_t* col = ray_table_get_col_idx(tbl, c);
+            int64_t ci = mgctx.ncols;
+            mgctx.srcs[ci] = (char*)ray_data(col);
+            mgctx.dsts[ci] = (char*)ray_data(new_cols[c]);
+            mgctx.esz[ci] = col_esz(col);
+            mgctx.ncols++;
+        }
+        ray_pool_dispatch(pool, multi_gather_fn, &mgctx, pass_count);
+    } else if (pool) {
+        for (int64_t c = 0; c < ncols; c++) {
+            ray_t* col = ray_table_get_col_idx(tbl, c);
+            if (!col || !new_cols[c]) continue;
+            gather_ctx_t gctx = {
+                .idx = match_idx, .src_col = col, .dst_col = new_cols[c],
+                .esz = col_esz(col), .nullable = false,
+            };
+            ray_pool_dispatch(pool, gather_fn, &gctx, pass_count);
+        }
+    } else {
+        for (int64_t c = 0; c < ncols; c++) {
+            ray_t* col = ray_table_get_col_idx(tbl, c);
+            if (!col || !new_cols[c]) continue;
+            uint8_t esz = col_esz(col);
+            char* src = (char*)ray_data(col);
+            char* dst = (char*)ray_data(new_cols[c]);
+            for (int64_t i = 0; i < pass_count; i++)
+                memcpy(dst + i * esz, src + match_idx[i] * esz, esz);
+        }
+    }
+
+    for (int64_t c = 0; c < ncols; c++) {
+        if (!new_cols[c]) continue;
+        ray_t* scol = ray_table_get_col_idx(tbl, c);
+        if (scol && RAY_IS_PARTED(scol->type)) {
+            int8_t pb = (int8_t)RAY_PARTED_BASETYPE(scol->type);
+            if (pb != RAY_STR) {
+                ray_t** sp = (ray_t**)ray_data(scol);
+                col_propagate_str_pool_parted(new_cols[c], sp, scol->len);
+            }
+            /* Parted null propagation handled in parted_gather_col / parted_gather_str_rows */
+        } else if (scol) {
+            col_propagate_str_pool(new_cols[c], scol);
+            col_propagate_nulls_gather(new_cols[c], scol, match_idx, pass_count);
+        }
+        out = ray_table_add_col(out, col_names[c], new_cols[c]);
+        ray_release(new_cols[c]);
+    }
+
+    scratch_free(idx_hdr);
+    return out;
+}
diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/fuse.c b/crates/rayforce-sys/vendor/rayforce/src/ops/fuse.c
new file mode 100644
index 0000000..44606dd
--- /dev/null
+++ b/crates/rayforce-sys/vendor/rayforce/src/ops/fuse.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2025-2026 Anton Kundenko
+ * All rights reserved.
+
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "fuse.h"
+#include "mem/sys.h"
+#include <string.h>
+
+/* --------------------------------------------------------------------------
+ * Fusion pass: merge element-wise chains into single fused nodes
+ *
+ * Detection: find maximal chains of element-wise ops where each intermediate
+ * has exactly one consumer. Mark chains with OP_FLAG_FUSED.
+ *
+ * For now this is a lightweight implementation that marks fuseable chains
+ * but relies on the executor's existing per-op evaluation. A full bytecode
+ * interpreter over register slots would be added in a production version.
+ * -------------------------------------------------------------------------- */
+
+/* Element-wise opcodes: unary [OP_NEG=10..OP_CAST=19] and
+ * binary [OP_ADD=20..OP_MAX2=34]. These ranges are contiguous by
+ * design (see rayforce.h opcode definitions).
*/ +static bool is_elementwise(uint16_t opcode) { + return (opcode >= OP_NEG && opcode <= OP_CAST) || + (opcode >= OP_ADD && opcode <= OP_MAX2); +} + +/* O(ext_count) per call; acceptable for typical graph sizes (tens to + hundreds of nodes). L2: intentional duplication to keep files + self-contained — also present in opt.c. */ +static ray_op_ext_t* find_ext(ray_graph_t* g, uint32_t node_id) { + for (uint32_t i = 0; i < g->ext_count; i++) { + if (g->ext_nodes[i] && g->ext_nodes[i]->base.id == node_id) + return g->ext_nodes[i]; + } + return NULL; +} + +/* Count references to each node (iterative) */ +static void count_refs(ray_graph_t* g, ray_op_t* root, uint32_t* ref_counts) { + if (!root) return; + + uint32_t nc = g->node_count; + /* M3: Overflow guard — prevent stack_cap from wrapping around on + pathologically large graphs. */ + if (nc > UINT32_MAX / 2) return; + uint32_t stack_cap = nc * 2; + uint32_t stack_local[256]; + uint32_t *stack = stack_cap <= 256 ? stack_local : (uint32_t*)ray_sys_alloc(stack_cap * sizeof(uint32_t)); + if (!stack) return; + int sp = 0; + stack[sp++] = root->id; + while (sp > 0) { + uint32_t nid = stack[--sp]; + ray_op_t* n = &g->nodes[nid]; + ref_counts[nid]++; + if (ref_counts[nid] > 1) continue; /* already counted children */ + for (int i = 0; i < n->arity && i < 2; i++) { + if (n->inputs[i] && sp < (int)stack_cap) + stack[sp++] = n->inputs[i]->id; + } + /* M11: 3-input ops (OP_IF, OP_SUBSTR, OP_REPLACE) store the third + operand node ID as (uintptr_t)ext->literal. */ + if (n->opcode == OP_IF || n->opcode == OP_SUBSTR || n->opcode == OP_REPLACE) { + ray_op_ext_t* ext = find_ext(g, nid); + if (ext) { + uint32_t third_id = (uint32_t)(uintptr_t)ext->literal; + if (third_id < nc && sp < (int)stack_cap) + stack[sp++] = third_id; + } + } + /* M11: OP_CONCAT stores extra arg IDs (beyond inputs[0..1]) as + uint32_t values in trailing bytes after the ext node. + ext->sym holds the total arg count. 
*/ + if (n->opcode == OP_CONCAT) { + ray_op_ext_t* ext = find_ext(g, nid); + /* M4: Guard against ext->sym < 2 — trailing uint32_t values + only exist when there are more than 2 arguments. */ + if (ext && ext->sym >= 2) { + int n_args = (int)ext->sym; + uint32_t* trail = (uint32_t*)((char*)(ext + 1)); + for (int i = 2; i < n_args; i++) { + uint32_t arg_id = trail[i - 2]; + if (arg_id < nc && sp < (int)stack_cap) + stack[sp++] = arg_id; + } + } + } + /* H2: Count refs for ext node children (GROUP keys/aggs, + SORT/SELECT columns, JOIN keys, WINDOW inputs) + so fusion ref counts are accurate. */ + if (n->opcode == OP_GROUP || n->opcode == OP_SORT || + n->opcode == OP_JOIN || n->opcode == OP_WINDOW_JOIN || + n->opcode == OP_WINDOW || + n->opcode == OP_SELECT) { + ray_op_ext_t* ext = find_ext(g, nid); + if (ext) { + switch (n->opcode) { + case OP_GROUP: + for (uint8_t k = 0; k < ext->n_keys; k++) { + if (ext->keys[k] && sp < (int)stack_cap) + stack[sp++] = ext->keys[k]->id; + } + for (uint8_t a = 0; a < ext->n_aggs; a++) { + if (ext->agg_ins[a] && sp < (int)stack_cap) + stack[sp++] = ext->agg_ins[a]->id; + } + break; + case OP_SORT: + case OP_SELECT: + for (uint8_t k = 0; k < ext->sort.n_cols; k++) { + if (ext->sort.columns[k] && sp < (int)stack_cap) + stack[sp++] = ext->sort.columns[k]->id; + } + break; + case OP_JOIN: + for (uint8_t k = 0; k < ext->join.n_join_keys; k++) { + if (ext->join.left_keys[k] && sp < (int)stack_cap) + stack[sp++] = ext->join.left_keys[k]->id; + if (ext->join.right_keys && ext->join.right_keys[k] && sp < (int)stack_cap) + stack[sp++] = ext->join.right_keys[k]->id; + } + break; + case OP_WINDOW_JOIN: + if (ext->asof.time_key && sp < (int)stack_cap) + stack[sp++] = ext->asof.time_key->id; + for (uint8_t k = 0; k < ext->asof.n_eq_keys; k++) { + if (ext->asof.eq_keys[k] && sp < (int)stack_cap) + stack[sp++] = ext->asof.eq_keys[k]->id; + } + break; + case OP_WINDOW: + for (uint8_t k = 0; k < ext->window.n_part_keys; k++) { + if 
(ext->window.part_keys[k] && sp < (int)stack_cap) + stack[sp++] = ext->window.part_keys[k]->id; + } + for (uint8_t k = 0; k < ext->window.n_order_keys; k++) { + if (ext->window.order_keys[k] && sp < (int)stack_cap) + stack[sp++] = ext->window.order_keys[k]->id; + } + for (uint8_t f = 0; f < ext->window.n_funcs; f++) { + if (ext->window.func_inputs[f] && sp < (int)stack_cap) + stack[sp++] = ext->window.func_inputs[f]->id; + } + break; + default: + break; + } + } + } + } + if (stack_cap > 256) ray_sys_free(stack); +} + +void ray_fuse_pass(ray_graph_t* g, ray_op_t* root) { + if (!g || !root || g->node_count == 0) return; + + uint32_t nc = g->node_count; + uint32_t* ref_counts; + uint32_t ref_counts_stack[256]; + if (nc <= 256) { + ref_counts = ref_counts_stack; + } else { + ref_counts = (uint32_t*)ray_sys_alloc(nc * sizeof(uint32_t)); + if (!ref_counts) return; + } + memset(ref_counts, 0, nc * sizeof(uint32_t)); + + count_refs(g, root, ref_counts); + + /* Mark fuseable chains: element-wise nodes whose inputs have exactly + one consumer (this node) and are also element-wise */ + for (uint32_t i = 0; i < nc; i++) { + ray_op_t* n = &g->nodes[i]; + if (!is_elementwise(n->opcode)) continue; + if (n->flags & OP_FLAG_DEAD) continue; + + /* Check if all inputs are single-consumer element-wise */ + bool can_fuse = false; + for (int j = 0; j < n->arity && j < 2; j++) { + ray_op_t* inp = n->inputs[j]; + if (inp && is_elementwise(inp->opcode) && ref_counts[inp->id] == 1) { + can_fuse = true; + } + } + if (can_fuse) { + n->flags |= OP_FLAG_FUSED; + } + } + if (nc > 256) ray_sys_free(ref_counts); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/fuse.h b/crates/rayforce-sys/vendor/rayforce/src/ops/fuse.h new file mode 100644 index 0000000..779bc16 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/fuse.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_FUSE_H +#define RAY_FUSE_H + +#include "ops.h" + +#endif /* RAY_FUSE_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/fvec.c b/crates/rayforce-sys/vendor/rayforce/src/ops/fvec.c new file mode 100644 index 0000000..b5be031 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/fvec.c @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "fvec.h" +#include "mem/sys.h" +#include "table/sym.h" +#include +#include + +ray_ftable_t* ray_ftable_new(uint16_t n_cols) { + ray_ftable_t* ft = (ray_ftable_t*)ray_sys_alloc(sizeof(ray_ftable_t)); + if (!ft) return NULL; + memset(ft, 0, sizeof(ray_ftable_t)); + + ft->columns = (ray_fvec_t*)ray_sys_alloc((size_t)n_cols * sizeof(ray_fvec_t)); + if (!ft->columns) { + ray_sys_free(ft); + return NULL; + } + memset(ft->columns, 0, (size_t)n_cols * sizeof(ray_fvec_t)); + ft->n_cols = n_cols; + + return ft; +} + +void ray_ftable_free(ray_ftable_t* ft) { + if (!ft) return; + + if (ft->columns) { + for (uint16_t i = 0; i < ft->n_cols; i++) { + if (ft->columns[i].vec) ray_release(ft->columns[i].vec); + } + ray_sys_free(ft->columns); + } + if (ft->semijoin) ray_release(ft->semijoin); + ray_sys_free(ft); +} + +ray_t* ray_ftable_materialize(ray_ftable_t* ft) { + if (!ft || ft->n_cols == 0) return ray_error("type", NULL); + + ray_t* tbl = ray_table_new(ft->n_cols); + if (!tbl || RAY_IS_ERR(tbl)) return tbl; + + for (uint16_t c = 0; c < ft->n_cols; c++) { + ray_fvec_t* fv = &ft->columns[c]; + if (!fv->vec) continue; + + ray_t* col; + if (fv->cur_idx >= 0) { + /* Flat: replicate single value */ + if (fv->cardinality <= 0) { ray_release(tbl); return ray_error("range", NULL); } + col = ray_vec_new(fv->vec->type, fv->cardinality); + if (!col || RAY_IS_ERR(col)) { ray_release(tbl); return col ? 
col : ray_error("oom", NULL); } + col->len = fv->cardinality; + void* val = ray_vec_get(fv->vec, fv->cur_idx); + if (!val) { ray_release(col); ray_release(tbl); return ray_error("range", NULL); } + uint8_t esz = ray_sym_elem_size(fv->vec->type, fv->vec->attrs); + char* dst = (char*)ray_data(col); + for (int64_t r = 0; r < fv->cardinality; r++) + memcpy(dst + r * esz, val, esz); + } else { + /* Unflat: use as-is */ + col = fv->vec; + ray_retain(col); + } + + char name_buf[12]; + int n = snprintf(name_buf, sizeof(name_buf), "_c%d", c); + int64_t name_id = ray_sym_intern(name_buf, (size_t)n); + ray_t* new_tbl = ray_table_add_col(tbl, name_id, col); + ray_release(col); + if (!new_tbl || RAY_IS_ERR(new_tbl)) { + if (new_tbl != tbl) ray_release(tbl); + return new_tbl ? new_tbl : ray_error("oom", NULL); + } + tbl = new_tbl; + } + + return tbl; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/fvec.h b/crates/rayforce-sys/vendor/rayforce/src/ops/fvec.h new file mode 100644 index 0000000..cbd1a66 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/fvec.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_FVEC_H +#define RAY_FVEC_H + +#include "ops.h" + +/* Factorization state -- pipeline concept, NOT added to ray_t. + * + * Lives in the pipeline context. ray_t itself remains unchanged. + */ +typedef struct ray_fvec { + ray_t* vec; /* underlying ray_t vector (I64, SYM, etc.) */ + int64_t cur_idx; /* >= 0: flat (single value at index) */ + /* -1: unflat (full vector is active) */ + int64_t cardinality; /* for flat: how many rows this represents */ +} ray_fvec_t; + +/* Factorized Table -- accumulation buffer for ASP-Join */ +typedef struct ray_ftable { + ray_fvec_t* columns; /* array of factorized vectors */ + uint16_t n_cols; + int64_t n_tuples; /* factorized tuple count */ + ray_t* semijoin; /* RAY_SEL bitmap of qualifying keys */ +} ray_ftable_t; + +ray_ftable_t* ray_ftable_new(uint16_t n_cols); +void ray_ftable_free(ray_ftable_t* ft); +ray_t* ray_ftable_materialize(ray_ftable_t* ft); + +#endif /* RAY_FVEC_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/glob.c b/crates/rayforce-sys/vendor/rayforce/src/ops/glob.c new file mode 100644 index 0000000..dea37d1 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/glob.c @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + */ + +/* + * Iterative glob matcher. Replaces three pre-existing implementations + * that diverged in syntax (eval used *,?,[abc]; DAG used SQL %,_) and + * one of which (strop.c::str_glob) blew up exponentially on patterns + * like "a*a*a*…a*b" against an a-only string. This single file is + * the only matcher; both call sites delegate here. + */ + +#include "ops/glob.h" + +/* Lowercase an ASCII byte; non-ASCII passes through unchanged. 
/* Lowercase an ASCII byte; non-ASCII passes through unchanged. */
static inline char to_lower(char c) {
    return (c >= 'A' && c <= 'Z') ? (char)(c + 32) : c;
}

/* Comparison key for one byte: optional ASCII case folding, then widened
 * through unsigned char.  Plain `char` has implementation-defined
 * signedness, so ordering bytes >= 0x80 as plain char would make range
 * matching differ between platforms; unsigned keys give plain byte order
 * everywhere. */
static inline unsigned char glob_key(char c, bool ci) {
    return (unsigned char)(ci ? to_lower(c) : c);
}

/* Match a single character against a class `[ ... ]`.
 *
 *   p, pn  pattern bytes and length
 *   pi     in: index of the byte after `[`; out: index one past the
 *          closing `]` (or pn when the class is unterminated)
 *   c      the input byte to test
 *   ci     true for ASCII case-insensitive matching
 *
 * Recognises `[abc]`, `[a-z]`, a leading `!` for negation, and `]` as a
 * literal when it is the first class member (after the optional `!`).
 * Ranges are compared by unsigned byte value.  Returns whether `c` is
 * accepted by the class. */
static bool match_class(const char* p, size_t pn, size_t* pi, char c, bool ci) {
    size_t i = *pi;
    bool neg = false;
    if (i < pn && p[i] == '!') { neg = true; i++; }

    const unsigned char ch = glob_key(c, ci);
    bool matched = false;
    bool first = true;
    while (i < pn && (first || p[i] != ']')) {
        const unsigned char lo = glob_key(p[i], ci);
        /* `x-y` is a range only when `y` exists, is not the closing
         * bracket, and is followed by at least one more pattern byte. */
        if (i + 2 < pn && p[i + 1] == '-' && p[i + 2] != ']') {
            const unsigned char hi = glob_key(p[i + 2], ci);
            if (ch >= lo && ch <= hi) matched = true;
            i += 3;
        } else {
            if (ch == lo) matched = true;
            i++;
        }
        first = false;
    }
    if (i < pn && p[i] == ']') i++;   /* consume closing bracket */
    *pi = i;
    return neg ? !matched : matched;
}

/* Iterative matcher shared by both public entry points.
 *
 * Remembers the most recent `*` (star_pi) and the input position it was
 * tried at (star_si).  On a mismatch the pattern rewinds to just after
 * that star and the star absorbs one more input byte; with no star to fall
 * back on the match fails. */
static bool glob_impl(const char* s, size_t sn,
                      const char* p, size_t pn, bool ci) {
    const size_t no_star = (size_t)-1;
    size_t si = 0, pi = 0;
    size_t star_pi = no_star, star_si = 0;

    while (si < sn) {
        if (pi < pn) {
            const char pc = p[pi];
            if (pc == '*') {
                star_pi = pi++;       /* remember star, skip it */
                star_si = si;
                continue;
            }
            if (pc == '?') {
                pi++;
                si++;
                continue;
            }
            if (pc == '[') {
                size_t after = pi + 1;
                if (match_class(p, pn, &after, s[si], ci)) {
                    pi = after;
                    si++;
                    continue;
                }
            } else if (glob_key(s[si], ci) == glob_key(pc, ci)) {
                pi++;
                si++;
                continue;
            }
        }

        /* Mismatch, or pattern exhausted with input left over. */
        if (star_pi == no_star) return false;
        pi = star_pi + 1;
        si = ++star_si;
    }

    /* Consumed all of input — pattern must be at end, modulo trailing stars. */
    while (pi < pn && p[pi] == '*') pi++;
    return pi == pn;
}

/* Case-sensitive glob match of s[0..sn) against pattern p[0..pn). */
bool ray_glob_match(const char* s, size_t sn, const char* p, size_t pn) {
    return glob_impl(s, sn, p, pn, false);
}

/* Glob match with ASCII letters folded to lowercase on both sides. */
bool ray_glob_match_ci(const char* s, size_t sn, const char* p, size_t pn) {
    return glob_impl(s, sn, p, pn, true);
}
+ * + * Lenient parsing policy: an unterminated character class (e.g. pattern + * "abc[def" with no closing `]`) is accepted — the class consumes input + * up to the end of the pattern and the match continues with whatever + * `matched` flag accumulated. This matches glibc fnmatch's permissive + * behaviour and avoids surprising `error: parse` mid-search. Callers + * that want strict validation should pre-validate the pattern. */ +bool ray_glob_match(const char* s, size_t sn, const char* p, size_t pn); +bool ray_glob_match_ci(const char* s, size_t sn, const char* p, size_t pn); + +#endif /* RAY_OPS_GLOB_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/graph.c b/crates/rayforce-sys/vendor/rayforce/src/ops/graph.c new file mode 100644 index 0000000..3d68f46 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/graph.c @@ -0,0 +1,1822 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "graph.h" +#include "store/csr.h" +#include "store/hnsw.h" +#include "mem/sys.h" +#include + +/* -------------------------------------------------------------------------- + * Graph allocation helpers + * -------------------------------------------------------------------------- */ + +#define GRAPH_INIT_CAP 4096 + +static inline ray_op_t* graph_fix_ptr(ray_op_t* p, ptrdiff_t delta) { + return p ? (ray_op_t*)((char*)p + delta) : NULL; +} + +static void graph_fixup_ext_ptrs(ray_graph_t* g, ptrdiff_t delta) { + for (uint32_t i = 0; i < g->ext_count; i++) { + ray_op_ext_t* ext = g->ext_nodes[i]; + if (!ext) continue; + + ext->base.inputs[0] = graph_fix_ptr(ext->base.inputs[0], delta); + ext->base.inputs[1] = graph_fix_ptr(ext->base.inputs[1], delta); + + switch (ext->base.opcode) { + case OP_SORT: + for (uint8_t k = 0; k < ext->sort.n_cols; k++) + ext->sort.columns[k] = graph_fix_ptr(ext->sort.columns[k], delta); + break; + case OP_GROUP: + for (uint8_t k = 0; k < ext->n_keys; k++) + ext->keys[k] = graph_fix_ptr(ext->keys[k], delta); + for (uint8_t a = 0; a < ext->n_aggs; a++) + ext->agg_ins[a] = graph_fix_ptr(ext->agg_ins[a], delta); + break; + case OP_JOIN: + case OP_ANTIJOIN: + for (uint8_t k = 0; k < ext->join.n_join_keys; k++) + ext->join.left_keys[k] = graph_fix_ptr(ext->join.left_keys[k], delta); + if (ext->join.right_keys) { + for (uint8_t k = 0; k < ext->join.n_join_keys; k++) + ext->join.right_keys[k] = graph_fix_ptr(ext->join.right_keys[k], delta); + } + break; + case OP_WINDOW_JOIN: + ext->asof.time_key = graph_fix_ptr(ext->asof.time_key, delta); + for (uint8_t k = 0; k < ext->asof.n_eq_keys; k++) + ext->asof.eq_keys[k] = graph_fix_ptr(ext->asof.eq_keys[k], delta); + 
break; + case OP_WINDOW: + for (uint8_t k = 0; k < ext->window.n_part_keys; k++) + ext->window.part_keys[k] = graph_fix_ptr(ext->window.part_keys[k], delta); + for (uint8_t k = 0; k < ext->window.n_order_keys; k++) + ext->window.order_keys[k] = graph_fix_ptr(ext->window.order_keys[k], delta); + for (uint8_t f = 0; f < ext->window.n_funcs; f++) + ext->window.func_inputs[f] = graph_fix_ptr(ext->window.func_inputs[f], delta); + break; + case OP_SELECT: + for (uint8_t k = 0; k < ext->sort.n_cols; k++) + ext->sort.columns[k] = graph_fix_ptr(ext->sort.columns[k], delta); + break; + case OP_PIVOT: + for (uint8_t k = 0; k < ext->pivot.n_index; k++) + ext->pivot.index_cols[k] = graph_fix_ptr(ext->pivot.index_cols[k], delta); + ext->pivot.pivot_col = graph_fix_ptr(ext->pivot.pivot_col, delta); + ext->pivot.value_col = graph_fix_ptr(ext->pivot.value_col, delta); + break; + /* Graph ops: no ray_op_t* pointers in ext union to fix */ + case OP_EXPAND: + case OP_VAR_EXPAND: + case OP_SHORTEST_PATH: + case OP_WCO_JOIN: + break; + default: + break; + } + } +} + +/* After realloc moves g->nodes, fix up all stored input pointers. + old_base is saved as uintptr_t before realloc to avoid GCC 14 + -Wuse-after-free on the stale pointer. */ +static void graph_fixup_ptrs(ray_graph_t* g, uintptr_t old_base) { + ptrdiff_t delta = (ptrdiff_t)((uintptr_t)g->nodes - old_base); + if (delta == 0) return; + for (uint32_t i = 0; i < g->node_count; i++) { + g->nodes[i].inputs[0] = graph_fix_ptr(g->nodes[i].inputs[0], delta); + g->nodes[i].inputs[1] = graph_fix_ptr(g->nodes[i].inputs[1], delta); + } + graph_fixup_ext_ptrs(g, delta); +} + +/* L3: node_count is uint32_t — theoretical overflow at 2^32 nodes is + unreachable in practice (would require ~128 GB for the nodes array). 
*/ +static ray_op_t* graph_alloc_node(ray_graph_t* g) { + if (g->node_count >= g->node_cap) { + uintptr_t old_base = (uintptr_t)g->nodes; + /* H2: Overflow guard — if node_cap is already > UINT32_MAX/2, + doubling would wrap around to a smaller value. */ + if (g->node_cap > UINT32_MAX / 2) return NULL; + uint32_t new_cap = g->node_cap * 2; + ray_op_t* new_nodes = (ray_op_t*)ray_sys_realloc(g->nodes, + new_cap * sizeof(ray_op_t)); + if (!new_nodes) return NULL; + g->nodes = new_nodes; + g->node_cap = new_cap; + graph_fixup_ptrs(g, old_base); + } + ray_op_t* n = &g->nodes[g->node_count]; + memset(n, 0, sizeof(ray_op_t)); + n->id = g->node_count; + g->node_count++; + return n; +} + +static ray_op_ext_t* graph_alloc_ext_node_ex(ray_graph_t* g, size_t extra) { + /* Extended nodes are 64 bytes; extra bytes appended for inline arrays */ + ray_op_ext_t* ext = (ray_op_ext_t*)ray_sys_alloc(sizeof(ray_op_ext_t) + extra); + if (!ext) return NULL; + memset(ext, 0, sizeof(ray_op_ext_t) + extra); + + /* Also add a placeholder in the nodes array for ID tracking */ + if (g->node_count >= g->node_cap) { + if (g->node_cap > UINT32_MAX / 2) { ray_sys_free(ext); return NULL; } + uintptr_t old_base = (uintptr_t)g->nodes; + uint32_t new_cap = g->node_cap * 2; + ray_op_t* new_nodes = (ray_op_t*)ray_sys_realloc(g->nodes, + new_cap * sizeof(ray_op_t)); + if (!new_nodes) { ray_sys_free(ext); return NULL; } + g->nodes = new_nodes; + g->node_cap = new_cap; + graph_fixup_ptrs(g, old_base); + } + ext->base.id = g->node_count; + /* H4: Do NOT copy ext->base to nodes[] here — the caller fills in + fields first and then syncs via g->nodes[ext->base.id] = ext->base. */ + memset(&g->nodes[g->node_count], 0, sizeof(ray_op_t)); + g->nodes[g->node_count].id = g->node_count; + g->node_count++; + + /* Track ext node for cleanup */ + if (g->ext_count >= g->ext_cap) { + if (g->ext_cap > UINT32_MAX / 2) { g->node_count--; ray_sys_free(ext); return NULL; } + uint32_t new_cap = g->ext_cap == 0 ? 
16 : g->ext_cap * 2; + ray_op_ext_t** new_exts = (ray_op_ext_t**)ray_sys_realloc(g->ext_nodes, + new_cap * sizeof(ray_op_ext_t*)); + if (!new_exts) { g->node_count--; ray_sys_free(ext); return NULL; } + g->ext_nodes = new_exts; + g->ext_cap = new_cap; + } + g->ext_nodes[g->ext_count++] = ext; + + return ext; +} + +static ray_op_ext_t* graph_alloc_ext_node(ray_graph_t* g) { + return graph_alloc_ext_node_ex(g, 0); +} + +/* Pointer to trailing bytes after the ext node */ +#define EXT_TRAIL(ext) ((char*)((ext) + 1)) + +/* -------------------------------------------------------------------------- + * ray_graph_new / ray_graph_free + * -------------------------------------------------------------------------- */ + +ray_graph_t* ray_graph_new(ray_t* tbl) { + ray_graph_t* g = (ray_graph_t*)ray_sys_alloc(sizeof(ray_graph_t)); + if (!g) return NULL; + + g->nodes = (ray_op_t*)ray_sys_alloc(GRAPH_INIT_CAP * sizeof(ray_op_t)); + if (!g->nodes) { ray_sys_free(g); return NULL; } + g->node_cap = GRAPH_INIT_CAP; + g->node_count = 0; + g->table = tbl; + if (tbl) ray_retain(tbl); + + g->tables = NULL; + g->n_tables = 0; + + g->ext_nodes = NULL; + g->ext_count = 0; + g->ext_cap = 0; + g->selection = NULL; + + g->cexpr_env_top = 0; /* compile-time lambda/let env, initially empty */ + + return g; +} + +void ray_graph_free(ray_graph_t* g) { + if (!g) return; + + /* M6: Release OP_CONST literal values before freeing ext nodes */ + for (uint32_t i = 0; i < g->ext_count; i++) { + ray_op_ext_t* ext = g->ext_nodes[i]; + if (ext && (g->nodes[ext->base.id].opcode == OP_CONST || + g->nodes[ext->base.id].opcode == OP_TIL) && ext->literal) { + ray_release(ext->literal); + } + /* Release runtime-built SIP bitmaps on graph traversal nodes */ + if (ext) { + uint16_t oc = g->nodes[ext->base.id].opcode; + if ((oc == OP_EXPAND || oc == OP_VAR_EXPAND || oc == OP_SHORTEST_PATH) + && ext->graph.sip_sel) { + ray_release((ray_t*)ext->graph.sip_sel); + } + if (oc == OP_ASTAR && ext->graph.node_props) { + 
ray_release((ray_t*)ext->graph.node_props); + } + } + } + /* Free seg_mask bitmaps (shared across ext nodes — deduplicate) */ + for (uint32_t i = 0; i < g->ext_count; i++) { + ray_op_ext_t* ext = g->ext_nodes[i]; + if (ext && ext->seg_mask) { + uint64_t* mask = ext->seg_mask; + ext->seg_mask = NULL; + /* Clear same pointer from other ext nodes */ + for (uint32_t j = i + 1; j < g->ext_count; j++) { + if (g->ext_nodes[j] && g->ext_nodes[j]->seg_mask == mask) + g->ext_nodes[j]->seg_mask = NULL; + } + ray_sys_free(mask); + } + } + /* Free extended nodes */ + for (uint32_t i = 0; i < g->ext_count; i++) { + ray_sys_free(g->ext_nodes[i]); + } + ray_sys_free(g->ext_nodes); + + ray_sys_free(g->nodes); + if (g->table) ray_release(g->table); + + /* Release table registry */ + if (g->tables) { + for (uint16_t i = 0; i < g->n_tables; i++) { + if (g->tables[i]) ray_release(g->tables[i]); + } + ray_sys_free(g->tables); + } + + if (g->selection) ray_release(g->selection); + ray_sys_free(g); +} + +/* -------------------------------------------------------------------------- + * Source ops + * -------------------------------------------------------------------------- */ + +ray_op_t* ray_scan(ray_graph_t* g, const char* col_name) { + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + ext->base.opcode = OP_SCAN; + ext->base.arity = 0; + + /* Intern the column name to get symbol ID */ + int64_t sym_id = ray_sym_intern(col_name, strlen(col_name)); + ext->sym = sym_id; + + /* Infer output type from the bound table */ + if (g->table) { + ray_t* col = ray_table_get_col(g->table, sym_id); + if (col) { + ext->base.out_type = col->type; + ext->base.est_rows = (uint32_t)col->len; + } + } + + /* Update the nodes array with the filled base */ + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_const_f64(ray_graph_t* g, double val) { + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + ext->base.opcode = 
OP_CONST; + ext->base.arity = 0; + ext->base.out_type = RAY_F64; + ext->literal = ray_f64(val); + /* L4: null/error check on allocation result */ + if (!ext->literal || RAY_IS_ERR(ext->literal)) ext->literal = NULL; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_const_i64(ray_graph_t* g, int64_t val) { + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + ext->base.opcode = OP_CONST; + ext->base.arity = 0; + ext->base.out_type = RAY_I64; + ext->literal = ray_i64(val); + /* L4: null/error check on allocation result */ + if (!ext->literal || RAY_IS_ERR(ext->literal)) ext->literal = NULL; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_const_bool(ray_graph_t* g, bool val) { + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + ext->base.opcode = OP_CONST; + ext->base.arity = 0; + ext->base.out_type = RAY_BOOL; + ext->literal = ray_bool(val); + /* L4: null/error check on allocation result */ + if (!ext->literal || RAY_IS_ERR(ext->literal)) ext->literal = NULL; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_const_str(ray_graph_t* g, const char* s, size_t len) { + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + ext->base.opcode = OP_CONST; + ext->base.arity = 0; + ext->base.out_type = RAY_SYM; /* string constants resolve to SYM at exec time */ + ext->literal = ray_str(s, len); + /* L4: null/error check on allocation result */ + if (!ext->literal || RAY_IS_ERR(ext->literal)) ext->literal = NULL; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_til(ray_graph_t* g, int64_t n) { + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + ext->base.opcode = OP_TIL; + ext->base.arity = 0; + ext->base.out_type = RAY_I64; + ext->base.est_rows = (uint32_t)(n > UINT32_MAX ? 
UINT32_MAX : n); + ext->literal = ray_i64(n); /* store n as literal */ + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_const_vec(ray_graph_t* g, ray_t* vec) { + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + ext->base.opcode = OP_CONST; + ext->base.arity = 0; + ext->base.out_type = vec->type; + ext->base.est_rows = (uint32_t)vec->len; + ext->literal = vec; + ray_retain(vec); + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +/* Generic const-atom constructor. Handles any scalar atom type + * (RAY_SYM, RAY_DATE, RAY_TIME, RAY_TIMESTAMP, RAY_GUID, RAY_NULL, + * and any other ray_t* used as an immediate literal). The executor + * OP_CONST handler just returns ext->literal, so the same retain/ + * store mechanism as ray_const_vec works for atoms too. */ +ray_op_t* ray_const_atom(ray_graph_t* g, ray_t* atom) { + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + ext->base.opcode = OP_CONST; + ext->base.arity = 0; + /* Atom types are stored negated (-RAY_I64 etc); the executor + * does not rely on out_type for OP_CONST dispatch, but we keep + * it consistent with the source atom. 
*/ + ext->base.out_type = atom->type; + ext->base.est_rows = 1; + ext->literal = atom; + ray_retain(atom); + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_const_table(ray_graph_t* g, ray_t* tbl) { + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + ext->base.opcode = OP_CONST; + ext->base.arity = 0; + ext->base.out_type = RAY_TABLE; + ext->literal = tbl; + ray_retain(tbl); + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +/* -------------------------------------------------------------------------- + * Helper: create unary/binary node + * -------------------------------------------------------------------------- */ + +static ray_op_t* make_unary(ray_graph_t* g, uint16_t opcode, ray_op_t* a, int8_t out_type) { + /* Save ID before alloc — realloc may invalidate the pointer */ + uint32_t a_id = a->id; + uint32_t est = a->est_rows; + ray_op_t* n = graph_alloc_node(g); + if (!n) return NULL; + a = &g->nodes[a_id]; /* re-resolve after potential realloc */ + + n->opcode = opcode; + n->arity = 1; + n->inputs[0] = a; + n->out_type = out_type; + n->est_rows = est; + return n; +} + +static ray_op_t* make_binary(ray_graph_t* g, uint16_t opcode, ray_op_t* a, ray_op_t* b, int8_t out_type) { + /* Save IDs before alloc — realloc may invalidate the pointers */ + uint32_t a_id = a->id; + uint32_t b_id = b->id; + uint32_t est = a->est_rows > b->est_rows ? a->est_rows : b->est_rows; + ray_op_t* n = graph_alloc_node(g); + if (!n) return NULL; + a = &g->nodes[a_id]; /* re-resolve after potential realloc */ + b = &g->nodes[b_id]; + + n->opcode = opcode; + n->arity = 2; + n->inputs[0] = a; + n->inputs[1] = b; + n->out_type = out_type; + n->est_rows = est; + return n; +} + +/* Type promotion: BOOL < U8 < I16 < I32 < I64 < F64. + * RAY_STR is its own type class — not promotable to numeric types. 
*/ +static int8_t promote(int8_t a, int8_t b) { + if (a == RAY_STR || b == RAY_STR) return RAY_STR; + if (a == RAY_F64 || b == RAY_F64) return RAY_F64; + if (a == RAY_I64 || b == RAY_I64 || a == RAY_SYM || b == RAY_SYM || + a == RAY_TIMESTAMP || b == RAY_TIMESTAMP) return RAY_I64; + if (a == RAY_I32 || b == RAY_I32 || + a == RAY_DATE || b == RAY_DATE || a == RAY_TIME || b == RAY_TIME) return RAY_I32; + if (a == RAY_I16 || b == RAY_I16) return RAY_I16; + if (a == RAY_U8 || b == RAY_U8) return RAY_U8; + return RAY_BOOL; +} + +/* -------------------------------------------------------------------------- + * Unary element-wise ops + * -------------------------------------------------------------------------- */ + +ray_op_t* ray_neg(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_NEG, a, a->out_type); } +ray_op_t* ray_abs(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_ABS, a, a->out_type); } +ray_op_t* ray_not(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_NOT, a, RAY_BOOL); } +ray_op_t* ray_sqrt_op(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_SQRT, a, RAY_F64); } +ray_op_t* ray_log_op(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_LOG, a, RAY_F64); } +ray_op_t* ray_exp_op(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_EXP, a, RAY_F64); } +ray_op_t* ray_ceil_op(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_CEIL, a, a->out_type); } +ray_op_t* ray_floor_op(ray_graph_t* g, ray_op_t* a){ return make_unary(g, OP_FLOOR, a, a->out_type); } +ray_op_t* ray_round_op(ray_graph_t* g, ray_op_t* a){ return make_unary(g, OP_ROUND, a, a->out_type); } +ray_op_t* ray_isnull(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_ISNULL, a, RAY_BOOL); } + +ray_op_t* ray_cast(ray_graph_t* g, ray_op_t* a, int8_t target_type) { + return make_unary(g, OP_CAST, a, target_type); +} + +/* -------------------------------------------------------------------------- + * Binary element-wise ops + * 
-------------------------------------------------------------------------- */ + +/* Generic binary op constructor — opcode-driven, no switch/case needed by caller */ +ray_op_t* ray_binop(ray_graph_t* g, uint16_t opcode, ray_op_t* a, ray_op_t* b) { + int8_t out; + switch (opcode) { + case OP_EQ: case OP_NE: case OP_LT: case OP_LE: + case OP_GT: case OP_GE: case OP_AND: case OP_OR: + out = RAY_BOOL; break; + case OP_DIV: + out = RAY_F64; break; + default: + out = promote(a->out_type, b->out_type); break; + } + return make_binary(g, opcode, a, b, out); +} + +ray_op_t* ray_add(ray_graph_t* g, ray_op_t* a, ray_op_t* b) { return make_binary(g, OP_ADD, a, b, promote(a->out_type, b->out_type)); } +ray_op_t* ray_sub(ray_graph_t* g, ray_op_t* a, ray_op_t* b) { return make_binary(g, OP_SUB, a, b, promote(a->out_type, b->out_type)); } +ray_op_t* ray_mul(ray_graph_t* g, ray_op_t* a, ray_op_t* b) { return make_binary(g, OP_MUL, a, b, promote(a->out_type, b->out_type)); } +ray_op_t* ray_div(ray_graph_t* g, ray_op_t* a, ray_op_t* b) { return make_binary(g, OP_DIV, a, b, RAY_F64); } +ray_op_t* ray_mod(ray_graph_t* g, ray_op_t* a, ray_op_t* b) { return make_binary(g, OP_MOD, a, b, promote(a->out_type, b->out_type)); } + +ray_op_t* ray_eq(ray_graph_t* g, ray_op_t* a, ray_op_t* b) { return make_binary(g, OP_EQ, a, b, RAY_BOOL); } +ray_op_t* ray_ne(ray_graph_t* g, ray_op_t* a, ray_op_t* b) { return make_binary(g, OP_NE, a, b, RAY_BOOL); } +ray_op_t* ray_lt(ray_graph_t* g, ray_op_t* a, ray_op_t* b) { return make_binary(g, OP_LT, a, b, RAY_BOOL); } +ray_op_t* ray_le(ray_graph_t* g, ray_op_t* a, ray_op_t* b) { return make_binary(g, OP_LE, a, b, RAY_BOOL); } +ray_op_t* ray_gt(ray_graph_t* g, ray_op_t* a, ray_op_t* b) { return make_binary(g, OP_GT, a, b, RAY_BOOL); } +ray_op_t* ray_ge(ray_graph_t* g, ray_op_t* a, ray_op_t* b) { return make_binary(g, OP_GE, a, b, RAY_BOOL); } +ray_op_t* ray_and(ray_graph_t* g, ray_op_t* a, ray_op_t* b){ return make_binary(g, OP_AND, a, b, RAY_BOOL); } 
+ray_op_t* ray_or(ray_graph_t* g, ray_op_t* a, ray_op_t* b) { return make_binary(g, OP_OR, a, b, RAY_BOOL); } +ray_op_t* ray_min2(ray_graph_t* g, ray_op_t* a, ray_op_t* b){ return make_binary(g, OP_MIN2, a, b, promote(a->out_type, b->out_type)); } +ray_op_t* ray_max2(ray_graph_t* g, ray_op_t* a, ray_op_t* b){ return make_binary(g, OP_MAX2, a, b, promote(a->out_type, b->out_type)); } +ray_op_t* ray_in(ray_graph_t* g, ray_op_t* col, ray_op_t* set){ return make_binary(g, OP_IN, col, set, RAY_BOOL); } +ray_op_t* ray_not_in(ray_graph_t* g, ray_op_t* col, ray_op_t* set){ return make_binary(g, OP_NOT_IN, col, set, RAY_BOOL); } + +ray_op_t* ray_if(ray_graph_t* g, ray_op_t* cond, ray_op_t* then_val, ray_op_t* else_val) { + /* 3-input node: cond, then, else — needs ext node */ + uint32_t cond_id = cond->id; + uint32_t then_id = then_val->id; + uint32_t else_id = else_val->id; + int8_t out_type = promote(then_val->out_type, else_val->out_type); + /* IF preserves string types: promote() handles RAY_STR (wins over SYM); + * SYM override only applies when neither side is RAY_STR */ + if (out_type != RAY_STR && + (then_val->out_type == RAY_SYM || else_val->out_type == RAY_SYM)) + out_type = RAY_SYM; + uint32_t est = cond->est_rows; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + /* Re-resolve after potential realloc (else_val stored as index, not pointer) */ + cond = &g->nodes[cond_id]; + then_val = &g->nodes[then_id]; + + ext->base.opcode = OP_IF; + ext->base.arity = 2; /* inputs[0]=cond, inputs[1]=then; else via ext */ + ext->base.inputs[0] = cond; + ext->base.inputs[1] = then_val; + ext->base.out_type = out_type; + ext->base.est_rows = est; + /* Store else_val as a node ID (not a pointer) in the literal field. + * Recovered via (uint32_t)(uintptr_t)ext->literal in fuse.c/exec.c. 
*/ + ext->literal = (ray_t*)(uintptr_t)else_id; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_like(ray_graph_t* g, ray_op_t* input, ray_op_t* pattern) { + return make_binary(g, OP_LIKE, input, pattern, RAY_BOOL); +} + +ray_op_t* ray_ilike(ray_graph_t* g, ray_op_t* input, ray_op_t* pattern) { + return make_binary(g, OP_ILIKE, input, pattern, RAY_BOOL); +} + +/* String ops */ +ray_op_t* ray_upper(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_UPPER, a, a->out_type == RAY_STR ? RAY_STR : RAY_SYM); } +ray_op_t* ray_lower(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_LOWER, a, a->out_type == RAY_STR ? RAY_STR : RAY_SYM); } +ray_op_t* ray_strlen(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_STRLEN, a, RAY_I64); } +ray_op_t* ray_trim_op(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_TRIM, a, a->out_type == RAY_STR ? RAY_STR : RAY_SYM); } + +ray_op_t* ray_substr(ray_graph_t* g, ray_op_t* str, ray_op_t* start, ray_op_t* len) { + /* 3-input: str=inputs[0], start=inputs[1], len stored via literal field */ + uint32_t s_id = str->id; + uint32_t st_id = start->id; + uint32_t l_id = len->id; + uint32_t est = str->est_rows; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + str = &g->nodes[s_id]; + start = &g->nodes[st_id]; + + ext->base.opcode = OP_SUBSTR; + ext->base.arity = 2; + ext->base.inputs[0] = str; + ext->base.inputs[1] = start; + ext->base.out_type = (str->out_type == RAY_STR) ? 
RAY_STR : RAY_SYM; + ext->base.est_rows = est; + ext->literal = (ray_t*)(uintptr_t)l_id; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_replace(ray_graph_t* g, ray_op_t* str, ray_op_t* from, ray_op_t* to) { + /* 3-input: str=inputs[0], from=inputs[1], to stored via literal field */ + uint32_t s_id = str->id; + uint32_t f_id = from->id; + uint32_t t_id = to->id; + uint32_t est = str->est_rows; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + str = &g->nodes[s_id]; + from = &g->nodes[f_id]; + + ext->base.opcode = OP_REPLACE; + ext->base.arity = 2; + ext->base.inputs[0] = str; + ext->base.inputs[1] = from; + ext->base.out_type = (str->out_type == RAY_STR) ? RAY_STR : RAY_SYM; + ext->base.est_rows = est; + ext->literal = (ray_t*)(uintptr_t)t_id; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_concat(ray_graph_t* g, ray_op_t** args, int n) { + /* Variadic: first 2 in inputs[], rest in trailing IDs */ + if (!args || n < 2) return NULL; + /* M4: Guard VLA upper bound */ + if (n > 256) return NULL; + size_t n_args = (size_t)n; + if (n_args > (SIZE_MAX / sizeof(uint32_t))) return NULL; + size_t extra = (n > 2) ? 
(size_t)(n - 2) * sizeof(uint32_t) : 0; + + /* Save IDs before alloc (n is small — bounded by function arity) */ + uint32_t ids[n]; + for (int i = 0; i < n; i++) ids[i] = args[i]->id; + uint32_t est = args[0]->est_rows; + + ray_op_ext_t* ext = graph_alloc_ext_node_ex(g, extra); + if (!ext) return NULL; + + ext->base.opcode = OP_CONCAT; + ext->base.arity = 2; + ext->base.inputs[0] = &g->nodes[ids[0]]; + ext->base.inputs[1] = &g->nodes[ids[1]]; + /* RAY_STR if any input is RAY_STR, else RAY_SYM */ + int8_t out_type = RAY_SYM; + for (int i = 0; i < n; i++) { + if (args[i]->out_type == RAY_STR) { out_type = RAY_STR; break; } + } + ext->base.out_type = out_type; + ext->base.est_rows = est; + ext->sym = n; /* total arg count stored in sym field */ + + /* Extra args in trailing bytes */ + uint32_t* trail = (uint32_t*)EXT_TRAIL(ext); + for (int i = 2; i < n; i++) trail[i - 2] = ids[i]; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +/* -------------------------------------------------------------------------- + * Reduction ops + * -------------------------------------------------------------------------- */ + +ray_op_t* ray_sum(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_SUM, a, a->out_type == RAY_F64 ? RAY_F64 : RAY_I64); } +ray_op_t* ray_prod(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_PROD, a, a->out_type == RAY_F64 ? 
RAY_F64 : RAY_I64); } +ray_op_t* ray_min_op(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_MIN, a, a->out_type); } +ray_op_t* ray_max_op(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_MAX, a, a->out_type); } +ray_op_t* ray_count(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_COUNT, a, RAY_I64); } +ray_op_t* ray_avg(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_AVG, a, RAY_F64); } +ray_op_t* ray_first(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_FIRST, a, a->out_type); } +ray_op_t* ray_last(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_LAST, a, a->out_type); } +ray_op_t* ray_count_distinct(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_COUNT_DISTINCT, a, RAY_I64); } +ray_op_t* ray_stddev(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_STDDEV, a, RAY_F64); } +ray_op_t* ray_stddev_pop(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_STDDEV_POP, a, RAY_F64); } +ray_op_t* ray_var(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_VAR, a, RAY_F64); } +ray_op_t* ray_var_pop(ray_graph_t* g, ray_op_t* a) { return make_unary(g, OP_VAR_POP, a, RAY_F64); } + +/* -------------------------------------------------------------------------- + * Structural ops + * -------------------------------------------------------------------------- */ + +ray_op_t* ray_filter(ray_graph_t* g, ray_op_t* input, ray_op_t* predicate) { + uint32_t input_id = input->id; + uint32_t pred_id = predicate->id; + uint32_t est = input->est_rows / 2; /* estimate: 50% selectivity */ + + ray_op_t* n = graph_alloc_node(g); + if (!n) return NULL; + + input = &g->nodes[input_id]; + predicate = &g->nodes[pred_id]; + + n->opcode = OP_FILTER; + n->arity = 2; + n->inputs[0] = input; + n->inputs[1] = predicate; + n->out_type = input->out_type; + n->est_rows = est; + return n; +} + +ray_op_t* ray_sort_op(ray_graph_t* g, ray_op_t* table_node, + ray_op_t** keys, uint8_t* descs, uint8_t* nulls_first, + uint8_t n_cols) { + uint32_t table_id = 
table_node->id; + /* L5: n_cols is uint8_t (max 255) so 256-element array is always sufficient. */ + uint32_t key_ids[256]; + for (uint8_t i = 0; i < n_cols; i++) key_ids[i] = keys[i]->id; + + size_t keys_sz = (size_t)n_cols * sizeof(ray_op_t*); + size_t descs_sz = (size_t)n_cols; + size_t nf_sz = (size_t)n_cols; + ray_op_ext_t* ext = graph_alloc_ext_node_ex(g, keys_sz + descs_sz + nf_sz); + if (!ext) return NULL; + + table_node = &g->nodes[table_id]; + + ext->base.opcode = OP_SORT; + ext->base.arity = 1; + ext->base.inputs[0] = table_node; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = table_node->est_rows; + + /* Arrays embedded in trailing space — freed with ext node */ + char* trail = EXT_TRAIL(ext); + ext->sort.columns = (ray_op_t**)trail; + for (uint8_t i = 0; i < n_cols; i++) + ext->sort.columns[i] = &g->nodes[key_ids[i]]; + ext->sort.desc = (uint8_t*)(trail + keys_sz); + memcpy(ext->sort.desc, descs, descs_sz); + ext->sort.nulls_first = (uint8_t*)(trail + keys_sz + descs_sz); + if (nulls_first) { + memcpy(ext->sort.nulls_first, nulls_first, nf_sz); + } else { + /* Default: NULLS LAST for ASC, NULLS FIRST for DESC */ + for (uint8_t i = 0; i < n_cols; i++) + ext->sort.nulls_first[i] = descs[i] ? 
1 : 0; + } + ext->sort.n_cols = n_cols; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_group(ray_graph_t* g, ray_op_t** keys, uint8_t n_keys, + uint16_t* agg_ops, ray_op_t** agg_ins, uint8_t n_aggs) { + uint32_t key_ids[256]; + uint32_t agg_ids[256]; + for (uint8_t i = 0; i < n_keys; i++) key_ids[i] = keys[i]->id; + for (uint8_t i = 0; i < n_aggs; i++) agg_ids[i] = agg_ins[i]->id; + + size_t keys_sz = (size_t)n_keys * sizeof(ray_op_t*); + size_t ops_sz = (size_t)n_aggs * sizeof(uint16_t); + size_t ins_sz = (size_t)n_aggs * sizeof(ray_op_t*); + /* Align ops after keys (pointer-sized), ins after ops (needs ptr alignment) */ + size_t ops_off = keys_sz; + size_t ins_off = ops_off + ops_sz; + /* Round ins_off up to pointer alignment */ + ins_off = (ins_off + sizeof(ray_op_t*) - 1) & ~(sizeof(ray_op_t*) - 1); + ray_op_ext_t* ext = graph_alloc_ext_node_ex(g, ins_off + ins_sz); + if (!ext) return NULL; + + ext->base.opcode = OP_GROUP; + ext->base.arity = 0; + ext->base.out_type = RAY_TABLE; + if (n_keys > 0 && keys[0]) + ext->base.est_rows = g->nodes[key_ids[0]].est_rows / 10; /* rough estimate */ + ext->base.inputs[0] = n_keys > 0 ? 
&g->nodes[key_ids[0]] : NULL; + + /* Arrays embedded in trailing space — freed with ext node */ + char* trail = EXT_TRAIL(ext); + ext->keys = (ray_op_t**)trail; + for (uint8_t i = 0; i < n_keys; i++) + ext->keys[i] = &g->nodes[key_ids[i]]; + ext->agg_ops = (uint16_t*)(trail + ops_off); + if (ops_sz > 0) memcpy(ext->agg_ops, agg_ops, ops_sz); + ext->agg_ins = (ray_op_t**)(trail + ins_off); + for (uint8_t i = 0; i < n_aggs; i++) + ext->agg_ins[i] = &g->nodes[agg_ids[i]]; + ext->n_keys = n_keys; + ext->n_aggs = n_aggs; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_distinct(ray_graph_t* g, ray_op_t** keys, uint8_t n_keys) { + return ray_group(g, keys, n_keys, NULL, NULL, 0); +} + +ray_op_t* ray_pivot_op(ray_graph_t* g, + ray_op_t** index_cols, uint8_t n_index, + ray_op_t* pivot_col, + ray_op_t* value_col, + uint16_t agg_op) { + uint32_t idx_ids[16]; + for (uint8_t i = 0; i < n_index; i++) idx_ids[i] = index_cols[i]->id; + uint32_t pcol_id = pivot_col->id; + uint32_t vcol_id = value_col->id; + + size_t idx_sz = (size_t)n_index * sizeof(ray_op_t*); + ray_op_ext_t* ext = graph_alloc_ext_node_ex(g, idx_sz); + if (!ext) return NULL; + + ext->base.opcode = OP_PIVOT; + ext->base.arity = 0; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = 0; /* unknown until execution */ + + char* trail = EXT_TRAIL(ext); + ext->pivot.index_cols = (ray_op_t**)trail; + for (uint8_t i = 0; i < n_index; i++) + ext->pivot.index_cols[i] = &g->nodes[idx_ids[i]]; + ext->pivot.pivot_col = &g->nodes[pcol_id]; + ext->pivot.value_col = &g->nodes[vcol_id]; + ext->pivot.agg_op = agg_op; + ext->pivot.n_index = n_index; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_join(ray_graph_t* g, + ray_op_t* left_table, ray_op_t** left_keys, + ray_op_t* right_table, ray_op_t** right_keys, + uint8_t n_keys, uint8_t join_type) { + uint32_t left_table_id = left_table->id; + uint32_t right_table_id = right_table->id; + 
uint32_t lkey_ids[256]; + uint32_t rkey_ids[256]; + for (uint8_t i = 0; i < n_keys; i++) { + lkey_ids[i] = left_keys[i]->id; + rkey_ids[i] = right_keys[i]->id; + } + + size_t keys_sz = (size_t)n_keys * sizeof(ray_op_t*); + ray_op_ext_t* ext = graph_alloc_ext_node_ex(g, keys_sz * 2); + if (!ext) return NULL; + + left_table = &g->nodes[left_table_id]; + right_table = &g->nodes[right_table_id]; + + ext->base.opcode = OP_JOIN; + ext->base.arity = 2; + ext->base.inputs[0] = left_table; + ext->base.inputs[1] = right_table; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = left_table->est_rows; + + /* Arrays embedded in trailing space — freed with ext node */ + char* trail = EXT_TRAIL(ext); + ext->join.left_keys = (ray_op_t**)trail; + for (uint8_t i = 0; i < n_keys; i++) + ext->join.left_keys[i] = &g->nodes[lkey_ids[i]]; + ext->join.right_keys = (ray_op_t**)(trail + (size_t)n_keys * sizeof(ray_op_t*)); + for (uint8_t i = 0; i < n_keys; i++) + ext->join.right_keys[i] = &g->nodes[rkey_ids[i]]; + ext->join.n_join_keys = n_keys; + ext->join.join_type = join_type; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_antijoin(ray_graph_t* g, + ray_op_t* left_table, ray_op_t** left_keys, + ray_op_t* right_table, ray_op_t** right_keys, + uint8_t n_keys) { + uint32_t left_table_id = left_table->id; + uint32_t right_table_id = right_table->id; + uint32_t lkey_ids[256]; + uint32_t rkey_ids[256]; + for (uint8_t i = 0; i < n_keys; i++) { + lkey_ids[i] = left_keys[i]->id; + rkey_ids[i] = right_keys[i]->id; + } + + size_t keys_sz = (size_t)n_keys * sizeof(ray_op_t*); + ray_op_ext_t* ext = graph_alloc_ext_node_ex(g, keys_sz * 2); + if (!ext) return NULL; + + left_table = &g->nodes[left_table_id]; + right_table = &g->nodes[right_table_id]; + + ext->base.opcode = OP_ANTIJOIN; + ext->base.arity = 2; + ext->base.inputs[0] = left_table; + ext->base.inputs[1] = right_table; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = 
left_table->est_rows; + + char* trail = EXT_TRAIL(ext); + ext->join.left_keys = (ray_op_t**)trail; + for (uint8_t i = 0; i < n_keys; i++) + ext->join.left_keys[i] = &g->nodes[lkey_ids[i]]; + ext->join.right_keys = (ray_op_t**)(trail + (size_t)n_keys * sizeof(ray_op_t*)); + for (uint8_t i = 0; i < n_keys; i++) + ext->join.right_keys[i] = &g->nodes[rkey_ids[i]]; + ext->join.n_join_keys = n_keys; + ext->join.join_type = 3; /* anti */ + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_asof_join(ray_graph_t* g, + ray_op_t* left_table, ray_op_t* right_table, + ray_op_t* time_key, + ray_op_t** eq_keys, uint8_t n_eq_keys, + uint8_t join_type) { + uint32_t left_id = left_table->id; + uint32_t right_id = right_table->id; + uint32_t time_id = time_key->id; + uint32_t eq_ids[256]; + for (uint8_t i = 0; i < n_eq_keys; i++) eq_ids[i] = eq_keys[i]->id; + + /* Trailing: [eq_keys: n_eq_keys * ptr] */ + size_t keys_sz = (size_t)n_eq_keys * sizeof(ray_op_t*); + ray_op_ext_t* ext = graph_alloc_ext_node_ex(g, keys_sz); + if (!ext) return NULL; + + left_table = &g->nodes[left_id]; + right_table = &g->nodes[right_id]; + + ext->base.opcode = OP_WINDOW_JOIN; + ext->base.arity = 2; + ext->base.inputs[0] = left_table; + ext->base.inputs[1] = right_table; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = left_table->est_rows; + + ext->asof.time_key = &g->nodes[time_id]; + ext->asof.n_eq_keys = n_eq_keys; + ext->asof.join_type = join_type; + ext->asof.eq_keys = (ray_op_t**)EXT_TRAIL(ext); + for (uint8_t i = 0; i < n_eq_keys; i++) + ext->asof.eq_keys[i] = &g->nodes[eq_ids[i]]; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_window_op(ray_graph_t* g, ray_op_t* table_node, + ray_op_t** part_keys, uint8_t n_part, + ray_op_t** order_keys, uint8_t* order_descs, uint8_t n_order, + uint8_t* func_kinds, ray_op_t** func_inputs, + int64_t* func_params, uint8_t n_funcs, + uint8_t frame_type, uint8_t 
frame_start, uint8_t frame_end, + int64_t frame_start_n, int64_t frame_end_n) { + uint32_t part_ids[256]; + uint32_t order_ids[256]; + uint32_t func_ids[256]; + for (uint8_t i = 0; i < n_part; i++) part_ids[i] = part_keys[i]->id; + for (uint8_t i = 0; i < n_order; i++) order_ids[i] = order_keys[i]->id; + for (uint8_t i = 0; i < n_funcs; i++) func_ids[i] = func_inputs[i]->id; + + /* Trailing layout: + * [part_keys: n_part * ptr] + * [order_keys: n_order * ptr] + * [order_descs: n_order * 1B] + * [padding to ptr alignment] + * [func_inputs: n_funcs * ptr] + * [func_kinds: n_funcs * 1B] + * [padding to 8B alignment] + * [func_params: n_funcs * 8B] + */ + size_t pk_sz = (size_t)n_part * sizeof(ray_op_t*); + size_t ok_sz = (size_t)n_order * sizeof(ray_op_t*); + size_t od_sz = (size_t)n_order; + size_t od_end = pk_sz + ok_sz + od_sz; + size_t fi_off = (od_end + sizeof(ray_op_t*) - 1) & ~(sizeof(ray_op_t*) - 1); + size_t fi_sz = (size_t)n_funcs * sizeof(ray_op_t*); + size_t fk_off = fi_off + fi_sz; + size_t fk_sz = (size_t)n_funcs; + size_t fp_off = (fk_off + fk_sz + 7) & ~(size_t)7; + size_t fp_sz = (size_t)n_funcs * sizeof(int64_t); + size_t total = fp_off + fp_sz; + + /* Save IDs before alloc — realloc may invalidate pointers */ + uint32_t table_id = table_node->id; + uint32_t est = table_node->est_rows; + + ray_op_ext_t* ext = graph_alloc_ext_node_ex(g, total); + if (!ext) return NULL; + + /* Re-resolve table_node after potential realloc */ + table_node = &g->nodes[table_id]; + + ext->base.opcode = OP_WINDOW; + ext->base.arity = 1; + ext->base.inputs[0] = table_node; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = est; /* window preserves row count */ + + /* Fill trailing arrays */ + char* trail = EXT_TRAIL(ext); + ext->window.part_keys = (ray_op_t**)trail; + for (uint8_t i = 0; i < n_part; i++) + ext->window.part_keys[i] = &g->nodes[part_ids[i]]; + + ext->window.order_keys = (ray_op_t**)(trail + pk_sz); + for (uint8_t i = 0; i < n_order; i++) + 
ext->window.order_keys[i] = &g->nodes[order_ids[i]]; + + ext->window.order_descs = (uint8_t*)(trail + pk_sz + ok_sz); + if (n_order) memcpy(ext->window.order_descs, order_descs, od_sz); + + ext->window.func_inputs = (ray_op_t**)(trail + fi_off); + for (uint8_t i = 0; i < n_funcs; i++) + ext->window.func_inputs[i] = &g->nodes[func_ids[i]]; + + ext->window.func_kinds = (uint8_t*)(trail + fk_off); + if (n_funcs) memcpy(ext->window.func_kinds, func_kinds, fk_sz); + + ext->window.func_params = (int64_t*)(trail + fp_off); + if (n_funcs) memcpy(ext->window.func_params, func_params, fp_sz); + + ext->window.n_part_keys = n_part; + ext->window.n_order_keys = n_order; + ext->window.n_funcs = n_funcs; + ext->window.frame_type = frame_type; + ext->window.frame_start = frame_start; + ext->window.frame_end = frame_end; + ext->window.frame_start_n = frame_start_n; + ext->window.frame_end_n = frame_end_n; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_select(ray_graph_t* g, ray_op_t* input, + ray_op_t** cols, uint8_t n_cols) { + uint32_t input_id = input->id; + uint32_t col_ids[256]; + for (uint8_t i = 0; i < n_cols; i++) col_ids[i] = cols[i]->id; + + size_t cols_sz = (size_t)n_cols * sizeof(ray_op_t*); + ray_op_ext_t* ext = graph_alloc_ext_node_ex(g, cols_sz); + if (!ext) return NULL; + + input = &g->nodes[input_id]; + + ext->base.opcode = OP_SELECT; + ext->base.arity = 1; + ext->base.inputs[0] = input; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = input->est_rows; + + /* Array embedded in trailing space — freed with ext node */ + ext->sort.columns = (ray_op_t**)EXT_TRAIL(ext); + for (uint8_t i = 0; i < n_cols; i++) + ext->sort.columns[i] = &g->nodes[col_ids[i]]; + ext->sort.n_cols = n_cols; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +/* L6: When n (stored as ext->sym) is 0, HEAD produces an empty result + with the same schema as the input. 
*/ +ray_op_t* ray_head(ray_graph_t* g, ray_op_t* input, int64_t n) { + uint32_t input_id = input->id; + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + input = &g->nodes[input_id]; + + ext->base.opcode = OP_HEAD; + ext->base.arity = 1; + ext->base.inputs[0] = input; + ext->base.out_type = input->out_type; + ext->base.est_rows = (uint32_t)n; + ext->sym = n; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_tail(ray_graph_t* g, ray_op_t* input, int64_t n) { + uint32_t input_id = input->id; + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + input = &g->nodes[input_id]; + + ext->base.opcode = OP_TAIL; + ext->base.arity = 1; + ext->base.inputs[0] = input; + ext->base.out_type = input->out_type; + ext->base.est_rows = (uint32_t)n; + ext->sym = n; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_alias(ray_graph_t* g, ray_op_t* input, const char* name) { + uint32_t input_id = input->id; + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + input = &g->nodes[input_id]; + + ext->base.opcode = OP_ALIAS; + ext->base.arity = 1; + ext->base.inputs[0] = input; + ext->base.out_type = input->out_type; + ext->base.est_rows = input->est_rows; + ext->sym = ray_sym_intern(name, strlen(name)); + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_extract(ray_graph_t* g, ray_op_t* col, int64_t field) { + uint32_t col_id = col->id; + uint32_t est = col->est_rows; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + col = &g->nodes[col_id]; /* re-resolve after potential realloc */ + + ext->base.opcode = OP_EXTRACT; + ext->base.arity = 1; + ext->base.inputs[0] = col; + ext->base.out_type = RAY_I64; + ext->base.est_rows = est; + ext->sym = field; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_date_trunc(ray_graph_t* g, 
ray_op_t* col, int64_t field) { + uint32_t col_id = col->id; + uint32_t est = col->est_rows; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + col = &g->nodes[col_id]; /* re-resolve after potential realloc */ + + ext->base.opcode = OP_DATE_TRUNC; + ext->base.arity = 1; + ext->base.inputs[0] = col; + ext->base.out_type = RAY_TIMESTAMP; /* returns timestamp (microseconds) */ + ext->base.est_rows = est; + ext->sym = field; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_materialize(ray_graph_t* g, ray_op_t* input) { + uint32_t input_id = input->id; + ray_op_t* n = graph_alloc_node(g); + if (!n) return NULL; + + input = &g->nodes[input_id]; + + n->opcode = OP_MATERIALIZE; + n->arity = 1; + n->inputs[0] = input; + n->out_type = input->out_type; + n->est_rows = input->est_rows; + return n; +} + +/* -------------------------------------------------------------------------- + * Multi-table support + * -------------------------------------------------------------------------- */ + +uint16_t ray_graph_add_table(ray_graph_t* g, ray_t* table) { + uint16_t id = g->n_tables; + uint16_t new_cap = id + 1; + + ray_t** new_tables = (ray_t**)ray_sys_realloc(g->tables, + (size_t)new_cap * sizeof(ray_t*)); + if (!new_tables) return UINT16_MAX; /* error sentinel */ + g->tables = new_tables; + g->tables[id] = table; + ray_retain(table); + g->n_tables = new_cap; + + return id; +} + +ray_op_t* ray_scan_table(ray_graph_t* g, uint16_t table_id, const char* col_name) { + if (table_id >= g->n_tables || !g->tables[table_id]) return NULL; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + ext->base.opcode = OP_SCAN; + ext->base.arity = 0; + + int64_t sym_id = ray_sym_intern(col_name, strlen(col_name)); + ext->sym = sym_id; + + /* Store table_id+1 in pad[0..1] as uint16_t (0 = default g->table) */ + uint16_t stored_id = table_id + 1; + memcpy(ext->base.pad, &stored_id, sizeof(uint16_t)); + + /* 
Infer output type from the specified table */ + ray_t* tbl = g->tables[table_id]; + if (tbl) { + ray_t* col = ray_table_get_col(tbl, sym_id); + if (col) { + ext->base.out_type = col->type; + ext->base.est_rows = (uint32_t)col->len; + } + } + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +/* -------------------------------------------------------------------------- + * Graph traversal DAG builders + * -------------------------------------------------------------------------- */ + +ray_op_t* ray_expand(ray_graph_t* g, ray_op_t* src_nodes, + ray_rel_t* rel, uint8_t direction) { + uint32_t src_id = src_nodes->id; + uint32_t est = src_nodes->est_rows; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + src_nodes = &g->nodes[src_id]; + + ext->base.opcode = OP_EXPAND; + ext->base.arity = 1; + ext->base.inputs[0] = src_nodes; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = est * 10; /* rough estimate: 10x fan-out */ + ext->graph.rel = rel; + ext->graph.direction = direction; + ext->graph.min_depth = 1; + ext->graph.max_depth = 1; + ext->graph.path_tracking = 0; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_var_expand(ray_graph_t* g, ray_op_t* start_nodes, + ray_rel_t* rel, uint8_t direction, + uint8_t min_depth, uint8_t max_depth, + bool track_path) { + uint32_t src_id = start_nodes->id; + uint32_t est = start_nodes->est_rows; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + start_nodes = &g->nodes[src_id]; + + ext->base.opcode = OP_VAR_EXPAND; + ext->base.arity = 1; + ext->base.inputs[0] = start_nodes; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = est * 100; /* rough estimate */ + ext->graph.rel = rel; + ext->graph.direction = direction; + ext->graph.min_depth = min_depth; + ext->graph.max_depth = max_depth; + ext->graph.path_tracking = track_path ? 
1 : 0; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_shortest_path(ray_graph_t* g, ray_op_t* src, ray_op_t* dst, + ray_rel_t* rel, uint8_t max_depth) { + uint32_t src_id = src->id; + uint32_t dst_id = dst->id; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + src = &g->nodes[src_id]; + dst = &g->nodes[dst_id]; + + ext->base.opcode = OP_SHORTEST_PATH; + ext->base.arity = 2; + ext->base.inputs[0] = src; + ext->base.inputs[1] = dst; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = max_depth; + ext->graph.rel = rel; + ext->graph.direction = 0; /* forward by default */ + ext->graph.min_depth = 0; + ext->graph.max_depth = max_depth; + ext->graph.path_tracking = 0; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +/* -------------------------------------------------------------------------- + * Graph algorithm builders + * -------------------------------------------------------------------------- */ + +ray_op_t* ray_pagerank(ray_graph_t* g, ray_rel_t* rel, + uint16_t max_iter, double damping) { + if (!g || !rel) return NULL; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + ext->base.opcode = OP_PAGERANK; + ext->base.arity = 0; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = (uint32_t)rel->fwd.n_nodes; + ext->graph.rel = rel; + ext->graph.max_iter = max_iter; + ext->graph.damping = damping; + ext->graph.direction = 0; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_connected_comp(ray_graph_t* g, ray_rel_t* rel) { + if (!g || !rel) return NULL; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + ext->base.opcode = OP_CONNECTED_COMP; + ext->base.arity = 0; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = (uint32_t)rel->fwd.n_nodes; + ext->graph.rel = rel; + ext->graph.direction = 2; /* both directions for undirected */ + + g->nodes[ext->base.id] = 
ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_dijkstra(ray_graph_t* g, ray_op_t* src, ray_op_t* dst, + ray_rel_t* rel, const char* weight_col, + uint8_t max_depth) { + if (!g || !src || !rel || !weight_col) return NULL; + + /* Save IDs before alloc — realloc may invalidate the pointers */ + uint32_t src_id = src->id; + uint32_t dst_id = dst ? dst->id : 0; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + src = &g->nodes[src_id]; + if (dst) dst = &g->nodes[dst_id]; + + ext->base.opcode = OP_DIJKSTRA; + ext->base.arity = dst ? 2 : 1; + ext->base.inputs[0] = src; + ext->base.inputs[1] = dst; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = (uint32_t)rel->fwd.n_nodes; + ext->graph.rel = rel; + ext->graph.direction = 0; + ext->graph.max_depth = max_depth; + ext->graph.weight_col_sym = ray_sym_intern(weight_col, (int64_t)strlen(weight_col)); + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_louvain(ray_graph_t* g, ray_rel_t* rel, uint16_t max_iter) { + if (!g || !rel) return NULL; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + ext->base.opcode = OP_LOUVAIN; + ext->base.arity = 0; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = (uint32_t)rel->fwd.n_nodes; + ext->graph.rel = rel; + ext->graph.max_iter = max_iter > 0 ? 
max_iter : 100; + ext->graph.direction = 2; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_degree_cent(ray_graph_t* g, ray_rel_t* rel) { + if (!g || !rel) return NULL; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + ext->base.opcode = OP_DEGREE_CENT; + ext->base.arity = 0; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = (uint32_t)rel->fwd.n_nodes; + ext->graph.rel = rel; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_topsort(ray_graph_t* g, ray_rel_t* rel) { + if (!g || !rel) return NULL; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + ext->base.opcode = OP_TOPSORT; + ext->base.arity = 0; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = (uint32_t)rel->fwd.n_nodes; + ext->graph.rel = rel; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_dfs(ray_graph_t* g, ray_op_t* src, ray_rel_t* rel, uint8_t max_depth) { + if (!g || !src || !rel) return NULL; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + uint32_t src_id = src->id; + src = &g->nodes[src_id]; + + ext->base.opcode = OP_DFS; + ext->base.arity = 1; + ext->base.inputs[0] = src; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = (uint32_t)rel->fwd.n_nodes; + ext->graph.rel = rel; + ext->graph.direction = 0; + ext->graph.max_depth = max_depth; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_wco_join(ray_graph_t* g, + ray_rel_t** rels, uint8_t n_rels, + uint8_t n_vars) { + size_t extra = (size_t)n_rels * sizeof(ray_rel_t*); + ray_op_ext_t* ext = graph_alloc_ext_node_ex(g, extra); + if (!ext) return NULL; + + ext->base.opcode = OP_WCO_JOIN; + ext->base.arity = 0; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = 1000; /* rough estimate */ + + /* Copy rels array into trailing bytes */ + ray_rel_t** trail = 
(ray_rel_t**)EXT_TRAIL(ext); + if (n_rels > 0) memcpy(trail, rels, (size_t)n_rels * sizeof(ray_rel_t*)); + ext->wco.rels = (void**)trail; + ext->wco.n_rels = n_rels; + ext->wco.n_vars = n_vars; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +/* -------------------------------------------------------------------------- + * Vector similarity builders + * -------------------------------------------------------------------------- */ + +ray_op_t* ray_cosine_sim(ray_graph_t* g, ray_op_t* emb_col, + const float* query_vec, int32_t dim) { + if (!g || !emb_col || !query_vec || dim <= 0) return NULL; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + emb_col = &g->nodes[emb_col->id]; + + ext->base.opcode = OP_COSINE_SIM; + ext->base.arity = 1; + ext->base.inputs[0] = emb_col; + ext->base.out_type = RAY_F64; + ext->base.est_rows = emb_col->est_rows; + ext->vector.query_vec = (float*)query_vec; + ext->vector.dim = dim; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_euclidean_dist(ray_graph_t* g, ray_op_t* emb_col, + const float* query_vec, int32_t dim) { + if (!g || !emb_col || !query_vec || dim <= 0) return NULL; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + emb_col = &g->nodes[emb_col->id]; + + ext->base.opcode = OP_EUCLIDEAN_DIST; + ext->base.arity = 1; + ext->base.inputs[0] = emb_col; + ext->base.out_type = RAY_F64; + ext->base.est_rows = emb_col->est_rows; + ext->vector.query_vec = (float*)query_vec; + ext->vector.dim = dim; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_knn(ray_graph_t* g, ray_op_t* emb_col, + const float* query_vec, int32_t dim, int64_t k, + ray_hnsw_metric_t metric) { + if (!g || !emb_col || !query_vec || dim <= 0 || k <= 0) return NULL; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + emb_col = &g->nodes[emb_col->id]; + + ext->base.opcode = 
OP_KNN; + ext->base.arity = 1; + ext->base.inputs[0] = emb_col; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = (uint32_t)k; + ext->vector.query_vec = (float*)query_vec; + ext->vector.dim = dim; + ext->vector.k = k; + ext->vector.metric = (int32_t)metric; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_cluster_coeff(ray_graph_t* g, ray_rel_t* rel) { + if (!g || !rel) return NULL; + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + ext->base.opcode = OP_CLUSTER_COEFF; + ext->base.arity = 0; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = (uint32_t)rel->fwd.n_nodes; + ext->graph.rel = rel; + ext->graph.direction = 2; + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_random_walk(ray_graph_t* g, ray_op_t* src, ray_rel_t* rel, + uint16_t walk_length) { + if (!g || !src || !rel) return NULL; + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + uint32_t src_id = src->id; + src = &g->nodes[src_id]; + ext->base.opcode = OP_RANDOM_WALK; + ext->base.arity = 1; + ext->base.inputs[0] = src; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = walk_length + 1; + ext->graph.rel = rel; + ext->graph.max_iter = walk_length; + ext->graph.direction = 0; + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_astar(ray_graph_t* g, ray_op_t* src, ray_op_t* dst, + ray_rel_t* rel, const char* weight_col, + const char* lat_col, const char* lon_col, + ray_t* node_props, uint8_t max_depth) { + if (!g || !src || !dst || !rel || !weight_col || !lat_col || !lon_col || !node_props) + return NULL; + + /* Save IDs before alloc — realloc may invalidate the pointers */ + uint32_t src_id = src->id; + uint32_t dst_id = dst->id; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + src = &g->nodes[src_id]; + dst = &g->nodes[dst_id]; + + ext->base.opcode = OP_ASTAR; + ext->base.arity = 2; + 
ext->base.inputs[0] = src; + ext->base.inputs[1] = dst; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = (uint32_t)rel->fwd.n_nodes; + ext->graph.rel = rel; + ext->graph.direction = 0; + ext->graph.max_depth = max_depth; + ext->graph.weight_col_sym = ray_sym_intern(weight_col, (int64_t)strlen(weight_col)); + ext->graph.coord_col_syms[0] = ray_sym_intern(lat_col, (int64_t)strlen(lat_col)); + ext->graph.coord_col_syms[1] = ray_sym_intern(lon_col, (int64_t)strlen(lon_col)); + ext->graph.node_props = node_props; + ray_retain(node_props); + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_k_shortest(ray_graph_t* g, ray_op_t* src, ray_op_t* dst, + ray_rel_t* rel, const char* weight_col, uint16_t k) { + if (!g || !src || !dst || !rel || !weight_col || k == 0) return NULL; + + /* Save IDs before alloc — realloc may invalidate the pointers */ + uint32_t src_id = src->id; + uint32_t dst_id = dst->id; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + src = &g->nodes[src_id]; + dst = &g->nodes[dst_id]; + + ext->base.opcode = OP_K_SHORTEST; + ext->base.arity = 2; + ext->base.inputs[0] = src; + ext->base.inputs[1] = dst; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = (uint32_t)(k * rel->fwd.n_nodes); + ext->graph.rel = rel; + ext->graph.direction = 0; + ext->graph.max_iter = k; + ext->graph.weight_col_sym = ray_sym_intern(weight_col, (int64_t)strlen(weight_col)); + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_betweenness(ray_graph_t* g, ray_rel_t* rel, uint16_t sample_size) { + if (!g || !rel) return NULL; + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + ext->base.opcode = OP_BETWEENNESS; + ext->base.arity = 0; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = (uint32_t)rel->fwd.n_nodes; + ext->graph.rel = rel; + ext->graph.direction = 2; /* undirected BFS */ + ext->graph.max_iter = sample_size; /* 0 = exact */ + 
g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_closeness(ray_graph_t* g, ray_rel_t* rel, uint16_t sample_size) { + if (!g || !rel) return NULL; + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + ext->base.opcode = OP_CLOSENESS; + ext->base.arity = 0; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = (uint32_t)rel->fwd.n_nodes; + ext->graph.rel = rel; + ext->graph.direction = 2; /* undirected BFS */ + ext->graph.max_iter = sample_size; /* 0 = exact */ + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_mst(ray_graph_t* g, ray_rel_t* rel, const char* weight_col) { + if (!g || !rel || !weight_col) return NULL; + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + ext->base.opcode = OP_MST; + ext->base.arity = 0; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = (uint32_t)(rel->fwd.n_nodes > 0 ? rel->fwd.n_nodes - 1 : 0); + ext->graph.rel = rel; + ext->graph.direction = 2; + ext->graph.weight_col_sym = ray_sym_intern(weight_col, (int64_t)strlen(weight_col)); + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_hnsw_knn(ray_graph_t* g, ray_hnsw_t* idx, + const float* query_vec, int32_t dim, + int64_t k, int32_t ef_search) { + if (!g || !idx || !query_vec || dim <= 0 || k <= 0) return NULL; + + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + + ext->base.opcode = OP_HNSW_KNN; + ext->base.arity = 0; /* nullary: reads from index directly */ + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = (uint32_t)k; + ext->hnsw.hnsw_idx = idx; + ext->hnsw.query_vec = (float*)query_vec; + ext->hnsw.dim = dim; + ext->hnsw.k = k; + ext->hnsw.ef_search = ef_search > 0 ? 
ef_search : HNSW_DEFAULT_EF_S; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_ann_rerank(ray_graph_t* g, ray_op_t* src, + ray_hnsw_t* idx, const float* query_vec, + int32_t dim, int64_t k, int32_t ef_search) { + if (!g || !src || !idx || !query_vec || dim <= 0 || k <= 0) return NULL; + + uint32_t src_id = src->id; + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + src = &g->nodes[src_id]; + + ext->base.opcode = OP_ANN_RERANK; + ext->base.arity = 1; + ext->base.inputs[0] = src; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = (uint32_t)k; + ext->rerank.hnsw_idx = idx; + ext->rerank.col_sym = 0; + ext->rerank.query_vec = (float*)query_vec; + ext->rerank.dim = dim; + ext->rerank.metric = idx ? idx->metric : RAY_HNSW_COSINE; + ext->rerank.k = k; + ext->rerank.ef_search = ef_search > 0 ? ef_search : HNSW_DEFAULT_EF_S; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +ray_op_t* ray_knn_rerank(ray_graph_t* g, ray_op_t* src, + int64_t col_sym, const float* query_vec, + int32_t dim, int64_t k, ray_hnsw_metric_t metric) { + if (!g || !src || !query_vec || dim <= 0 || k <= 0 || col_sym <= 0) return NULL; + + uint32_t src_id = src->id; + ray_op_ext_t* ext = graph_alloc_ext_node(g); + if (!ext) return NULL; + src = &g->nodes[src_id]; + + ext->base.opcode = OP_KNN_RERANK; + ext->base.arity = 1; + ext->base.inputs[0] = src; + ext->base.out_type = RAY_TABLE; + ext->base.est_rows = (uint32_t)k; + ext->rerank.hnsw_idx = NULL; + ext->rerank.col_sym = col_sym; + ext->rerank.query_vec = (float*)query_vec; + ext->rerank.dim = dim; + ext->rerank.metric = (int32_t)metric; + ext->rerank.k = k; + ext->rerank.ef_search = 0; + + g->nodes[ext->base.id] = ext->base; + return &g->nodes[ext->base.id]; +} + +/* -------------------------------------------------------------------------- + * Lazy DAG handles + * -------------------------------------------------------------------------- */ + 
+ray_op_t* ray_graph_input_vec(ray_graph_t* g, ray_t* vec) { + return ray_const_vec(g, vec); +} + +ray_t* ray_lazy_wrap(ray_graph_t* g, ray_op_t* op) { + ray_t* h = ray_alloc(0); + if (!h) { ray_graph_free(g); return ray_error("oom", NULL); } + h->type = RAY_LAZY; + h->attrs = 0; + RAY_LAZY_GRAPH(h) = g; + RAY_LAZY_OP(h) = op; + return h; +} + +ray_t* ray_lazy_append(ray_t* lazy, uint16_t opcode) { + ray_graph_t* g = RAY_LAZY_GRAPH(lazy); + ray_op_t* prev = RAY_LAZY_OP(lazy); + + /* Determine output type based on opcode */ + int8_t out_type; + switch (opcode) { + case OP_COUNT: + case OP_COUNT_DISTINCT: + out_type = RAY_I64; break; + case OP_AVG: + case OP_STDDEV: + case OP_STDDEV_POP: + case OP_VAR: + case OP_VAR_POP: + out_type = RAY_F64; break; + case OP_SUM: + case OP_PROD: + out_type = (prev->out_type == RAY_F64) ? RAY_F64 : RAY_I64; break; + default: + out_type = prev->out_type; break; + } + + ray_op_t* op = make_unary(g, opcode, prev, out_type); + if (!op) return ray_error("oom", NULL); + RAY_LAZY_OP(lazy) = op; + return lazy; +} + +ray_t* ray_lazy_materialize(ray_t* val) { + if (!ray_is_lazy(val)) return val; + + ray_graph_t* g = RAY_LAZY_GRAPH(val); + ray_op_t* op = RAY_LAZY_OP(val); + ray_t* result = ray_execute(g, op); + + ray_graph_free(g); + /* Clear graph pointer before releasing to prevent double-free in + * ray_release_owned_refs */ + RAY_LAZY_GRAPH(val) = NULL; + ray_release(val); + return result; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/graph.h b/crates/rayforce-sys/vendor/rayforce/src/ops/graph.h new file mode 100644 index 0000000..b32a17a --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/graph.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_GRAPH_H +#define RAY_GRAPH_H + +#include "ops.h" + +#endif /* RAY_GRAPH_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/group.c b/crates/rayforce-sys/vendor/rayforce/src/ops/group.c new file mode 100644 index 0000000..c26fffe --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/group.c @@ -0,0 +1,4392 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
/* Running state for a single-column reduction.  Float and integer inputs
 * use separate field sets (`*_f` and `*_i`); `cnt` counts accumulated
 * (non-null) rows, `null_count` counts skipped rows, and `has_first`
 * records whether first_f/first_i has been captured yet. */
typedef struct {
    double  sum_f, min_f, max_f, prod_f, first_f, last_f, sum_sq_f;
    int64_t sum_i, min_i, max_i, prod_i, first_i, last_i, sum_sq_i;
    int64_t cnt;
    int64_t null_count;
    bool    has_first;
} reduce_acc_t;

/* Reset `acc` to the identity of every tracked reduction: sums and counts
 * at 0, products at 1, min at the largest and max at the smallest
 * representable value. */
static void reduce_acc_init(reduce_acc_t* acc) {
    /* Fields not named here are zero-initialized by the compound literal. */
    *acc = (reduce_acc_t){
        .min_f  = DBL_MAX,
        .max_f  = -DBL_MAX,
        .prod_f = 1.0,
        .min_i  = INT64_MAX,
        .max_i  = INT64_MIN,
        .prod_i = 1,
    };
}
*/ \ + (acc)->sum_i = (int64_t)((uint64_t)(acc)->sum_i + (uint64_t)v); \ + (acc)->sum_sq_i = (int64_t)((uint64_t)(acc)->sum_sq_i + (uint64_t)v * (uint64_t)v); \ + (acc)->prod_i = (int64_t)((uint64_t)(acc)->prod_i * (uint64_t)v); \ + if (v < (acc)->min_i) (acc)->min_i = v; \ + if (v > (acc)->max_i) (acc)->max_i = v; \ + if (!(acc)->has_first) { (acc)->first_i = v; (acc)->has_first = true; } \ + (acc)->last_i = v; (acc)->cnt++; \ + } \ + } while (0) + +/* Float reduction loop */ +#define REDUCE_LOOP_F(base, start, end, acc, has_nulls, null_bm) \ + do { \ + const double* d = (const double*)(base); \ + for (int64_t row = start; row < end; row++) { \ + if (has_nulls && (null_bm[row/8] >> (row%8)) & 1) { (acc)->null_count++; continue; } \ + double v = d[row]; \ + (acc)->sum_f += v; (acc)->sum_sq_f += v * v; (acc)->prod_f *= v; \ + if (v < (acc)->min_f) (acc)->min_f = v; \ + if (v > (acc)->max_f) (acc)->max_f = v; \ + if (!(acc)->has_first) { (acc)->first_f = v; (acc)->has_first = true; } \ + (acc)->last_f = v; (acc)->cnt++; \ + } \ + } while (0) + +static void reduce_range(ray_t* input, int64_t start, int64_t end, + reduce_acc_t* acc, bool has_nulls, + const uint8_t* null_bm) { + void* base = ray_data(input); + switch (input->type) { + case RAY_BOOL: case RAY_U8: + REDUCE_LOOP_I(uint8_t, base, start, end, acc, has_nulls, null_bm); break; + case RAY_I16: + REDUCE_LOOP_I(int16_t, base, start, end, acc, has_nulls, null_bm); break; + case RAY_I32: case RAY_DATE: case RAY_TIME: + REDUCE_LOOP_I(int32_t, base, start, end, acc, has_nulls, null_bm); break; + case RAY_I64: case RAY_TIMESTAMP: + REDUCE_LOOP_I(int64_t, base, start, end, acc, has_nulls, null_bm); break; + case RAY_F64: + REDUCE_LOOP_F(base, start, end, acc, has_nulls, null_bm); break; + case RAY_SYM: { + /* Adaptive-width SYM columns — use read_col_i64 */ + for (int64_t row = start; row < end; row++) { + if (has_nulls && (null_bm[row/8] >> (row%8)) & 1) { acc->null_count++; continue; } + int64_t v = 
read_col_i64(base, row, input->type, input->attrs); + acc->sum_i += v; acc->sum_sq_i += v * v; + acc->prod_i = (int64_t)((uint64_t)acc->prod_i * (uint64_t)v); + if (v < acc->min_i) acc->min_i = v; + if (v > acc->max_i) acc->max_i = v; + if (!acc->has_first) { acc->first_i = v; acc->has_first = true; } + acc->last_i = v; acc->cnt++; + } + break; + } + default: break; + } +} + +/* Context for parallel reduction */ +typedef struct { + ray_t* input; + reduce_acc_t* accs; /* one per worker */ + bool has_nulls; + const uint8_t* null_bm; +} par_reduce_ctx_t; + +static void par_reduce_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t end) { + par_reduce_ctx_t* c = (par_reduce_ctx_t*)ctx; + reduce_range(c->input, start, end, &c->accs[worker_id], + c->has_nulls, c->null_bm); +} + +static void reduce_merge(reduce_acc_t* dst, const reduce_acc_t* src, int8_t in_type) { + if (in_type == RAY_F64) { + dst->sum_f += src->sum_f; + dst->sum_sq_f += src->sum_sq_f; + dst->prod_f *= src->prod_f; + if (src->min_f < dst->min_f) dst->min_f = src->min_f; + if (src->max_f > dst->max_f) dst->max_f = src->max_f; + } else { + /* Defined unsigned wrap — matches REDUCE_LOOP_I's per-row path. */ + dst->sum_i = (int64_t)((uint64_t)dst->sum_i + (uint64_t)src->sum_i); + dst->sum_sq_i = (int64_t)((uint64_t)dst->sum_sq_i + (uint64_t)src->sum_sq_i); + dst->prod_i = (int64_t)((uint64_t)dst->prod_i * (uint64_t)src->prod_i); + if (src->min_i < dst->min_i) dst->min_i = src->min_i; + if (src->max_i > dst->max_i) dst->max_i = src->max_i; + } + dst->cnt += src->cnt; + dst->null_count += src->null_count; + /* reduce_merge does not merge first/last; caller handles these separately. + * Since workers process sequential ranges, worker 0's first is the global first, + * and the last worker's last is the global last. 
*/ +} + +/* Hash-based count distinct for integer/float columns */ +ray_t* exec_count_distinct(ray_graph_t* g, ray_op_t* op, ray_t* input) { + (void)g; (void)op; + if (!input || RAY_IS_ERR(input)) return input; + + int8_t in_type = input->type; + int64_t len = input->len; + + if (len == 0) return ray_i64(0); + + /* Only numeric/ordinal/sym column types are supported */ + switch (in_type) { + case RAY_BOOL: case RAY_U8: + case RAY_I16: case RAY_I32: case RAY_I64: + case RAY_F64: case RAY_DATE: case RAY_TIME: case RAY_TIMESTAMP: + case RAY_SYM: + break; + default: + return ray_error("type", NULL); + } + + /* Use a simple open-addressing hash set for int64 values */ + uint64_t cap = (uint64_t)(len < 16 ? 32 : len) * 2; + /* Round up to power of 2 */ + uint64_t c = 1; + while (c && c < cap) c <<= 1; + if (!c) return ray_error("oom", NULL); /* overflow: cap too large */ + cap = c; + + ray_t* set_hdr; + int64_t* set = (int64_t*)scratch_calloc(&set_hdr, + (size_t)cap * sizeof(int64_t)); + ray_t* used_hdr; + uint8_t* used = (uint8_t*)scratch_calloc(&used_hdr, + (size_t)cap * sizeof(uint8_t)); + if (!set || !used) { + if (set_hdr) scratch_free(set_hdr); + if (used_hdr) scratch_free(used_hdr); + return ray_error("oom", NULL); + } + + int64_t count = 0; + uint64_t mask = cap - 1; + void* base = ray_data(input); + + for (int64_t i = 0; i < len; i++) { + int64_t val; + if (in_type == RAY_F64) { + double fv = ((double*)base)[i]; + /* Normalize: NaN → canonical NaN, -0.0 → +0.0 */ + if (fv != fv) fv = (double)NAN; /* canonical NaN */ + else if (fv == 0.0) fv = 0.0; /* +0.0 */ + memcpy(&val, &fv, sizeof(int64_t)); + } else { + val = read_col_i64(base, i, in_type, input->attrs); + } + + /* Open-addressing linear probe */ + uint64_t h = (uint64_t)val * 0x9E3779B97F4A7C15ULL; + uint64_t slot = h & mask; + while (used[slot]) { + if (set[slot] == val) goto next_val; + slot = (slot + 1) & mask; + } + /* New distinct value */ + set[slot] = val; + used[slot] = 1; + count++; + next_val:; + 
} + + scratch_free(set_hdr); + scratch_free(used_hdr); + return ray_i64(count); +} + +ray_t* exec_reduction(ray_graph_t* g, ray_op_t* op, ray_t* input) { + (void)g; + if (!input || RAY_IS_ERR(input)) return input; + + /* TABLE input: COUNT returns row count, others need a column */ + if (input->type == RAY_TABLE) { + if (op->opcode == OP_COUNT) + return ray_i64(ray_table_nrows(input)); + return ray_error("type", NULL); + } + + int8_t in_type = input->type; + int64_t len = input->len; + + /* Resolve null bitmap once before dispatching. ray_vec_nullmap_bytes + * handles slice / ext / inline / HAS_INDEX uniformly so this works on + * vectors that carry an attached accelerator index. */ + bool has_nulls = (input->attrs & RAY_ATTR_HAS_NULLS) != 0; + const uint8_t* null_bm = ray_vec_nullmap_bytes(input, NULL, NULL); + + /* O(1) short-circuit: first/last on numeric columns don't need a + * full reduction pass. Non-numeric types (STR, GUID) fall through + * to the serial reduction path below. */ + if ((op->opcode == OP_FIRST || op->opcode == OP_LAST) && + (in_type == RAY_I64 || in_type == RAY_F64 || in_type == RAY_I32 || + in_type == RAY_I16 || in_type == RAY_BOOL || in_type == RAY_U8 || + in_type == RAY_TIMESTAMP || in_type == RAY_DATE || in_type == RAY_TIME || + in_type == RAY_SYM)) { + int64_t row; + if (op->opcode == OP_FIRST) { + for (row = 0; row < len; row++) + if (!has_nulls || !((null_bm[row/8] >> (row%8)) & 1)) break; + } else { + for (row = len - 1; row >= 0; row--) + if (!has_nulls || !((null_bm[row/8] >> (row%8)) & 1)) break; + } + if (row < 0 || row >= len) + return ray_typed_null(-in_type); + void* base = ray_data(input); + if (in_type == RAY_F64) return ray_f64(((const double*)base)[row]); + return ray_i64(read_col_i64(base, row, in_type, input->attrs)); + } + + ray_pool_t* pool = ray_pool_get(); + if (pool && len >= RAY_PARALLEL_THRESHOLD) { + uint32_t nw = ray_pool_total_workers(pool); + ray_t* accs_hdr; + reduce_acc_t* accs = 
(reduce_acc_t*)scratch_calloc(&accs_hdr, nw * sizeof(reduce_acc_t)); + if (!accs) return ray_error("oom", NULL); + for (uint32_t i = 0; i < nw; i++) reduce_acc_init(&accs[i]); + + par_reduce_ctx_t ctx = { .input = input, .accs = accs, + .has_nulls = has_nulls, .null_bm = null_bm }; + ray_pool_dispatch(pool, par_reduce_fn, &ctx, len); + + /* Merge: worker 0 is the base, merge the rest in order */ + reduce_acc_t merged; + reduce_acc_init(&merged); + merged = accs[0]; + for (uint32_t i = 1; i < nw; i++) { + if (!accs[i].has_first) continue; + reduce_merge(&merged, &accs[i], in_type); + } + /* first = accs[first worker with data], last = accs[last worker with data] */ + for (uint32_t i = 0; i < nw; i++) { + if (accs[i].has_first) { + if (in_type == RAY_F64) merged.first_f = accs[i].first_f; + else merged.first_i = accs[i].first_i; + break; + } + } + for (int32_t i = (int32_t)nw - 1; i >= 0; i--) { + if (accs[i].has_first) { + if (in_type == RAY_F64) merged.last_f = accs[i].last_f; + else merged.last_i = accs[i].last_i; + break; + } + } + + ray_t* result; + switch (op->opcode) { + case OP_SUM: result = in_type == RAY_F64 ? ray_f64(merged.sum_f) : ray_i64(merged.sum_i); break; + case OP_PROD: result = in_type == RAY_F64 ? ray_f64(merged.prod_f) : ray_i64(merged.prod_i); break; + case OP_MIN: result = merged.cnt > 0 ? (in_type == RAY_F64 ? ray_f64(merged.min_f) : ray_i64(merged.min_i)) : ray_typed_null(-in_type); break; + case OP_MAX: result = merged.cnt > 0 ? (in_type == RAY_F64 ? ray_f64(merged.max_f) : ray_i64(merged.max_i)) : ray_typed_null(-in_type); break; + case OP_COUNT: result = ray_i64(merged.cnt); break; + case OP_AVG: result = merged.cnt > 0 ? ray_f64(in_type == RAY_F64 ? merged.sum_f / merged.cnt : (double)merged.sum_i / merged.cnt) : ray_typed_null(-RAY_F64); break; + case OP_FIRST: result = merged.has_first ? (in_type == RAY_F64 ? 
ray_f64(merged.first_f) : ray_i64(merged.first_i)) : ray_typed_null(-in_type); break; + case OP_LAST: result = merged.has_first ? (in_type == RAY_F64 ? ray_f64(merged.last_f) : ray_i64(merged.last_i)) : ray_typed_null(-in_type); break; + case OP_VAR: case OP_VAR_POP: + case OP_STDDEV: case OP_STDDEV_POP: { + bool insufficient = (op->opcode == OP_VAR || op->opcode == OP_STDDEV) ? merged.cnt <= 1 : merged.cnt <= 0; + if (insufficient) { result = ray_typed_null(-RAY_F64); break; } + double mean, var_pop; + if (in_type == RAY_F64) { mean = merged.sum_f / merged.cnt; var_pop = merged.sum_sq_f / merged.cnt - mean * mean; } + else { mean = (double)merged.sum_i / merged.cnt; var_pop = (double)merged.sum_sq_i / merged.cnt - mean * mean; } + if (var_pop < 0) var_pop = 0; + double val; + if (op->opcode == OP_VAR_POP) val = var_pop; + else if (op->opcode == OP_VAR) val = var_pop * merged.cnt / (merged.cnt - 1); + else if (op->opcode == OP_STDDEV_POP) val = sqrt(var_pop); + else val = sqrt(var_pop * merged.cnt / (merged.cnt - 1)); + result = ray_f64(val); + break; + } + default: result = ray_error("nyi", NULL); break; + } + scratch_free(accs_hdr); + return result; + } + + reduce_acc_t acc; + reduce_acc_init(&acc); + reduce_range(input, 0, len, &acc, has_nulls, null_bm); + + switch (op->opcode) { + case OP_SUM: return in_type == RAY_F64 ? ray_f64(acc.sum_f) : ray_i64(acc.sum_i); + case OP_PROD: return in_type == RAY_F64 ? ray_f64(acc.prod_f) : ray_i64(acc.prod_i); + case OP_MIN: return acc.cnt > 0 ? (in_type == RAY_F64 ? ray_f64(acc.min_f) : ray_i64(acc.min_i)) : ray_typed_null(-in_type); + case OP_MAX: return acc.cnt > 0 ? (in_type == RAY_F64 ? ray_f64(acc.max_f) : ray_i64(acc.max_i)) : ray_typed_null(-in_type); + case OP_COUNT: return ray_i64(acc.cnt); + case OP_AVG: return acc.cnt > 0 ? ray_f64(in_type == RAY_F64 ? acc.sum_f / acc.cnt : (double)acc.sum_i / acc.cnt) : ray_typed_null(-RAY_F64); + case OP_FIRST: return acc.has_first ? (in_type == RAY_F64 ? 
ray_f64(acc.first_f) : ray_i64(acc.first_i)) : ray_typed_null(-in_type); + case OP_LAST: return acc.has_first ? (in_type == RAY_F64 ? ray_f64(acc.last_f) : ray_i64(acc.last_i)) : ray_typed_null(-in_type); + case OP_VAR: case OP_VAR_POP: + case OP_STDDEV: case OP_STDDEV_POP: { + bool insufficient = (op->opcode == OP_VAR || op->opcode == OP_STDDEV) ? acc.cnt <= 1 : acc.cnt <= 0; + if (insufficient) return ray_typed_null(-RAY_F64); + double mean, var_pop; + if (in_type == RAY_F64) { mean = acc.sum_f / acc.cnt; var_pop = acc.sum_sq_f / acc.cnt - mean * mean; } + else { mean = (double)acc.sum_i / acc.cnt; var_pop = (double)acc.sum_sq_i / acc.cnt - mean * mean; } + if (var_pop < 0) var_pop = 0; + double val; + if (op->opcode == OP_VAR_POP) val = var_pop; + else if (op->opcode == OP_VAR) val = var_pop * acc.cnt / (acc.cnt - 1); + else if (op->opcode == OP_STDDEV_POP) val = sqrt(var_pop); + else val = sqrt(var_pop * acc.cnt / (acc.cnt - 1)); + return ray_f64(val); + } + default: return ray_error("nyi", NULL); + } +} + +/* ============================================================================ + * Group-by execution — with parallel local hash tables + merge + * ============================================================================ */ + + +/* Flags controlling which accumulator arrays are allocated */ +/* GHT_NEED_* defined in exec_internal.h */ + +/* ── Row-layout HT ────────────────────────────────────────────────────── + * Keys + accumulators stored inline in both radix entries and group rows. + * After phase1 copies data from original columns, phase2 and phase3 never + * touch column data again — all access is sequential/local. 
+ * ────────────────────────────────────────────────────────────────────── */ + +/* ght_layout_t defined in exec_internal.h */ + +ght_layout_t ght_compute_layout(uint8_t n_keys, uint8_t n_aggs, + ray_t** agg_vecs, uint8_t need_flags, + const uint16_t* agg_ops, + const int8_t* key_types) { + ght_layout_t ly; + memset(&ly, 0, sizeof(ly)); + ly.n_keys = n_keys; + ly.n_aggs = n_aggs; + ly.need_flags = need_flags; + + /* Mark wide keys (those that don't fit in 8 bytes). For each + * wide key, the fat-entry and HT-row key slot stores a source + * row index; probe/rehash/scatter resolve the actual bytes via + * group_ht_t.key_data[k]. Currently only RAY_GUID is supported. */ + if (key_types) { + for (uint8_t k = 0; k < n_keys && k < 8; k++) { + if (key_types[k] == RAY_GUID) { + ly.wide_key_mask |= (uint8_t)(1u << k); + ly.wide_key_esz[k] = 16; + } + } + } + + uint8_t nv = 0; + for (uint8_t a = 0; a < n_aggs && a < 8; a++) { + if (agg_vecs[a]) { + ly.agg_val_slot[a] = (int8_t)nv; + if (agg_vecs[a]->type == RAY_F64) + ly.agg_is_f64 |= (1u << a); + nv++; + } else { + ly.agg_val_slot[a] = -1; + } + if (agg_ops) { + if (agg_ops[a] == OP_FIRST) ly.agg_is_first |= (1u << a); + if (agg_ops[a] == OP_LAST) ly.agg_is_last |= (1u << a); + } + } + ly.n_agg_vals = nv; + /* Key region = n_keys*8 + 8-byte null mask slot (stored after last key). + * The null mask slot holds a bitmap of which keys were null in the source + * row (bit k = key k is null). Folding this slot into hash/memcmp lets + * null and 0 form distinct groups. 
*/ + uint16_t key_region = (uint16_t)((uint16_t)n_keys * 8 + 8); + ly.entry_stride = (uint16_t)(8 + key_region + (uint16_t)nv * 8); + + uint16_t off = (uint16_t)(8 + key_region); + uint16_t block = (uint16_t)nv * 8; + if (need_flags & GHT_NEED_SUM) { ly.off_sum = off; off += block; } + if (need_flags & GHT_NEED_MIN) { ly.off_min = off; off += block; } + if (need_flags & GHT_NEED_MAX) { ly.off_max = off; off += block; } + if (need_flags & GHT_NEED_SUMSQ) { ly.off_sumsq = off; off += block; } + ly.row_stride = off; + return ly; +} + +/* Packed HT slots: [salt:8 | gid:24] in 4 bytes. + * Max groups per HT = 16M (24 bits) — ample for partitioned probes. + * 4B slots halve cache footprint vs 8B, fitting HT in L2. */ +#define HT_EMPTY UINT32_MAX +#define HT_PACK(salt, gid) (((uint32_t)(uint8_t)(salt) << 24) | ((gid) & 0xFFFFFF)) +#define HT_GID(s) ((s) & 0xFFFFFF) +#define HT_SALT_V(s) ((uint8_t)((s) >> 24)) + +/* group_ht_t defined in exec_internal.h */ + +static bool group_ht_init_sized(group_ht_t* ht, uint32_t cap, + const ght_layout_t* ly, uint32_t init_grp_cap) { + ht->ht_cap = cap; + ht->oom = 0; + ht->layout = *ly; + /* key_data must be populated by the caller via group_ht_set_key_data + * whenever wide_key_mask != 0. */ + memset(ht->key_data, 0, sizeof(ht->key_data)); + ht->slots = (uint32_t*)scratch_alloc(&ht->_h_slots, (size_t)cap * sizeof(uint32_t)); + if (!ht->slots) return false; + memset(ht->slots, 0xFF, (size_t)cap * sizeof(uint32_t)); /* HT_EMPTY = all-1s */ + ht->grp_cap = init_grp_cap; + ht->grp_count = 0; + ht->rows = (char*)scratch_alloc(&ht->_h_rows, + (size_t)init_grp_cap * ly->row_stride); + if (!ht->rows) return false; + return true; +} + +bool group_ht_init(group_ht_t* ht, uint32_t cap, const ght_layout_t* ly) { + return group_ht_init_sized(ht, cap, ly, 256); +} + +/* Populate key_data[k] for wide-key resolution. Called by the HT path + * right after group_ht_init / group_ht_init_sized when any key is wide. 
*/ +static inline void group_ht_set_key_data(group_ht_t* ht, void** kd) { + uint8_t mask = ht->layout.wide_key_mask; + if (!mask || !kd) return; + for (uint8_t k = 0; k < ht->layout.n_keys && k < 8; k++) { + if (mask & (1u << k)) ht->key_data[k] = kd[k]; + } +} + +void group_ht_free(group_ht_t* ht) { + scratch_free(ht->_h_slots); + scratch_free(ht->_h_rows); +} + +static bool group_ht_grow(group_ht_t* ht) { + uint32_t old_cap = ht->grp_cap; + uint32_t new_cap = old_cap * 2; + uint16_t rs = ht->layout.row_stride; + char* new_rows = (char*)scratch_realloc( + &ht->_h_rows, (size_t)old_cap * rs, (size_t)new_cap * rs); + if (!new_rows) return false; + ht->rows = new_rows; + ht->grp_cap = new_cap; + return true; +} + +/* Hash inline int64_t keys (for rehash — resolves wide keys via + * the HT's key_data pointers). */ +static inline uint64_t hash_keys_inline(const int64_t* keys, const int8_t* key_types, + uint8_t n_keys, uint8_t wide_mask, + const uint8_t* wide_esz, void* const* key_data) { + uint64_t h = 0; + for (uint8_t k = 0; k < n_keys; k++) { + uint64_t kh; + if (wide_mask & (1u << k)) { + /* Wide key: keys[k] is the source row index. Hash the + * actual bytes from key_data[k]. */ + int64_t row_idx = keys[k]; + uint8_t esz = wide_esz[k]; + const void* src = (const char*)key_data[k] + (size_t)row_idx * esz; + kh = ray_hash_bytes(src, esz); + } else if (key_types[k] == RAY_F64) { + double dv; + memcpy(&dv, &keys[k], 8); + kh = ray_hash_f64(dv); + } else { + kh = ray_hash_i64(keys[k]); + } + h = (k == 0) ? 
kh : ray_hash_combine(h, kh); + } + /* Fold null mask (slot n_keys) into hash so null/0 form distinct groups */ + int64_t null_mask = keys[n_keys]; + if (null_mask) + h = ray_hash_combine(h, ray_hash_i64(null_mask)); + return h; +} + +static void group_ht_rehash(group_ht_t* ht, const int8_t* key_types) { + uint32_t new_cap = ht->ht_cap * 2; + ray_t* new_h = NULL; + uint32_t* new_slots = (uint32_t*)scratch_alloc(&new_h, (size_t)new_cap * sizeof(uint32_t)); + if (!new_slots) return; /* OOM: keep old HT, it still works (just slower) */ + scratch_free(ht->_h_slots); + ht->_h_slots = new_h; + ht->slots = new_slots; + memset(ht->slots, 0xFF, (size_t)new_cap * sizeof(uint32_t)); + ht->ht_cap = new_cap; + uint32_t mask = new_cap - 1; + uint16_t rs = ht->layout.row_stride; + uint8_t nk = ht->layout.n_keys; + uint8_t wide = ht->layout.wide_key_mask; + for (uint32_t gi = 0; gi < ht->grp_count; gi++) { + const int64_t* row_keys = (const int64_t*)(ht->rows + (size_t)gi * rs + 8); + uint64_t h = hash_keys_inline(row_keys, key_types, nk, wide, + ht->layout.wide_key_esz, ht->key_data); + uint32_t slot = (uint32_t)(h & mask); + while (ht->slots[slot] != HT_EMPTY) + slot = (slot + 1) & mask; + ht->slots[slot] = HT_PACK(HT_SALT(h), gi); + } +} + +/* Initialize accumulators for a new group from entry's inline agg values. + * Each unified block has n_agg_vals slots of 8 bytes, typed by agg_is_f64. 
*/ +static inline void init_accum_from_entry(char* row, const char* entry, + const ght_layout_t* ly) { + uint16_t accum_start = (uint16_t)(8 + ((uint16_t)ly->n_keys + 1) * 8); + if (ly->row_stride > accum_start) + memset(row + accum_start, 0, ly->row_stride - accum_start); + + const char* agg_data = entry + 8 + ((size_t)ly->n_keys + 1) * 8; + uint8_t na = ly->n_aggs; + uint8_t nf = ly->need_flags; + + for (uint8_t a = 0; a < na; a++) { + int8_t s = ly->agg_val_slot[a]; + if (s < 0) continue; + /* Copy raw 8 bytes from entry into each enabled accumulator block */ + if (nf & GHT_NEED_SUM) memcpy(row + ly->off_sum + s * 8, agg_data + s * 8, 8); + if (nf & GHT_NEED_MIN) memcpy(row + ly->off_min + s * 8, agg_data + s * 8, 8); + if (nf & GHT_NEED_MAX) memcpy(row + ly->off_max + s * 8, agg_data + s * 8, 8); + if (nf & GHT_NEED_SUMSQ) { + /* sumsq = v * v for the first entry */ + if (ly->agg_is_f64 & (1u << a)) { + double v; memcpy(&v, agg_data + s * 8, 8); + double sq = v * v; + memcpy(row + ly->off_sumsq + s * 8, &sq, 8); + } else { + int64_t v; memcpy(&v, agg_data + s * 8, 8); + double sq = (double)v * (double)v; + memcpy(row + ly->off_sumsq + s * 8, &sq, 8); + } + } + } +} + +/* Row-layout accessors: cast through void* for strict-aliasing safety. + * All row offsets are 8-byte aligned by construction. 
*/
+/* ROW_RD/WR macros defined in exec_internal.h */
+
+/* Accumulate into existing group from entry's inline agg values.
+ *
+ * row:   group row to update in place (accumulator blocks at ly->off_*).
+ * entry: fat entry; its agg values start after the 8-byte header, the
+ *        n_keys key slots and the null-mask slot.
+ * ly:    layout giving per-aggregate value slots, enabled accumulator
+ *        blocks (need_flags) and the f64/first/last bitmasks.
+ *
+ * Aggregates whose agg_val_slot is negative are skipped. The group count
+ * is not touched here.
+ */
+static inline void accum_from_entry(char* row, const char* entry,
+                                    const ght_layout_t* ly) {
+    const char* agg_data = entry + 8 + ((size_t)ly->n_keys + 1) * 8;
+    uint8_t na = ly->n_aggs;
+    uint8_t nf = ly->need_flags;
+
+    for (uint8_t a = 0; a < na; a++) {
+        int8_t s = ly->agg_val_slot[a];
+        if (s < 0) continue;
+        const char* val = agg_data + s * 8;
+
+        uint8_t amask = (1u << a); /* bit for aggregate a; only 8 bits wide, so a >= 8 would yield 0 */
+        if (ly->agg_is_f64 & amask) {
+            double v;
+            memcpy(&v, val, 8);
+            if (nf & GHT_NEED_SUM) { /* the sum block doubles as storage for FIRST and LAST */
+                if (ly->agg_is_first & amask) { /* keep init value */ }
+                else if (ly->agg_is_last & amask) { memcpy(row + ly->off_sum + s * 8, val, 8); } /* LAST: overwrite with newest value */
+                else { ROW_WR_F64(row, ly->off_sum, s) += v; }
+            }
+            if (nf & GHT_NEED_MIN) { double* p = &ROW_WR_F64(row, ly->off_min, s); if (v < *p) *p = v; }
+            if (nf & GHT_NEED_MAX) { double* p = &ROW_WR_F64(row, ly->off_max, s); if (v > *p) *p = v; }
+            if (nf & GHT_NEED_SUMSQ) { ROW_WR_F64(row, ly->off_sumsq, s) += v * v; }
+        } else {
+            int64_t v;
+            memcpy(&v, val, 8);
+            if (nf & GHT_NEED_SUM) {
+                if (ly->agg_is_first & amask) { /* keep init value */ }
+                else if (ly->agg_is_last & amask) { memcpy(row + ly->off_sum + s * 8, val, 8); }
+                else { ROW_WR_I64(row, ly->off_sum, s) += v; }
+            }
+            if (nf & GHT_NEED_MIN) { int64_t* p = &ROW_WR_I64(row, ly->off_min, s); if (v < *p) *p = v; }
+            if (nf & GHT_NEED_MAX) { int64_t* p = &ROW_WR_I64(row, ly->off_max, s); if (v > *p) *p = v; }
+            if (nf & GHT_NEED_SUMSQ) { ROW_WR_F64(row, ly->off_sumsq, s) += (double)v * (double)v; } /* sumsq stays a double even for integer inputs */
+        }
+    }
+}
+
+/* Compare the n_keys key slots of two rows, handling wide keys via
+ * key_data[] resolution. Returns true if all keys are bytewise equal.
+ * Hot path: when wide_mask == 0, reduces to a single memcmp over the
+ * packed 8-byte-per-key region.
*/
+static inline bool group_keys_equal(const int64_t* a_keys, const int64_t* b_keys,
+                                    const ght_layout_t* ly, void* const* key_data) {
+    uint8_t wide = ly->wide_key_mask;
+    uint8_t nk = ly->n_keys;
+    if (wide == 0) {
+        /* memcmp covers nk values + trailing 8-byte null mask slot */
+        return memcmp(a_keys, b_keys, (size_t)(nk + 1) * 8) == 0;
+    }
+    for (uint8_t k = 0; k < nk; k++) {
+        if (wide & (1u << k)) {
+            int64_t ra = a_keys[k]; /* for wide keys the slot holds a source row index, not the value */
+            int64_t rb = b_keys[k];
+            if (ra == rb) continue; /* same source row - trivially equal */
+            uint8_t esz = ly->wide_key_esz[k];
+            const char* base = (const char*)key_data[k];
+            if (memcmp(base + (size_t)ra * esz,
+                       base + (size_t)rb * esz, esz) != 0) return false;
+        } else {
+            if (a_keys[k] != b_keys[k]) return false;
+        }
+    }
+    /* Null mask slot must match too */
+    if (a_keys[nk] != b_keys[nk]) return false;
+    return true;
+}
+
+/* Probe + accumulate a single fat entry into the HT. Returns updated mask.
+ *
+ * entry:     fat entry; the first 8 bytes are the precomputed hash and the
+ *            key slots (n_keys values + null-mask slot) start at offset 8.
+ * key_types: forwarded to group_ht_rehash() when the table is resized.
+ * mask:      current slot mask; the returned value is ht->ht_cap - 1 after
+ *            a rehash attempt, otherwise the value passed in.
+ *
+ * An unused slot creates a new group: count is set to 1, the key slots
+ * are copied and the accumulators are seeded from the entry. A slot whose
+ * salt matches and whose keys compare equal gets count++ and is
+ * accumulated into. If the group store cannot grow, ht->oom is set and
+ * the entry is dropped.
+ */
+static inline uint32_t group_probe_entry(group_ht_t* ht,
+        const char* entry, const int8_t* key_types, uint32_t mask) {
+    const ght_layout_t* ly = &ht->layout;
+    uint64_t hash = *(const uint64_t*)entry;
+    const char* ekeys = entry + 8;
+    uint8_t salt = HT_SALT(hash);
+    uint32_t slot = (uint32_t)(hash & mask);
+    uint16_t key_bytes = (uint16_t)((ly->n_keys + 1) * 8); /* keys plus the null-mask slot */
+
+    for (;;) {
+        uint32_t sv = ht->slots[slot];
+        if (sv == HT_EMPTY) {
+            /* New group */
+            if (ht->grp_count >= ht->grp_cap) {
+                if (!group_ht_grow(ht)) { ht->oom = 1; return mask; }
+            }
+            uint32_t gid = ht->grp_count++;
+            char* row = ht->rows + (size_t)gid * ly->row_stride;
+            *(int64_t*)row = 1; /* count = 1 */
+            memcpy(row + 8, ekeys, key_bytes);
+            init_accum_from_entry(row, entry, ly);
+            ht->slots[slot] = HT_PACK(salt, gid);
+            if (ht->grp_count * 2 > ht->ht_cap) { /* keep the slot array at most half full */
+                group_ht_rehash(ht, key_types);
+                mask = ht->ht_cap - 1; /* re-read: ht_cap is unchanged if the rehash could not allocate */
+            }
+            return mask;
+        }
+        if (HT_SALT_V(sv) == salt) { /* salt mismatch skips the full key comparison */
+            uint32_t gid = HT_GID(sv);
+            char* row = ht->rows + (size_t)gid * ly->row_stride;
+            if (group_keys_equal((const int64_t*)(row + 8),
+                                 (const int64_t*)ekeys, ly, ht->key_data)) {
+                (*(int64_t*)row)++; /* count++ */
+                accum_from_entry(row, entry, ly);
+                return mask;
+            }
+        }
+        slot = (slot + 1) & mask; /* linear probing */
+    }
+}
+
+/* Process rows [start, end) from original columns into a local hash table.
+ * Converts each row to a fat entry on the stack, then probes.
+ *
+ * key_data / key_types / key_attrs: per-key column base pointers, type
+ *            codes and attrs used to read each key value.
+ * key_vecs:  per-key vectors, consulted only for null detection; may be
+ *            NULL or contain NULL entries, which are treated as non-null.
+ * agg_vecs:  per-aggregate input vectors; NULL entries contribute no value.
+ * match_idx: when non-NULL, iteration index i maps to row match_idx[i];
+ *            when NULL, row == i.
+ *
+ * The loop stops early when ray_interrupted() reports true at a checkpoint. */
+#define GROUP_PREFETCH_BATCH 16
+
+void group_rows_range(group_ht_t* ht, void** key_data, int8_t* key_types,
+                      uint8_t* key_attrs, ray_t** key_vecs, ray_t** agg_vecs,
+                      int64_t start, int64_t end,
+                      const int64_t* match_idx) {
+    const ght_layout_t* ly = &ht->layout;
+    uint8_t nk = ly->n_keys;
+    uint8_t na = ly->n_aggs;
+    uint8_t wide = ly->wide_key_mask;
+    uint32_t mask = ht->ht_cap - 1;
+    /* Stack buffer for one entry: hash + (nk+1) key slots + nv agg_vals.
+     * Max size: 8 + 9*8 + 8*8 = 144 bytes. */
+    char ebuf[8 + 9 * 8 + 8 * 8];
+
+    /* Check which key columns can produce nulls (parent vec's HAS_NULLS
+     * attr for slices) — skips per-row null checks on the fast path. */
+    uint8_t nullable_mask = 0;
+    for (uint8_t k = 0; k < nk; k++) {
+        if (!key_vecs || !key_vecs[k]) continue;
+        ray_t* kv = key_vecs[k];
+        ray_t* src = (kv->attrs & RAY_ATTR_SLICE) ? kv->slice_parent : kv;
+        if (src && (src->attrs & RAY_ATTR_HAS_NULLS))
+            nullable_mask |= (uint8_t)(1u << k);
+    }
+
+    /* Wire the HT's key_data pointer table so probe/rehash can
+     * resolve wide keys via the source columns. */
+    if (wide) group_ht_set_key_data(ht, key_data);
+
+    for (int64_t i = start; i < end; i++) {
+        /* Cancellation checkpoint every 65536 rows — ~150 polls on a
+         * 10M-row ingest, imperceptible in the inner loop and still
+         * sub-100ms response time on Ctrl-C. */
+        if (((i - start) & 65535) == 0 && ray_interrupted()) break;
+        int64_t row = match_idx ? match_idx[i] : i;
+        uint64_t h = 0;
+        int64_t* ek = (int64_t*)(ebuf + 8); /* key slots follow the 8-byte hash */
+        int64_t null_mask = 0;
+        for (uint8_t k = 0; k < nk; k++) {
+            int8_t t = key_types[k];
+            uint64_t kh;
+            bool is_null = (nullable_mask & (1u << k))
+                        && ray_vec_is_null(key_vecs[k], row);
+            if (is_null) {
+                null_mask |= (int64_t)(1u << k);
+                ek[k] = 0; /* canonical null value — real 0 differs via null_mask */
+                kh = ray_hash_i64(0);
+            } else if (wide & (1u << k)) {
+                /* Wide key: store source row index, hash the actual bytes. */
+                uint8_t esz = ly->wide_key_esz[k];
+                const void* src = (const char*)key_data[k] + (size_t)row * esz;
+                ek[k] = row;
+                kh = ray_hash_bytes(src, esz);
+            } else if (t == RAY_F64) {
+                int64_t kv;
+                memcpy(&kv, &((double*)key_data[k])[row], 8); /* store the raw bit pattern of the double */
+                ek[k] = kv;
+                kh = ray_hash_f64(((double*)key_data[k])[row]);
+            } else {
+                int64_t kv = read_col_i64(key_data[k], row, t, key_attrs[k]);
+                ek[k] = kv;
+                kh = ray_hash_i64(kv);
+            }
+            h = (k == 0) ? kh : ray_hash_combine(h, kh);
+        }
+        ek[nk] = null_mask;
+        if (null_mask) h = ray_hash_combine(h, ray_hash_i64(null_mask));
+        *(uint64_t*)ebuf = h;
+
+        int64_t* ev = (int64_t*)(ebuf + 8 + ((size_t)nk + 1) * 8); /* agg values follow the null-mask slot */
+        uint8_t vi = 0; /* packed index: NULL agg inputs take no value slot */
+        for (uint8_t a = 0; a < na; a++) {
+            ray_t* ac = agg_vecs[a];
+            if (!ac) continue;
+            if (ac->type == RAY_F64)
+                memcpy(&ev[vi], &((double*)ray_data(ac))[row], 8);
+            else
+                ev[vi] = read_col_i64(ray_data(ac), row, ac->type, ac->attrs);
+            vi++;
+        }
+
+        mask = group_probe_entry(ht, ebuf, key_types, mask);
+    }
+}
+
+/* ============================================================================
+ * Radix-partitioned parallel group-by
+ *
+ * Phase 1 (parallel): Each worker reads keys+agg values from original columns,
+ *          packs into fat entries (hash, keys, agg_vals), scatters into
+ *          thread-local per-partition buffers.
+ * Phase 2 (parallel): Each partition is aggregated independently using
+ *          inline data — no original column access needed.
+ * Phase 3: Build result columns from inline group rows.
+ * ============================================================================ */ + +#define RADIX_BITS 8 +#define RADIX_P (1u << RADIX_BITS) /* 256 partitions */ +#define RADIX_MASK (RADIX_P - 1) +#define RADIX_PART(h) (((uint32_t)((h) >> 16)) & RADIX_MASK) + +/* Per-worker, per-partition buffer of fat entries */ +typedef struct { + char* data; /* flat buffer: data[i * entry_stride] */ + uint32_t count; + uint32_t cap; + bool oom; /* set on realloc failure */ + ray_t* _hdr; +} radix_buf_t; + +static inline void radix_buf_push(radix_buf_t* buf, uint16_t entry_stride, + uint64_t hash, const int64_t* keys, uint8_t n_keys, + int64_t null_mask, + const int64_t* agg_vals, uint8_t n_agg_vals) { + if (__builtin_expect(buf->count >= buf->cap, 0)) { + uint32_t old_cap = buf->cap; + uint32_t new_cap = old_cap * 2; + char* new_data = (char*)scratch_realloc( + &buf->_hdr, (size_t)old_cap * entry_stride, + (size_t)new_cap * entry_stride); + if (!new_data) { buf->oom = true; return; } + buf->data = new_data; + buf->cap = new_cap; + } + char* dst = buf->data + (size_t)buf->count * entry_stride; + *(uint64_t*)dst = hash; + memcpy(dst + 8, keys, (size_t)n_keys * 8); + /* Null mask slot sits right after the keys */ + memcpy(dst + 8 + (size_t)n_keys * 8, &null_mask, 8); + if (n_agg_vals) + memcpy(dst + 8 + ((size_t)n_keys + 1) * 8, agg_vals, (size_t)n_agg_vals * 8); + buf->count++; +} + +typedef struct { + void** key_data; + int8_t* key_types; + uint8_t* key_attrs; + ray_t** key_vecs; + uint8_t nullable_mask; /* bit k = key k column may contain nulls */ + ray_t** agg_vecs; + uint32_t n_workers; + radix_buf_t* bufs; /* [n_workers * RADIX_P] */ + ght_layout_t layout; + /* When non-NULL, workers iterate match_idx[start..end) and + * read row=match_idx[i]. When NULL, row=i. 
*/ + const int64_t* match_idx; +} radix_phase1_ctx_t; + +static void radix_phase1_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t end) { + radix_phase1_ctx_t* c = (radix_phase1_ctx_t*)ctx; + const ght_layout_t* ly = &c->layout; + radix_buf_t* my_bufs = &c->bufs[(size_t)worker_id * RADIX_P]; + uint8_t nk = ly->n_keys; + uint8_t na = ly->n_aggs; + uint8_t nv = ly->n_agg_vals; + uint8_t wide = ly->wide_key_mask; + uint16_t estride = ly->entry_stride; + const int64_t* match_idx = c->match_idx; + + int64_t keys[8]; + int64_t agg_vals[8]; + + uint8_t nullable = c->nullable_mask; + for (int64_t i = start; i < end; i++) { + /* Cancellation checkpoint every 65536 rows — ~150 polls on a + * 10M-row ingest, imperceptible in the inner loop and still + * sub-100ms response time on Ctrl-C. */ + if (((i - start) & 65535) == 0 && ray_interrupted()) break; + int64_t row = match_idx ? match_idx[i] : i; + uint64_t h = 0; + int64_t null_mask = 0; + for (uint8_t k = 0; k < nk; k++) { + int8_t t = c->key_types[k]; + uint64_t kh; + bool is_null = (nullable & (1u << k)) + && ray_vec_is_null(c->key_vecs[k], row); + if (is_null) { + null_mask |= (int64_t)(1u << k); + keys[k] = 0; + kh = ray_hash_i64(0); + } else if (wide & (1u << k)) { + uint8_t esz = ly->wide_key_esz[k]; + const void* src = (const char*)c->key_data[k] + (size_t)row * esz; + keys[k] = row; + kh = ray_hash_bytes(src, esz); + } else if (t == RAY_F64) { + int64_t kv; + memcpy(&kv, &((double*)c->key_data[k])[row], 8); + keys[k] = kv; + kh = ray_hash_f64(((double*)c->key_data[k])[row]); + } else { + int64_t kv = read_col_i64(c->key_data[k], row, t, c->key_attrs[k]); + keys[k] = kv; + kh = ray_hash_i64(kv); + } + h = (k == 0) ? 
kh : ray_hash_combine(h, kh); + } + if (null_mask) h = ray_hash_combine(h, ray_hash_i64(null_mask)); + + uint8_t vi = 0; + for (uint8_t a = 0; a < na; a++) { + ray_t* ac = c->agg_vecs[a]; + if (!ac) continue; + if (ac->type == RAY_F64) + memcpy(&agg_vals[vi], &((double*)ray_data(ac))[row], 8); + else + agg_vals[vi] = read_col_i64(ray_data(ac), row, ac->type, ac->attrs); + vi++; + } + + uint32_t part = RADIX_PART(h); + radix_buf_push(&my_bufs[part], estride, h, keys, nk, null_mask, agg_vals, nv); + } +} + +/* Process pre-partitioned fat entries into an HT with prefetch batching. + * Two-phase prefetch: (1) prefetch HT slots, (2) prefetch group rows. */ +static void group_rows_indirect(group_ht_t* ht, const int8_t* key_types, + const char* entries, uint32_t n_entries, + uint16_t entry_stride) { + uint32_t mask = ht->ht_cap - 1; + /* Stride-ahead prefetch: prefetch HT slot for entry i+D while processing i. + * D=8 covers ~200ns L2/L3 latency at ~25ns per probe iteration. */ + enum { PF_DIST = 8 }; + /* Prime the prefetch pipeline */ + uint32_t pf_end = (n_entries < PF_DIST) ? 
n_entries : PF_DIST; + for (uint32_t j = 0; j < pf_end; j++) { + uint64_t h = *(const uint64_t*)(entries + (size_t)j * entry_stride); + __builtin_prefetch(&ht->slots[(uint32_t)(h & mask)], 0, 1); + } + for (uint32_t i = 0; i < n_entries; i++) { + /* Prefetch PF_DIST entries ahead */ + if (i + PF_DIST < n_entries) { + uint64_t h = *(const uint64_t*)(entries + (size_t)(i + PF_DIST) * entry_stride); + __builtin_prefetch(&ht->slots[(uint32_t)(h & mask)], 0, 1); + } + const char* e = entries + (size_t)i * entry_stride; + mask = group_probe_entry(ht, e, key_types, mask); + } +} + +/* Phase 3: build result columns from inline group rows */ +typedef struct { + int8_t out_type; + bool src_f64; + uint16_t agg_op; + bool affine; + double bias_f64; + int64_t bias_i64; + void* dst; + ray_t* vec; +} agg_out_t; + +/* Aliases for shared parallel null helpers from internal.h */ +#define grp_set_null par_set_null +#define grp_prepare_nullmap par_prepare_nullmap +#define grp_finalize_nulls par_finalize_nulls + +typedef struct { + group_ht_t* part_hts; + uint32_t* part_offsets; + char** key_dsts; + int8_t* key_types; + uint8_t* key_attrs; + uint8_t* key_esizes; + ray_t** key_cols; /* [n_keys] output key vecs (for null bit writes) */ + uint8_t n_keys; + agg_out_t* agg_outs; + uint8_t n_aggs; + /* For wide-key columns (RAY_GUID), the stored key slot is a + * source row index and we copy the actual bytes from the source + * column here during the result scatter. 
*/ + void** key_src_data; /* [n_keys]; NULL entry if not wide */ +} radix_phase3_ctx_t; + +static void radix_phase3_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t end) { + (void)worker_id; + radix_phase3_ctx_t* c = (radix_phase3_ctx_t*)ctx; + uint8_t nk = c->n_keys; + uint8_t na = c->n_aggs; + + for (int64_t p = start; p < end; p++) { + group_ht_t* ph = &c->part_hts[p]; + uint32_t gc = ph->grp_count; + if (gc == 0) continue; + uint32_t off = c->part_offsets[p]; + const ght_layout_t* ly = &ph->layout; + uint16_t rs = ly->row_stride; + + /* Single pass over group rows: read each row once, scatter keys + aggs. + * Reduces memory traffic from nk+na passes over group data to 1 pass. */ + for (uint32_t gi = 0; gi < gc; gi++) { + const char* row = ph->rows + (size_t)gi * rs; + const int64_t* rkeys = (const int64_t*)(const void*)(row + 8); + int64_t cnt = *(const int64_t*)(const void*)row; + int64_t null_mask = rkeys[nk]; + uint32_t di = off + gi; + + /* Scatter keys to result columns */ + for (uint8_t k = 0; k < nk; k++) { + if (null_mask & (int64_t)(1u << k)) { + if (c->key_cols && c->key_cols[k]) + grp_set_null(c->key_cols[k], di); + continue; + } + int64_t kv = rkeys[k]; + int8_t kt = c->key_types[k]; + char* dst = c->key_dsts[k]; + uint8_t esz = c->key_esizes[k]; + size_t doff = (size_t)di * esz; + if (ly->wide_key_mask & (1u << k)) { + /* Wide key: kv is the source row index; copy the + * bytes from the source column into the output. 
*/ + const char* src = (const char*)c->key_src_data[k]; + memcpy(dst + doff, src + (size_t)kv * esz, esz); + } else if (kt == RAY_F64) { + memcpy(dst + doff, &kv, 8); + } else { + write_col_i64(dst, di, kv, kt, c->key_attrs[k]); + } + } + + /* Scatter agg results to result columns */ + for (uint8_t a = 0; a < na; a++) { + agg_out_t* ao = &c->agg_outs[a]; + if (!ao->dst) continue; /* allocation failed (OOM) */ + uint16_t op = ao->agg_op; + bool sf = ao->src_f64; + int8_t s = ly->agg_val_slot[a]; + if (ao->out_type == RAY_F64) { + double v; + switch (op) { + case OP_SUM: + v = sf ? ROW_RD_F64(row, ly->off_sum, s) + : (double)ROW_RD_I64(row, ly->off_sum, s); + if (ao->affine) v += ao->bias_f64 * cnt; + break; + case OP_AVG: + v = sf ? ROW_RD_F64(row, ly->off_sum, s) / cnt + : (double)ROW_RD_I64(row, ly->off_sum, s) / cnt; + if (ao->affine) v += ao->bias_f64; + break; + case OP_MIN: + v = sf ? ROW_RD_F64(row, ly->off_min, s) + : (double)ROW_RD_I64(row, ly->off_min, s); + break; + case OP_MAX: + v = sf ? ROW_RD_F64(row, ly->off_max, s) + : (double)ROW_RD_I64(row, ly->off_max, s); + break; + case OP_FIRST: case OP_LAST: + v = sf ? ROW_RD_F64(row, ly->off_sum, s) + : (double)ROW_RD_I64(row, ly->off_sum, s); + break; + case OP_VAR: case OP_VAR_POP: + case OP_STDDEV: case OP_STDDEV_POP: { + bool insuf = (op == OP_VAR || op == OP_STDDEV) ? cnt <= 1 : cnt <= 0; + if (insuf) { v = 0.0; grp_set_null(ao->vec, di); break; } + double sum_val = sf ? ROW_RD_F64(row, ly->off_sum, s) + : (double)ROW_RD_I64(row, ly->off_sum, s); + double sq_val = ly->off_sumsq ? 
ROW_RD_F64(row, ly->off_sumsq, s) : 0.0; + double mean = sum_val / cnt; + double var_pop = sq_val / cnt - mean * mean; + if (var_pop < 0) var_pop = 0; + if (op == OP_VAR_POP) v = var_pop; + else if (op == OP_VAR) v = var_pop * cnt / (cnt - 1); + else if (op == OP_STDDEV_POP) v = sqrt(var_pop); + else v = sqrt(var_pop * cnt / (cnt - 1)); + break; + } + default: v = 0.0; break; + } + ((double*)(void*)ao->dst)[di] = v; + } else { + int64_t v; + switch (op) { + case OP_SUM: + v = ROW_RD_I64(row, ly->off_sum, s); + if (ao->affine) v += ao->bias_i64 * cnt; + break; + case OP_COUNT: v = cnt; break; + case OP_MIN: v = ROW_RD_I64(row, ly->off_min, s); break; + case OP_MAX: v = ROW_RD_I64(row, ly->off_max, s); break; + case OP_FIRST: case OP_LAST: v = ROW_RD_I64(row, ly->off_sum, s); break; + default: v = 0; break; + } + ((int64_t*)(void*)ao->dst)[di] = v; + } + } + } + } +} + +/* Phase 2: aggregate each partition independently using inline data */ +typedef struct { + int8_t* key_types; + uint8_t n_keys; + uint32_t n_workers; + radix_buf_t* bufs; + group_ht_t* part_hts; + ght_layout_t layout; + /* Shared (read-only) source column bases for wide-key resolution. + * Each partition HT stashes the ones matching wide_key_mask. */ + void** key_data; +} radix_phase2_ctx_t; + +static void radix_phase2_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t end) { + (void)worker_id; + radix_phase2_ctx_t* c = (radix_phase2_ctx_t*)ctx; + uint16_t estride = c->layout.entry_stride; + + for (int64_t p = start; p < end; p++) { + uint32_t total = 0; + for (uint32_t w = 0; w < c->n_workers; w++) + total += c->bufs[(size_t)w * RADIX_P + p].count; + if (total == 0) continue; + + uint32_t part_ht_cap = 256; + { + uint64_t target = (uint64_t)total * 2; + if (target < 256) target = 256; + while (part_ht_cap < target) part_ht_cap *= 2; + } + /* Pre-size group store to avoid grows. Use next_pow2(total) as upper + * bound on groups. 
Over-allocation is bounded: worst case total >> groups, + * but total * row_stride is already committed via HT capacity anyway. */ + uint32_t init_grp = 256; + while (init_grp < total && init_grp < 65536) init_grp *= 2; + if (!group_ht_init_sized(&c->part_hts[p], part_ht_cap, &c->layout, init_grp)) + continue; + /* Wide keys need source-column resolution during probe/rehash. */ + if (c->layout.wide_key_mask && c->key_data) + group_ht_set_key_data(&c->part_hts[p], c->key_data); + + for (uint32_t w = 0; w < c->n_workers; w++) { + radix_buf_t* buf = &c->bufs[(size_t)w * RADIX_P + p]; + if (buf->count == 0) continue; + group_rows_indirect(&c->part_hts[p], c->key_types, + buf->data, buf->count, estride); + } + } +} + +/* ============================================================================ + * Parallel direct-array accumulation for low-cardinality single integer key + * ============================================================================ */ + +/* Parallel min/max scan for direct-array key range detection */ +typedef struct { + const void* key_data; + int8_t key_type; + uint8_t key_attrs; + int64_t* per_worker_min; /* [n_workers] */ + int64_t* per_worker_max; /* [n_workers] */ + uint32_t n_workers; + const int64_t* match_idx; /* NULL = no selection */ +} minmax_ctx_t; + +static void minmax_scan_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t end) { + minmax_ctx_t* c = (minmax_ctx_t*)ctx; + uint32_t wid = worker_id % c->n_workers; + const int64_t* match_idx = c->match_idx; + int64_t kmin = INT64_MAX, kmax = INT64_MIN; + int8_t t = c->key_type; + + #define MINMAX_SEG_LOOP(TYPE, CAST) \ + do { \ + const TYPE* kd = (const TYPE*)c->key_data; \ + for (int64_t i = start; i < end; i++) { \ + int64_t r = match_idx ? 
match_idx[i] : i; \ + int64_t v = (int64_t)CAST kd[r]; \ + if (v < kmin) kmin = v; \ + if (v > kmax) kmax = v; \ + } \ + } while (0) + + if (t == RAY_I64 || t == RAY_TIMESTAMP) + MINMAX_SEG_LOOP(int64_t, ); + else if (RAY_IS_SYM(t)) { + uint8_t w = c->key_attrs & RAY_SYM_W_MASK; + if (w == RAY_SYM_W64) MINMAX_SEG_LOOP(int64_t, ); + else if (w == RAY_SYM_W32) MINMAX_SEG_LOOP(uint32_t, ); + else if (w == RAY_SYM_W16) MINMAX_SEG_LOOP(uint16_t, ); + else MINMAX_SEG_LOOP(uint8_t, ); + } + else if (t == RAY_BOOL || t == RAY_U8) + MINMAX_SEG_LOOP(uint8_t, ); + else if (t == RAY_I16) + MINMAX_SEG_LOOP(int16_t, ); + else /* RAY_I32, RAY_DATE, RAY_TIME */ + MINMAX_SEG_LOOP(int32_t, ); + + #undef MINMAX_SEG_LOOP + + /* Merge with existing per-worker values (a worker may process multiple morsels) */ + if (kmin < c->per_worker_min[wid]) c->per_worker_min[wid] = kmin; + if (kmax > c->per_worker_max[wid]) c->per_worker_max[wid] = kmax; +} + +typedef union { double f; int64_t i; } da_val_t; + +typedef struct { + da_val_t* sum; /* SUM/AVG/FIRST/LAST [n_slots * n_aggs] */ + da_val_t* min_val; /* MIN [n_slots * n_aggs] */ + da_val_t* max_val; /* MAX [n_slots * n_aggs] */ + double* sumsq_f64; /* sum-of-squares for STDDEV/VAR */ + int64_t* count; /* group counts [n_slots] */ + /* Arena headers */ + ray_t* _h_sum; + ray_t* _h_min; + ray_t* _h_max; + ray_t* _h_sumsq; + ray_t* _h_count; +} da_accum_t; + +static inline void da_accum_free(da_accum_t* a) { + scratch_free(a->_h_sum); + scratch_free(a->_h_min); + scratch_free(a->_h_max); + scratch_free(a->_h_sumsq); + scratch_free(a->_h_count); +} + +/* Unified agg result emitter — used by both DA and HT paths. + * Arrays indexed by [gi * n_aggs + a], counts by [gi]. 
*/
+static void emit_agg_columns(ray_t** result, ray_graph_t* g, const ray_op_ext_t* ext,
+                             ray_t* const* agg_vecs, uint32_t grp_count,
+                             uint8_t n_aggs,
+                             const double* sum_f64, const int64_t* sum_i64,
+                             const double* min_f64, const double* max_f64,
+                             const int64_t* min_i64, const int64_t* max_i64,
+                             const int64_t* counts,
+                             const agg_affine_t* affine,
+                             const double* sumsq_f64) {
+    /* For each aggregate: allocate an output vector of grp_count elements,
+     * finalize one value per group from the accumulator arrays, derive a
+     * column name and append the column to *result.
+     *
+     * Output type: F64 for AVG/VAR/STDDEV, I64 for COUNT, F64 or I64 for
+     * SUM/PROD depending on the input, and the input column's own type
+     * for everything else. A failed vector allocation skips the column.
+     */
+    for (uint8_t a = 0; a < n_aggs; a++) {
+        uint16_t agg_op = ext->agg_ops[a];
+        ray_t* agg_col = agg_vecs[a];
+        bool is_f64 = agg_col && agg_col->type == RAY_F64;
+        int8_t out_type;
+        switch (agg_op) {
+            case OP_AVG:
+            case OP_STDDEV: case OP_STDDEV_POP:
+            case OP_VAR: case OP_VAR_POP:
+                out_type = RAY_F64; break;
+            case OP_COUNT: out_type = RAY_I64; break;
+            case OP_SUM: case OP_PROD:
+                out_type = is_f64 ? RAY_F64 : RAY_I64; break;
+            default:
+                out_type = agg_col ? agg_col->type : RAY_I64; break;
+        }
+        ray_t* new_col = ray_vec_new(out_type, (int64_t)grp_count);
+        if (!new_col || RAY_IS_ERR(new_col)) continue;
+        new_col->len = (int64_t)grp_count;
+        for (uint32_t gi = 0; gi < grp_count; gi++) {
+            size_t idx = (size_t)gi * n_aggs + a;
+            if (out_type == RAY_F64) {
+                double v;
+                switch (agg_op) {
+                    case OP_SUM:
+                        v = is_f64 ? sum_f64[idx] : (double)sum_i64[idx];
+                        if (affine && affine[a].enabled)
+                            v += affine[a].bias_f64 * counts[gi];
+                        break;
+                    case OP_AVG:
+                        v = is_f64 ? sum_f64[idx] / counts[gi] : (double)sum_i64[idx] / counts[gi];
+                        if (affine && affine[a].enabled)
+                            v += affine[a].bias_f64;
+                        break;
+                    case OP_MIN: v = is_f64 ? min_f64[idx] : (double)min_i64[idx]; break;
+                    case OP_MAX: v = is_f64 ? max_f64[idx] : (double)max_i64[idx]; break;
+                    case OP_FIRST: case OP_LAST:
+                        v = is_f64 ? sum_f64[idx] : (double)sum_i64[idx]; break;
+                    case OP_VAR: case OP_VAR_POP:
+                    case OP_STDDEV: case OP_STDDEV_POP: {
+                        int64_t cnt = counts[gi];
+                        /* Sample statistics need at least two rows,
+                         * population statistics at least one. */
+                        bool insuf = (agg_op == OP_VAR || agg_op == OP_STDDEV) ? cnt <= 1 : cnt <= 0;
+                        if (insuf) { v = 0.0; ray_vec_set_null(new_col, gi, true); break; }
+                        double sum_val = is_f64 ? sum_f64[idx] : (double)sum_i64[idx];
+                        double sq_val = sumsq_f64 ? sumsq_f64[idx] : 0.0;
+                        double mean = sum_val / cnt;
+                        double var_pop = sq_val / cnt - mean * mean;
+                        /* Rounding can push the result slightly below 0. */
+                        if (var_pop < 0) var_pop = 0;
+                        if (agg_op == OP_VAR_POP) v = var_pop;
+                        else if (agg_op == OP_VAR) v = var_pop * cnt / (cnt - 1);
+                        else if (agg_op == OP_STDDEV_POP) v = sqrt(var_pop);
+                        else v = sqrt(var_pop * cnt / (cnt - 1));
+                        break;
+                    }
+                    default: v = 0.0; break;
+                }
+                ((double*)ray_data(new_col))[gi] = v;
+            } else {
+                int64_t v;
+                switch (agg_op) {
+                    case OP_SUM:
+                        v = sum_i64[idx];
+                        if (affine && affine[a].enabled)
+                            v += affine[a].bias_i64 * counts[gi];
+                        break;
+                    case OP_COUNT: v = counts[gi]; break;
+                    case OP_MIN: v = min_i64[idx]; break;
+                    case OP_MAX: v = max_i64[idx]; break;
+                    case OP_FIRST: case OP_LAST: v = sum_i64[idx]; break;
+                    default: v = 0; break;
+                }
+                /* For MIN/MAX/FIRST/LAST the vector has the input column's
+                 * type, which may be narrower than 8 bytes. Store through
+                 * the width-aware writer so the write matches the
+                 * allocated element size instead of assuming int64. */
+                write_col_i64(ray_data(new_col), gi, v, out_type, new_col->attrs);
+            }
+        }
+
+        /* Name suffix for this op; ops without an entry get no suffix. */
+        const char* sfx = "";
+        size_t slen = 0;
+        switch (agg_op) {
+            case OP_SUM:        sfx = "_sum";        slen = 4;  break;
+            case OP_COUNT:      sfx = "_count";      slen = 6;  break;
+            case OP_AVG:        sfx = "_mean";       slen = 5;  break;
+            case OP_MIN:        sfx = "_min";        slen = 4;  break;
+            case OP_MAX:        sfx = "_max";        slen = 4;  break;
+            case OP_FIRST:      sfx = "_first";      slen = 6;  break;
+            case OP_LAST:       sfx = "_last";       slen = 5;  break;
+            case OP_STDDEV:     sfx = "_stddev";     slen = 7;  break;
+            case OP_STDDEV_POP: sfx = "_stddev_pop"; slen = 11; break;
+            case OP_VAR:        sfx = "_var";        slen = 4;  break;
+            case OP_VAR_POP:    sfx = "_var_pop";    slen = 8;  break;
+            default: break;
+        }
+
+        /* Generate unique column name: base_name + agg suffix (e.g. "v1_sum") */
+        ray_op_ext_t* agg_ext = find_ext(g, ext->agg_ins[a]->id);
+        int64_t name_id;
+        if (agg_ext && agg_ext->base.opcode == OP_SCAN) {
+            ray_t* name_atom = ray_sym_str(agg_ext->sym);
+            const char* base = name_atom ? ray_str_ptr(name_atom) : NULL;
+            size_t blen = base ? ray_str_len(name_atom) : 0;
+            char buf[256];
+            if (base && blen + slen < sizeof(buf)) {
+                memcpy(buf, base, blen);
+                memcpy(buf + blen, sfx, slen);
+                name_id = ray_sym_intern(buf, blen + slen);
+            } else {
+                /* Name missing or too long: reuse the source symbol. */
+                name_id = agg_ext->sym;
+            }
+        } else {
+            /* Expression agg input — synthetic name like "_e0_sum" */
+            char nbuf[32];
+            int np = 0;
+            nbuf[np++] = '_'; nbuf[np++] = 'e';
+            /* Multi-digit agg index */
+            { uint8_t v = a; char dig[3]; int nd = 0;
+              do { dig[nd++] = (char)('0' + v % 10); v /= 10; } while (v);
+              while (nd--) nbuf[np++] = dig[nd]; }
+            memcpy(nbuf + np, sfx, slen);
+            name_id = ray_sym_intern(nbuf, (size_t)np + slen);
+        }
+        *result = ray_table_add_col(*result, name_id, new_col);
+        ray_release(new_col);
+    }
+}
+
+/* Bitmask
for which accumulator arrays are actually needed */
+#define DA_NEED_SUM   0x01 /* da_val_t sum array */
+#define DA_NEED_MIN   0x02 /* da_val_t min_val array */
+#define DA_NEED_MAX   0x04 /* da_val_t max_val array */
+#define DA_NEED_COUNT 0x08 /* count array */
+#define DA_NEED_SUMSQ 0x10 /* sumsq_f64 array (for STDDEV/VAR) */
+/* Context for the direct-array accumulation path. Each row's group slot
+ * is the sum over keys of (key value - key_mins[k]) * key_strides[k],
+ * computed from key_ptrs by the da_composite_gid helpers.
+ */
+typedef struct {
+    da_accum_t* accums;
+    uint32_t n_accums; /* number of accumulator sets (may < pool workers) */
+    void** key_ptrs; /* key data pointers [n_keys] */
+    int8_t* key_types; /* key type codes [n_keys] */
+    uint8_t* key_attrs; /* key attrs for RAY_SYM width [n_keys] */
+    uint8_t* key_esz; /* pre-computed per-key elem size [n_keys] */
+    int64_t* key_mins; /* per-key minimum [n_keys] */
+    int64_t* key_strides; /* per-key stride [n_keys] */
+    uint8_t n_keys;
+    void** agg_ptrs; /* presumably per-aggregate input data pointers [n_aggs] - TODO confirm */
+    int8_t* agg_types; /* presumably per-aggregate input type codes [n_aggs] - TODO confirm */
+    uint16_t* agg_ops; /* per-agg operation code */
+    uint8_t n_aggs;
+    uint8_t need_flags; /* DA_NEED_* bitmask */
+    uint32_t agg_f64_mask; /* bitmask: bit a set if agg[a] is RAY_F64 */
+    bool all_sum; /* true when all ops are SUM/AVG/COUNT (no MIN/MAX/FIRST/LAST) */
+    uint32_t n_slots; /* presumably the number of group slots per accumulator set - TODO confirm */
+    const int64_t* match_idx; /* NULL = no selection */
+} da_ctx_t;
+
+/* Composite GID from multi-key. Arithmetic overflow is prevented in practice
+ * by the DA budget check (DA_PER_WORKER_MAX) which limits total_slots to 262K.
*/ +static inline int32_t da_composite_gid(da_ctx_t* c, int64_t r) { + int32_t gid = 0; + for (uint8_t k = 0; k < c->n_keys; k++) { + int64_t val = read_by_esz(c->key_ptrs[k], r, c->key_esz[k]); + gid += (int32_t)((val - c->key_mins[k]) * c->key_strides[k]); + } + return gid; +} + +/* Typed composite GID: eliminates per-element switch when all keys share width */ +#define DEFINE_DA_COMPOSITE_GID_TYPED(SUFFIX, KTYPE) \ +static inline int32_t da_composite_gid_##SUFFIX(da_ctx_t* c, int64_t r) { \ + int32_t gid = 0; \ + for (uint8_t k = 0; k < c->n_keys; k++) { \ + int64_t val = (int64_t)((const KTYPE*)c->key_ptrs[k])[r]; \ + gid += (int32_t)((val - c->key_mins[k]) * c->key_strides[k]); \ + } \ + return gid; \ +} +DEFINE_DA_COMPOSITE_GID_TYPED(u8, uint8_t) +DEFINE_DA_COMPOSITE_GID_TYPED(u16, uint16_t) +DEFINE_DA_COMPOSITE_GID_TYPED(u32, uint32_t) +DEFINE_DA_COMPOSITE_GID_TYPED(i64, int64_t) +#undef DEFINE_DA_COMPOSITE_GID_TYPED + +static inline void da_read_val(const void* ptr, int8_t type, uint8_t attrs, + int64_t r, double* out_f64, int64_t* out_i64) { + if (type == RAY_F64) { + *out_f64 = ((const double*)ptr)[r]; + *out_i64 = (int64_t)*out_f64; + } else { + *out_i64 = read_col_i64(ptr, r, type, attrs); + *out_f64 = (double)*out_i64; + } +} + +/* Materialize a scalar (atom or len-1 vector) into a full-length vector so + * group-aggregation loops can read row-wise without out-of-bounds access. */ +static ray_t* materialize_broadcast_input(ray_t* src, int64_t nrows) { + if (!src || RAY_IS_ERR(src) || nrows < 0) return NULL; + + int8_t out_type = ray_is_atom(src) ? 
(int8_t)-src->type : src->type; + if (out_type <= 0 || out_type >= RAY_TYPE_COUNT) return NULL; + + ray_t* out = ray_vec_new(out_type, nrows); + if (!out || RAY_IS_ERR(out)) return out; + out->len = nrows; + if (nrows == 0) return out; + + if (!ray_is_atom(src)) { + uint8_t esz = col_esz(src); + const char* s = (const char*)ray_data(src); + char* d = (char*)ray_data(out); + for (int64_t i = 0; i < nrows; i++) + memcpy(d + (size_t)i * esz, s, esz); + return out; + } + + switch (src->type) { + case -RAY_F64: { + double v = src->f64; + for (int64_t i = 0; i < nrows; i++) ((double*)ray_data(out))[i] = v; + return out; + } + case -RAY_I64: + case -RAY_SYM: + case -RAY_TIMESTAMP: { + int64_t v = src->i64; + for (int64_t i = 0; i < nrows; i++) ((int64_t*)ray_data(out))[i] = v; + return out; + } + case -RAY_DATE: + case -RAY_TIME: { + int32_t v = (int32_t)src->i64; + for (int64_t i = 0; i < nrows; i++) ((int32_t*)ray_data(out))[i] = v; + return out; + } + case -RAY_I32: { + int32_t v = src->i32; + for (int64_t i = 0; i < nrows; i++) ((int32_t*)ray_data(out))[i] = v; + return out; + } + case -RAY_I16: { + int16_t v = src->i16; + for (int64_t i = 0; i < nrows; i++) ((int16_t*)ray_data(out))[i] = v; + return out; + } + case -RAY_U8: + case -RAY_BOOL: { + uint8_t v = src->u8; + for (int64_t i = 0; i < nrows; i++) ((uint8_t*)ray_data(out))[i] = v; + return out; + } + default: + ray_release(out); + return NULL; + } +} + +/* ---- Scalar aggregate (n_keys==0): one flat scan, no GID, no hash ---- */ +typedef struct { + void** agg_ptrs; + int8_t* agg_types; + uint16_t* agg_ops; + agg_linear_t* agg_linear; + uint8_t n_aggs; + uint8_t need_flags; + const int64_t* match_idx; /* NULL = no selection */ + /* per-worker accumulators (1 slot each) */ + da_accum_t* accums; + uint32_t n_accums; +} scalar_ctx_t; + +static inline int64_t scalar_i64_at(const void* ptr, int8_t type, int64_t r) { + return read_col_i64(ptr, r, type, 0); /* attrs=0: agg columns are numeric, never SYM */ +} + +/* 
Tight SIMD-friendly loop for single SUM/AVG on i64 (no mask). + * Note: int64 sum can overflow; caller responsibility to use appropriate types. */ +static void scalar_sum_i64_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t end) { + scalar_ctx_t* c = (scalar_ctx_t*)ctx; + da_accum_t* acc = &c->accums[worker_id]; + const int64_t* restrict data = (const int64_t*)c->agg_ptrs[0]; + int64_t sum = 0; + for (int64_t r = start; r < end; r++) + sum += data[r]; + acc->sum[0].i += sum; + acc->count[0] += end - start; +} + +/* Tight SIMD-friendly loop for single SUM/AVG on f64 (no mask) */ +static void scalar_sum_f64_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t end) { + scalar_ctx_t* c = (scalar_ctx_t*)ctx; + da_accum_t* acc = &c->accums[worker_id]; + const double* restrict data = (const double*)c->agg_ptrs[0]; + double sum = 0.0; + for (int64_t r = start; r < end; r++) + sum += data[r]; + acc->sum[0].f += sum; + acc->count[0] += end - start; +} + +/* Tight loop for single SUM/AVG on integer linear expression (no mask). 
The sum of (bias + sum_t coeff[t]*term_t[r]) over rows [start, end) is computed
+ * as bias*n + sum_t coeff[t]*(sum of term_t over the range), one pass per term
+ * column. Rows are read directly; c->match_idx is not consulted. The result is
+ * added to this worker's sum[0].i and n is added to its count[0]. */
+static void scalar_sum_linear_i64_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t end) {
+    scalar_ctx_t* c = (scalar_ctx_t*)ctx;
+    da_accum_t* acc = &c->accums[worker_id];
+    const agg_linear_t* lin = &c->agg_linear[0];
+    int64_t n = end - start;
+
+    int64_t sum = lin->bias_i64 * n; /* the constant term contributes once per row */
+    for (uint8_t t = 0; t < lin->n_terms; t++) {
+        int64_t coeff = lin->coeff_i64[t];
+        if (coeff == 0) continue; /* zero coefficient: skip the column scan entirely */
+        const void* ptr = lin->term_ptrs[t];
+        int8_t type = lin->term_types[t];
+        int64_t term_sum = 0;
+        for (int64_t r = start; r < end; r++)
+            term_sum += scalar_i64_at(ptr, type, r);
+        sum += coeff * term_sum; /* scale the column total once, not per row */
+    }
+
+    acc->sum[0].i += sum;
+    acc->count[0] += n;
+}
+
+/* Generic scalar accumulation: handles all ops, all types, mask */
+/* Inner scalar accumulation for a single row.
+ * Bumps count[0], then for each aggregate a obtains the row value either from
+ * its linear expression (when c->agg_linear[a].enabled) or from agg_ptrs[a]
+ * via da_read_val(); an aggregate with neither is skipped. The value is then
+ * folded into entry a of sum / sumsq_f64 / min_val / max_val according to
+ * agg_ops[a]. Float versus integer storage is chosen by
+ * agg_types[a] == RAY_F64. */
+static inline void scalar_accum_row(scalar_ctx_t* c, da_accum_t* acc, int64_t r) {
+    uint8_t n_aggs = c->n_aggs;
+    acc->count[0]++; /* incremented first so OP_FIRST can test count == 1 below */
+    for (uint8_t a = 0; a < n_aggs; a++) {
+        double fv; int64_t iv;
+        if (c->agg_linear && c->agg_linear[a].enabled) {
+            /* Evaluate bias + sum_t coeff[t]*term_t[r] for this one row. */
+            const agg_linear_t* lin = &c->agg_linear[a];
+            iv = lin->bias_i64;
+            for (uint8_t t = 0; t < lin->n_terms; t++) {
+                iv += lin->coeff_i64[t] *
+                      scalar_i64_at(lin->term_ptrs[t], lin->term_types[t], r);
+            }
+            fv = (double)iv;
+        } else {
+            if (!c->agg_ptrs[a]) continue; /* no input column for this aggregate */
+            da_read_val(c->agg_ptrs[a], c->agg_types[a], 0, r, &fv, &iv);
+        }
+        uint16_t op = c->agg_ops[a];
+        bool is_f = (c->agg_types[a] == RAY_F64);
+        if (op == OP_SUM || op == OP_AVG || op == OP_STDDEV || op == OP_STDDEV_POP || op == OP_VAR || op == OP_VAR_POP) {
+            if (is_f) acc->sum[a].f += fv;
+            else acc->sum[a].i += iv;
+            if (acc->sumsq_f64) acc->sumsq_f64[a] += fv * fv; /* kept only when the accumulator allocated it */
+        } else if (op == OP_FIRST) {
+            if (acc->count[0] == 1) { /* first row seen by this accumulator */
+                if (is_f) acc->sum[a].f = fv; else acc->sum[a].i = iv;
+            }
+        } else if (op == OP_LAST) {
+            if (is_f) acc->sum[a].f = fv; else acc->sum[a].i = iv; /* every row overwrites, so the last one wins */
+        } else if (op == OP_MIN) {
+            if (is_f) { if (fv < acc->min_val[a].f) acc->min_val[a].f = fv; }
+            else { if (iv < acc->min_val[a].i) acc->min_val[a].i = iv; }
+        } else if (op == OP_MAX) {
+            if (is_f) { if (fv > acc->max_val[a].f) acc->max_val[a].f = fv; }
+            else { if (iv > acc->max_val[a].i) acc->max_val[a].i = iv; }
+        }
+    }
+}
+/* Generic scalar worker: walks positions [start, end) and accumulates each
+ * row into accums[worker_id] via scalar_accum_row(). When c->match_idx is
+ * non-NULL a position i is translated to row match_idx[i]; otherwise the
+ * position is the row number. */
+static void scalar_accum_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t end) {
+    scalar_ctx_t* c = (scalar_ctx_t*)ctx;
+    da_accum_t* acc = &c->accums[worker_id];
+    const int64_t* match_idx = c->match_idx;
+
+    for (int64_t i = start; i < end; i++) {
+        int64_t r = match_idx ? match_idx[i] : i;
+        scalar_accum_row(c, acc, r);
+    }
+}
+
+/* Inner DA accumulation for a single row — shared by single-key and multi-key paths.
+ * Fast path for SUM/AVG-only queries: eliminates op-code dispatch and da_read_val
+ * dual-write overhead. The branch on c->all_sum is perfectly predicted (invariant
+ * across all rows).
+ *
+ * gid selects the group slot: count is indexed by gid, and the per-aggregate
+ * arrays (sum, sumsq_f64, min_val, max_val) are indexed by gid * n_aggs + a.
+ * Aggregates whose agg_ptrs[a] is NULL are skipped in both paths. */
+static inline void da_accum_row(da_ctx_t* c, da_accum_t* acc, int32_t gid, int64_t r) {
+    uint8_t n_aggs = c->n_aggs;
+    acc->count[gid]++;
+    size_t base = (size_t)gid * n_aggs; /* first per-aggregate entry of this group */
+
+    if (RAY_LIKELY(c->all_sum)) {
+        /* SUM/AVG/COUNT fast path — no op-code dispatch, typed read only.
+         * COUNT-only queries have acc->sum==NULL; count[gid]++ above suffices. */
+        if (!acc->sum) return;
+        uint32_t f64m = c->agg_f64_mask; /* bit a set => aggregate a is read as double */
+        for (uint8_t a = 0; a < n_aggs; a++) {
+            if (!c->agg_ptrs[a]) continue;
+            size_t idx = base + a;
+            if (f64m & (1u << a))
+                acc->sum[idx].f += ((const double*)c->agg_ptrs[a])[r];
+            else
+                acc->sum[idx].i += read_col_i64(c->agg_ptrs[a], r,
+                                                c->agg_types[a], 0);
+        }
+        return;
+    }
+
+    for (uint8_t a = 0; a < n_aggs; a++) {
+        if (!c->agg_ptrs[a]) continue;
+        size_t idx = base + a;
+        double fv; int64_t iv;
+        da_read_val(c->agg_ptrs[a], c->agg_types[a], 0, r, &fv, &iv);
+        uint16_t op = c->agg_ops[a];
+        if (op == OP_SUM || op == OP_AVG || op == OP_STDDEV || op == OP_STDDEV_POP || op == OP_VAR || op == OP_VAR_POP) {
+            if (c->agg_types[a] == RAY_F64) acc->sum[idx].f += fv;
+            else acc->sum[idx].i = (int64_t)((uint64_t)acc->sum[idx].i + (uint64_t)iv); /* added in uint64_t so overflow wraps instead of being signed-overflow UB */
+            if (acc->sumsq_f64) acc->sumsq_f64[idx] += fv * fv;
+        } else if (op == OP_FIRST) {
+            if (acc->count[gid] == 1) { /* first row of this group in this accumulator */
+                if (c->agg_types[a] == RAY_F64) acc->sum[idx].f = fv;
+                else acc->sum[idx].i = iv;
+            }
+        } else if (op == OP_LAST) {
+            if (c->agg_types[a] == RAY_F64) acc->sum[idx].f = fv;
+            else acc->sum[idx].i = iv;
+        } else if (op == OP_MIN) {
+            if (c->agg_types[a] == RAY_F64) {
+                if (fv < acc->min_val[idx].f) acc->min_val[idx].f = fv;
+            } else {
+                if (iv < acc->min_val[idx].i) acc->min_val[idx].i = iv;
+            }
+        } else if (op == OP_MAX) {
+            if (c->agg_types[a] == RAY_F64) {
+                if (fv > acc->max_val[idx].f) acc->max_val[idx].f = fv;
+            } else {
+                if (iv > acc->max_val[idx].i) acc->max_val[idx].i = iv;
+            }
+        }
+    }
+}
+/* Direct-array (DA) worker: for each position in [start, end) derives the
+ * group id from the key column(s) and accumulates the row into
+ * accums[worker_id] via da_accum_row(). Positions are mapped through
+ * c->match_idx when it is non-NULL. The group id is (key - key_mins[0]) for a
+ * single key and the da_composite_gid*() value for several keys. */
+static void da_accum_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t end) {
+    da_ctx_t* c = (da_ctx_t*)ctx;
+    da_accum_t* acc = &c->accums[worker_id];
+    uint8_t n_aggs = c->n_aggs;
+    uint8_t n_keys = c->n_keys;
+    const int64_t* match_idx = c->match_idx;
+
+    /* Fast path: single key — avoid composite GID loop overhead.
+     * Templated by key element size: the entire loop is stamped out per width
+     * so the compiler generates direct movzbl/movzwl/movl/movq — zero dispatch.
+     *
+     * When the accumulator has at least 4096 slots, the count (and sum) entry
+     * of the row DA_PF_DIST positions ahead is prefetched with
+     * __builtin_prefetch(addr, 1, 1), i.e. rw=1 (write), locality=1. */
+    #define DA_PF_DIST 8
+    #define DA_SINGLE_KEY_LOOP(KTYPE, KCAST) \
+    do { \
+        const KTYPE* kp = (const KTYPE*)c->key_ptrs[0]; \
+        int64_t kmin = c->key_mins[0]; \
+        bool da_pf = c->n_slots >= 4096; \
+        for (int64_t i = start; i < end; i++) { \
+            int64_t r = match_idx ? match_idx[i] : i; \
+            if (da_pf && RAY_LIKELY(i + DA_PF_DIST < end)) { \
+                int64_t pf_r = match_idx ? match_idx[i + DA_PF_DIST] : (i + DA_PF_DIST); \
+                int64_t pfk = (int64_t)KCAST kp[pf_r]; \
+                __builtin_prefetch(&acc->count[(int32_t)(pfk - kmin)], 1, 1); \
+                if (acc->sum) __builtin_prefetch( \
+                    &acc->sum[(size_t)(int32_t)(pfk - kmin) * n_aggs], 1, 1); \
+            } \
+            int64_t kv = (int64_t)KCAST kp[r]; \
+            da_accum_row(c, acc, (int32_t)(kv - kmin), r); \
+        } \
+    } while (0)
+
+    if (n_keys == 1) {
+        switch (c->key_esz[0]) {
+        case 1: DA_SINGLE_KEY_LOOP(uint8_t, ); break;
+        case 2: DA_SINGLE_KEY_LOOP(uint16_t, ); break;
+        case 4: DA_SINGLE_KEY_LOOP(uint32_t, (int64_t)); break; /* 4-byte keys are read as unsigned; NOTE(review): confirm key_mins is derived with the same unsigned read */
+        default: DA_SINGLE_KEY_LOOP(int64_t, ); break; /* any other width is treated as 8 bytes */
+        }
+        #undef DA_SINGLE_KEY_LOOP
+        return;
+    }
+
+    /* Multi-key composite GID — typed inner loop eliminates read_by_esz switch.
+     * When all keys share the same element size, use da_composite_gid_XX().
+     * GID_FN is redefined before each expansion to select the variant; the
+     * prefetch logic mirrors the single-key loop. */
+    #define DA_MULTI_KEY_LOOP(GID_FN) \
+    do { \
+        bool _da_pf = c->n_slots >= 4096; \
+        for (int64_t i = start; i < end; i++) { \
+            int64_t r = match_idx ? match_idx[i] : i; \
+            if (_da_pf && RAY_LIKELY(i + DA_PF_DIST < end)) { \
+                int64_t pf_r = match_idx ? match_idx[i + DA_PF_DIST] : (i + DA_PF_DIST); \
+                int32_t pf_gid = GID_FN(pf_r); \
+                __builtin_prefetch(&acc->count[pf_gid], 1, 1); \
+                if (acc->sum) __builtin_prefetch(&acc->sum[(size_t)pf_gid * n_aggs], 1, 1); \
+            } \
+            da_accum_row(c, acc, GID_FN(r), r); \
+        } \
+    } while (0)
+
+    /* Check if all keys share the same element size */
+    bool uniform_esz = true;
+    for (uint8_t k = 1; k < n_keys; k++)
+        if (c->key_esz[k] != c->key_esz[0]) { uniform_esz = false; break; }
+
+    if (uniform_esz) {
+        switch (c->key_esz[0]) {
+        case 1:
+#define GID_FN(R) da_composite_gid_u8(c, (R))
+            DA_MULTI_KEY_LOOP(GID_FN);
+#undef GID_FN
+            break;
+        case 2:
+#define GID_FN(R) da_composite_gid_u16(c, (R))
+            DA_MULTI_KEY_LOOP(GID_FN);
+#undef GID_FN
+            break;
+        case 4:
+#define GID_FN(R) da_composite_gid_u32(c, (R))
+            DA_MULTI_KEY_LOOP(GID_FN);
+#undef GID_FN
+            break;
+        default:
+#define GID_FN(R) da_composite_gid_i64(c, (R))
+            DA_MULTI_KEY_LOOP(GID_FN);
+#undef GID_FN
+            break;
+        }
+    } else {
+        /* Mixed key widths: fall back to the generic per-key read. */
+#define GID_FN(R) da_composite_gid(c, (R))
+        DA_MULTI_KEY_LOOP(GID_FN);
+#undef GID_FN
+    }
+    #undef DA_MULTI_KEY_LOOP
+    #undef DA_PF_DIST
+}
+
+/* Parallel DA merge: merge per-worker accumulators into accums[0] by
+ * dispatching disjoint slot ranges across pool workers.
*/
+typedef struct {
+    da_accum_t*     accums;         /* accums[0] is the merge target, accums[1..] the sources */
+    uint32_t        n_src_workers;  /* how many entries of accums take part (1..n) */
+    uint8_t         need_flags;     /* DA_NEED_* bits selecting which arrays are merged */
+    uint8_t         n_aggs;         /* aggregates per slot */
+    const int8_t*   agg_types;      /* value type of each aggregate (typed merge) */
+    const uint16_t* agg_ops;        /* opcode of each aggregate (FIRST/LAST merge); NULL = treat all as SUM */
+} da_merge_ctx_t;
+
+/* Merge worker: folds slots [start, end) of every source accumulator
+ * accums[1 .. n_src_workers-1] into accums[0], one source worker at a time in
+ * ascending worker order. For each slot the enabled arrays are merged
+ * (sumsq, sum, min, max) and the slot's row count is added last, so the
+ * FIRST rule below sees the target's count before this source contributed. */
+static void da_merge_fn(void* ctx, uint32_t wid, int64_t start, int64_t end) {
+    (void)wid;
+    const da_merge_ctx_t* mc = (const da_merge_ctx_t*)ctx;
+    da_accum_t* dst = mc->accums;
+    const uint8_t n_aggs = mc->n_aggs;
+    const int8_t* types = mc->agg_types;
+    const uint16_t* ops = mc->agg_ops;
+    const uint32_t n_workers = mc->n_src_workers;
+    const bool want_sumsq = (mc->need_flags & DA_NEED_SUMSQ) != 0;
+    const bool want_sum = (mc->need_flags & DA_NEED_SUM) != 0;
+    const bool want_min = (mc->need_flags & DA_NEED_MIN) != 0;
+    const bool want_max = (mc->need_flags & DA_NEED_MAX) != 0;
+
+    for (uint32_t w = 1; w < n_workers; w++) {
+        const da_accum_t* src = mc->accums + w;
+        for (int64_t s = start; s < end; s++) {
+            const size_t first = (size_t)s * n_aggs;
+
+            if (want_sumsq) {
+                for (uint8_t a = 0; a < n_aggs; a++) {
+                    const size_t i = first + a;
+                    dst->sumsq_f64[i] += src->sumsq_f64[i];
+                }
+            }
+
+            if (want_sum) {
+                for (uint8_t a = 0; a < n_aggs; a++) {
+                    const size_t i = first + a;
+                    const uint16_t aop = ops ? ops[a] : OP_SUM;
+                    if (aop == OP_FIRST) {
+                        /* Target keeps its value unless it has seen no rows yet. */
+                        if (dst->count[s] == 0 && src->count[s] > 0)
+                            dst->sum[i] = src->sum[i];
+                        continue;
+                    }
+                    if (aop == OP_LAST) {
+                        /* The highest-numbered worker holding rows wins. */
+                        if (src->count[s] > 0)
+                            dst->sum[i] = src->sum[i];
+                        continue;
+                    }
+                    if (types[a] == RAY_F64)
+                        dst->sum[i].f += src->sum[i].f;
+                    else
+                        dst->sum[i].i += src->sum[i].i;
+                }
+            }
+
+            if (want_min) {
+                for (uint8_t a = 0; a < n_aggs; a++) {
+                    const size_t i = first + a;
+                    if (types[a] != RAY_F64) {
+                        if (src->min_val[i].i < dst->min_val[i].i)
+                            dst->min_val[i].i = src->min_val[i].i;
+                    } else if (src->min_val[i].f < dst->min_val[i].f) {
+                        dst->min_val[i].f = src->min_val[i].f;
+                    }
+                }
+            }
+
+            if (want_max) {
+                for (uint8_t a = 0; a < n_aggs; a++) {
+                    const size_t i = first + a;
+                    if (types[a] != RAY_F64) {
+                        if (src->max_val[i].i > dst->max_val[i].i)
+                            dst->max_val[i].i = src->max_val[i].i;
+                    } else if (src->max_val[i].f > dst->max_val[i].f) {
+                        dst->max_val[i].f = src->max_val[i].f;
+                    }
+                }
+            }
+
+            dst->count[s] += src->count[s];
+        }
+    }
+}
+
+/* ============================================================================
+ * Partition-aware group-by: detect parted columns, concatenate segments into
+ * a flat table, then run standard exec_group once.
+ * ============================================================================ */ +ray_t* exec_group(ray_graph_t* g, ray_op_t* op, ray_t* tbl, + int64_t group_limit); /* forward decl */ + +/* Forward declaration — defined below exec_group */ +static ray_t* exec_group_per_partition(ray_t* parted_tbl, ray_op_ext_t* ext, + int32_t n_parts, const int64_t* key_syms, + const int64_t* agg_syms, int has_avg, + int has_stddev, int64_t group_limit); + +/* -------------------------------------------------------------------------- + * exec_group_parted — dispatch per-partition or concat-fallback + * -------------------------------------------------------------------------- */ +static ray_t* exec_group_parted(ray_graph_t* g, ray_op_t* op, ray_t* parted_tbl, + int64_t group_limit) { + int64_t ncols = ray_table_ncols(parted_tbl); + if (ncols <= 0) return ray_error("nyi", NULL); + + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + + uint8_t n_keys = ext->n_keys; + uint8_t n_aggs = ext->n_aggs; + + /* Find partition count and total rows from first parted column */ + int32_t n_parts = 0; + int64_t total_rows = 0; + for (int64_t c = 0; c < ncols; c++) { + ray_t* col = ray_table_get_col_idx(parted_tbl, c); + if (col && RAY_IS_PARTED(col->type)) { + n_parts = (int32_t)col->len; + total_rows = ray_parted_nrows(col); + break; + } + } + if (n_parts <= 0 || total_rows <= 0) return ray_error("nyi", NULL); + + /* Check eligibility for per-partition exec + merge: + * - All keys and agg inputs must be simple SCANs + * - Supported agg ops: SUM, COUNT, MIN, MAX, AVG, FIRST, LAST, + * STDDEV, STDDEV_POP, VAR, VAR_POP */ + int can_partition = 1; + int has_avg = 0; + int has_stddev = 0; + int64_t key_syms[8]; + for (uint8_t k = 0; k < n_keys && can_partition; k++) { + ray_op_ext_t* ke = find_ext(g, ext->keys[k]->id); + if (!ke || ke->base.opcode != OP_SCAN) { can_partition = 0; break; } + key_syms[k] = ke->sym; + } + int64_t agg_syms[8]; + for (uint8_t a = 0; 
a < n_aggs && can_partition; a++) { + uint16_t aop = ext->agg_ops[a]; + if (aop != OP_SUM && aop != OP_COUNT && aop != OP_MIN && + aop != OP_MAX && aop != OP_AVG && aop != OP_FIRST && + aop != OP_LAST && aop != OP_STDDEV && aop != OP_STDDEV_POP && + aop != OP_VAR && aop != OP_VAR_POP) { can_partition = 0; break; } + if (aop == OP_AVG) has_avg = 1; + if (aop == OP_STDDEV || aop == OP_STDDEV_POP || + aop == OP_VAR || aop == OP_VAR_POP) has_stddev = 1; + ray_op_ext_t* ae = find_ext(g, ext->agg_ins[a]->id); + if (!ae || ae->base.opcode != OP_SCAN) { can_partition = 0; break; } + agg_syms[a] = ae->sym; + } + + /* Cardinality gate: estimate groups from first partition. + * Per-partition only wins when #groups << partition_size. */ + if (can_partition) { + int64_t rows_per_part = total_rows / n_parts; + int64_t est_groups = 1; + for (uint8_t k = 0; k < n_keys; k++) { + ray_t* pcol = ray_table_get_col(parted_tbl, key_syms[k]); + if (!pcol) { est_groups = rows_per_part; break; } + /* MAPCOMMON key: constant per partition — excluded from + * per-partition sub-GROUP-BY, contributes 0 to cardinality. 
*/ + if (pcol->type == RAY_MAPCOMMON) { continue; } + if (!RAY_IS_PARTED(pcol->type)) { est_groups = rows_per_part; break; } + ray_t* seg0 = ((ray_t**)ray_data(pcol))[0]; + if (!seg0 || seg0->len <= 0) { est_groups = rows_per_part; break; } + int8_t bt = RAY_PARTED_BASETYPE(pcol->type); + int64_t card; + if (RAY_IS_SYM(bt)) { + uint32_t sym_n = ray_sym_count(); + if (sym_n == 0 || sym_n > 4194304) { est_groups = rows_per_part; break; } + size_t bwords = ((size_t)sym_n + 63) / 64; + ray_t* bits_hdr = NULL; + uint64_t* bits = (uint64_t*)scratch_calloc(&bits_hdr, bwords * 8); + if (!bits) { est_groups = rows_per_part; break; } + for (int64_t r = 0; r < seg0->len; r++) { + uint32_t id = (uint32_t)ray_read_sym(ray_data(seg0), r, seg0->type, seg0->attrs); + bits[id / 64] |= 1ULL << (id % 64); + } + card = 0; + for (size_t i = 0; i < bwords; i++) + card += __builtin_popcountll(bits[i]); + scratch_free(bits_hdr); + } else if (bt == RAY_I64) { + const int64_t* v = (const int64_t*)ray_data(seg0); + int64_t lo = v[0], hi = v[0]; + for (int64_t r = 1; r < seg0->len; r++) { + if (v[r] < lo) lo = v[r]; + if (v[r] > hi) hi = v[r]; + } + card = hi - lo + 1; + } else if (bt == RAY_I32) { + const int32_t* v = (const int32_t*)ray_data(seg0); + int32_t lo = v[0], hi = v[0]; + for (int64_t r = 1; r < seg0->len; r++) { + if (v[r] < lo) lo = v[r]; + if (v[r] > hi) hi = v[r]; + } + card = (int64_t)(hi - lo + 1); + } else { + card = seg0->len; + } + est_groups *= card; + if (est_groups > rows_per_part) { est_groups = rows_per_part; break; } + } + /* Block per-partition when cardinality is high AND the concat + * fallback would fit in memory (< 4 GB estimated). When concat is + * too large, per-partition with batched merge is the only option. 
*/ + int64_t concat_bytes = total_rows * 8LL * (int64_t)(n_keys + n_aggs); + if (est_groups * 100 > rows_per_part && + concat_bytes < 4LL * 1024 * 1024 * 1024) + can_partition = 0; + } + + /* Try per-partition path (separate noinline function to avoid I-cache pressure) */ + if (can_partition) { + ray_t* result = exec_group_per_partition(parted_tbl, ext, n_parts, + key_syms, agg_syms, has_avg, + has_stddev, group_limit); + if (result) return result; + /* NULL = per-partition failed, fall through to concat */ + } + + /* ---- Concat fallback ---- */ + /* ---- Concat-only-needed-columns fallback ---- + * Used when query has AVG or expression keys/aggs. + * Only concatenates the columns actually referenced by the GROUP BY. */ + { + /* Collect needed column sym IDs (keys + agg inputs) */ + int64_t needed[16]; + int n_needed = 0; + for (uint8_t k = 0; k < n_keys; k++) { + ray_op_ext_t* ke = find_ext(g, ext->keys[k]->id); + if (ke && ke->base.opcode == OP_SCAN) { + int dup = 0; + for (int i = 0; i < n_needed; i++) + if (needed[i] == ke->sym) { dup = 1; break; } + if (!dup) needed[n_needed++] = ke->sym; + } + } + for (uint8_t a = 0; a < n_aggs; a++) { + ray_op_ext_t* ae = find_ext(g, ext->agg_ins[a]->id); + if (ae && ae->base.opcode == OP_SCAN) { + int dup = 0; + for (int i = 0; i < n_needed; i++) + if (needed[i] == ae->sym) { dup = 1; break; } + if (!dup) needed[n_needed++] = ae->sym; + } else { + /* Expression agg input — need all columns for evaluation. + * Fall back to copying everything. */ + n_needed = 0; + break; + } + } + + /* Build flat table with only needed columns (or all if n_needed==0) */ + ray_t* flat_tbl = ray_table_new(n_needed > 0 ? (int64_t)n_needed : ncols); + if (!flat_tbl || RAY_IS_ERR(flat_tbl)) return flat_tbl; + + int64_t cols_to_iter = n_needed > 0 ? 
(int64_t)n_needed : ncols; + for (int64_t ci = 0; ci < cols_to_iter; ci++) { + ray_t* col; + int64_t name_id; + if (n_needed > 0) { + col = ray_table_get_col(parted_tbl, needed[ci]); + name_id = needed[ci]; + } else { + col = ray_table_get_col_idx(parted_tbl, ci); + name_id = ray_table_col_name(parted_tbl, ci); + } + if (!col) continue; + if (col->type == RAY_MAPCOMMON) { + ray_t* mc_flat = materialize_mapcommon(col); + if (mc_flat && !RAY_IS_ERR(mc_flat)) { + flat_tbl = ray_table_add_col(flat_tbl, name_id, mc_flat); + ray_release(mc_flat); + } + continue; + } + + if (!RAY_IS_PARTED(col->type)) { + ray_retain(col); + flat_tbl = ray_table_add_col(flat_tbl, name_id, col); + ray_release(col); + continue; + } + + int8_t base_type = (int8_t)RAY_PARTED_BASETYPE(col->type); + ray_t** segs = (ray_t**)ray_data(col); + ray_t* flat; + + if (base_type == RAY_STR) { + flat = parted_flatten_str(segs, col->len, total_rows); + } else { + uint8_t base_attrs = (base_type == RAY_SYM) + ? parted_first_attrs(segs, col->len) : 0; + flat = typed_vec_new(base_type, base_attrs, total_rows); + if (!flat || RAY_IS_ERR(flat)) { + ray_release(flat_tbl); + return ray_error("oom", NULL); + } + flat->len = total_rows; + + size_t elem_size = (size_t)ray_sym_elem_size(base_type, base_attrs); + int64_t offset = 0; + for (int32_t p = 0; p < n_parts; p++) { + ray_t* seg = segs[p]; + if (!seg || seg->len <= 0) continue; + if (parted_seg_esz_ok(seg, base_type, (uint8_t)elem_size)) { + memcpy((char*)ray_data(flat) + (size_t)offset * elem_size, + ray_data(seg), (size_t)seg->len * elem_size); + } else { + memset((char*)ray_data(flat) + (size_t)offset * elem_size, + 0, (size_t)seg->len * elem_size); + } + offset += seg->len; + } + } + if (!flat || RAY_IS_ERR(flat)) { + ray_release(flat_tbl); + return ray_error("oom", NULL); + } + + flat_tbl = ray_table_add_col(flat_tbl, name_id, flat); + ray_release(flat); + } + + ray_t* saved = g->table; + g->table = flat_tbl; + ray_t* result = exec_group(g, op, flat_tbl, 
0); + g->table = saved; + ray_release(flat_tbl); + return result; + } +} + +ray_t* exec_group(ray_graph_t* g, ray_op_t* op, ray_t* tbl, + int64_t group_limit) { + if (!tbl || RAY_IS_ERR(tbl)) return tbl; + + /* Selection-shape guard — runs BEFORE any fast path (parted + * dispatch, factorized shortcut) so every exec_group code path + * sees the same validated selection state. A mismatch here + * indicates a graph-construction bug: the caller installed a + * selection that was built for a different table shape, and + * silently ignoring it would return unfiltered results. */ + if (g->selection) { + ray_rowsel_t* sm = ray_rowsel_meta(g->selection); + int64_t tbl_nrows = ray_table_nrows(tbl); + if (sm->nrows != tbl_nrows) + return ray_error("domain", + "exec_group: selection nrows mismatch (sel=%lld tbl=%lld)", + (long long)sm->nrows, (long long)tbl_nrows); + } + + /* Parted dispatch: detect parted input columns */ + { + int64_t nc = ray_table_ncols(tbl); + for (int64_t c = 0; c < nc; c++) { + ray_t* col = ray_table_get_col_idx(tbl, c); + if (col && (RAY_IS_PARTED(col->type) || col->type == RAY_MAPCOMMON)) { + /* exec_group_parted has no rowsel plumbing — a + * selection in flight would be silently ignored. + * Reject rather than produce unfiltered results. */ + if (g->selection) + return ray_error("nyi", + "GROUP BY with selection on parted table"); + return exec_group_parted(g, op, tbl, group_limit); + } + } + } + + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + + int64_t nrows = ray_table_nrows(tbl); + uint8_t n_keys = ext->n_keys; + uint8_t n_aggs = ext->n_aggs; + + /* Factorized shortcut: if input is a factorized expand result with + * (_src, _count) columns, and GROUP BY _src with COUNT/SUM(_count), + * return the pre-aggregated table directly without re-scanning. 
+ * + * Interaction with g->selection: the factorized _count column + * encodes weighted counts, so COUNT(*) must SUM _count to get + * the true row count and SUM(_count) is the same thing. + * Neither the shortcut (returns verbatim, no filter) nor the + * main path (counts rows of the _src table, ignoring _count) + * knows how to apply a row filter while preserving those + * semantics. + * + * Other agg shapes — SUM/AVG/MIN/MAX of a non-_count column, + * etc. — don't rely on the factorized weighting; the main + * path handles them correctly with the selection installed. + * So the rejection must mirror the shortcut's exact + * compatibility check (all aggs are COUNT or SUM(_count)), + * not just the presence of a _count column. */ + if (g->selection && n_keys == 1 && n_aggs > 0 && nrows > 0) { + int64_t cnt_sym_probe = ray_sym_intern("_count", 6); + ray_t* cnt_col_probe = ray_table_get_col(tbl, cnt_sym_probe); + ray_op_ext_t* key_ext_probe = find_ext(g, ext->keys[0]->id); + int64_t src_sym_probe = ray_sym_intern("_src", 4); + if (cnt_col_probe && cnt_col_probe->type == RAY_I64 && + key_ext_probe && key_ext_probe->base.opcode == OP_SCAN && + key_ext_probe->sym == src_sym_probe) { + /* Reject on ANY agg whose semantics depend on the + * factorized _count weighting: COUNT(*) counts + * underlying source rows (not _src table rows) and + * SUM(_count) is equivalent. Even if only one agg in + * a mixed query needs weighting, the main path can't + * handle it correctly, so fail the whole query rather + * than return a mix of right and wrong columns. + * + * Special case: an empty selection (total_pass == 0) + * means every row was filtered out, so the result is + * an empty group set regardless of which aggs are + * involved. The main path handles this correctly + * even for count-weighted aggs because n_scan == 0 + * produces no group rows at all. Let it fall + * through. 
*/ + ray_rowsel_t* sm = ray_rowsel_meta(g->selection); + if (sm->total_pass > 0) { + bool needs_weighting = false; + for (uint8_t a = 0; a < n_aggs; a++) { + uint16_t aop = ext->agg_ops[a]; + ray_op_ext_t* agg_ext = find_ext(g, ext->agg_ins[a]->id); + if (aop == OP_COUNT) { needs_weighting = true; break; } + if (aop == OP_SUM && agg_ext && + agg_ext->base.opcode == OP_SCAN && + agg_ext->sym == cnt_sym_probe) { + needs_weighting = true; break; + } + } + if (needs_weighting) + return ray_error("nyi", + "GROUP BY with selection on factorized expand result " + "(COUNT/SUM(_count) semantics)"); + } + } + } + if (!g->selection && n_keys == 1 && n_aggs > 0 && nrows > 0) { + int64_t cnt_sym = ray_sym_intern("_count", 6); + ray_t* cnt_col = ray_table_get_col(tbl, cnt_sym); + if (cnt_col && cnt_col->type == RAY_I64) { + ray_op_ext_t* key_ext = find_ext(g, ext->keys[0]->id); + int64_t src_sym = ray_sym_intern("_src", 4); + if (key_ext && key_ext->base.opcode == OP_SCAN && + key_ext->sym == src_sym) { + /* Verify all aggs are compatible with factorized data: + * COUNT(*) → use _count directly + * SUM(_count) → use _count directly */ + bool all_compat = true; + for (uint8_t a = 0; a < n_aggs; a++) { + uint16_t aop = ext->agg_ops[a]; + ray_op_ext_t* agg_ext = find_ext(g, ext->agg_ins[a]->id); + if (aop == OP_COUNT) continue; + if (aop == OP_SUM && agg_ext && + agg_ext->base.opcode == OP_SCAN && + agg_ext->sym == cnt_sym) continue; + all_compat = false; + break; + } + if (all_compat) { + /* The factorized table already has one row per group. + * Build result with _src key + agg columns from _count. 
*/ + ray_t* src_col = ray_table_get_col(tbl, src_sym); + if (src_col) { + int64_t out_nkeys = 1; + int64_t out_ncols = out_nkeys + n_aggs; + ray_t* result = ray_table_new((int64_t)out_ncols); + if (!result || RAY_IS_ERR(result)) + return ray_error("oom", NULL); + ray_retain(src_col); + ray_t* tmp_r = ray_table_add_col(result, src_sym, src_col); + ray_release(src_col); + if (!tmp_r || RAY_IS_ERR(tmp_r)) { + ray_release(result); + return ray_error("oom", NULL); + } + result = tmp_r; + for (uint8_t a = 0; a < n_aggs; a++) { + ray_retain(cnt_col); + int64_t agg_name = ray_sym_intern("_agg", 4); + if (n_aggs > 1) { + char buf[16]; + int n = snprintf(buf, sizeof(buf), "_agg%d", a); + agg_name = ray_sym_intern(buf, (size_t)n); + } + tmp_r = ray_table_add_col(result, agg_name, cnt_col); + ray_release(cnt_col); + if (!tmp_r || RAY_IS_ERR(tmp_r)) { + ray_release(result); + return ray_error("oom", NULL); + } + result = tmp_r; + } + return result; + } + } + } + } + } + + if (n_keys > 8 || n_aggs > 8) return ray_error("nyi", NULL); + + /* Extract selection (rowsel) for pushdown. Workers iterate over + * [0, n_scan) and read row=match_idx[i]. When no selection is + * present, match_idx is NULL and n_scan equals nrows. The + * match_idx_block must be released on every exec_group exit + * path — see the various `goto cleanup` and early returns below. + * + * The top-of-function guard already rejected nrows mismatches, + * so if we reach here with a selection it's guaranteed valid + * for `tbl`. */ + ray_t* match_idx_block = NULL; + const int64_t* match_idx = NULL; + int64_t n_scan = nrows; + if (g->selection) { + match_idx_block = ray_rowsel_to_indices(g->selection); + if (!match_idx_block) return ray_error("oom", NULL); + match_idx = (const int64_t*)ray_data(match_idx_block); + n_scan = ray_rowsel_meta(g->selection)->total_pass; + } + + /* Resolve key columns (VLA — n_keys ≤ 8; use ≥1 to avoid zero-size VLA UB) */ + uint8_t vla_keys = n_keys > 0 ? 
n_keys : 1; + ray_t* key_vecs[vla_keys]; + memset(key_vecs, 0, vla_keys * sizeof(ray_t*)); + + uint8_t key_owned[vla_keys]; /* 1 = we allocated via exec_node, must free */ + memset(key_owned, 0, vla_keys * sizeof(uint8_t)); + for (uint8_t k = 0; k < n_keys; k++) { + ray_op_t* key_op = ext->keys[k]; + ray_op_ext_t* key_ext = find_ext(g, key_op->id); + if (key_ext && key_ext->base.opcode == OP_SCAN) { + key_vecs[k] = ray_table_get_col(tbl, key_ext->sym); + } else { + /* Expression key (CASE WHEN etc) — evaluate against current tbl */ + ray_t* saved_table = g->table; + g->table = tbl; + ray_t* vec = exec_node(g, key_op); + g->table = saved_table; + if (vec && !RAY_IS_ERR(vec)) { + key_vecs[k] = vec; + key_owned[k] = 1; + } + } + } + + /* Resolve agg input columns (VLA — n_aggs ≤ 8; use ≥1 to avoid zero-size VLA UB) */ + uint8_t vla_aggs = n_aggs > 0 ? n_aggs : 1; + ray_t* agg_vecs[vla_aggs]; + uint8_t agg_owned[vla_aggs]; /* 1 = we allocated via exec_node, must free */ + agg_affine_t agg_affine[vla_aggs]; + agg_linear_t agg_linear[vla_aggs]; + memset(agg_vecs, 0, vla_aggs * sizeof(ray_t*)); + memset(agg_owned, 0, vla_aggs * sizeof(uint8_t)); + memset(agg_affine, 0, vla_aggs * sizeof(agg_affine_t)); + memset(agg_linear, 0, vla_aggs * sizeof(agg_linear_t)); + + for (uint8_t a = 0; a < n_aggs; a++) { + ray_op_t* agg_input_op = ext->agg_ins[a]; + ray_op_ext_t* agg_ext = find_ext(g, agg_input_op->id); + + /* SUM/AVG(scan +/- const): aggregate base scan and apply bias at emit. */ + uint16_t agg_kind = ext->agg_ops[a]; + if ((agg_kind == OP_SUM || agg_kind == OP_AVG) && + try_affine_sumavg_input(g, tbl, agg_input_op, &agg_vecs[a], &agg_affine[a])) { + continue; + } + + /* SUM/AVG(integer-linear expr): scalar path can aggregate directly + * without materializing the expression vector. 
*/ + if (n_keys == 0 && nrows > 0 && + (agg_kind == OP_SUM || agg_kind == OP_AVG) && + try_linear_sumavg_input_i64(g, tbl, agg_input_op, &agg_linear[a])) { + continue; + } + + if (agg_ext && agg_ext->base.opcode == OP_SCAN) { + agg_vecs[a] = ray_table_get_col(tbl, agg_ext->sym); + } else if (agg_ext && agg_ext->base.opcode == OP_CONST && agg_ext->literal) { + agg_vecs[a] = agg_ext->literal; + } else { + /* Expression node (ADD/MUL etc) — try compiled expression first */ + ray_expr_t agg_expr; + if (expr_compile(g, tbl, agg_input_op, &agg_expr)) { + ray_t* vec = expr_eval_full(&agg_expr, nrows); + if (vec && !RAY_IS_ERR(vec)) { + agg_vecs[a] = vec; + agg_owned[a] = 1; + continue; + } + } + /* Fallback: full recursive evaluation */ + ray_t* saved_table = g->table; + g->table = tbl; + ray_t* vec = exec_node(g, agg_input_op); + g->table = saved_table; + if (vec && !RAY_IS_ERR(vec)) { + agg_vecs[a] = vec; + agg_owned[a] = 1; + } + } + } + + /* Normalize scalar agg inputs to full-length vectors. + * Constants and scalar sub-expressions (len=1) must be broadcast to nrows + * before row-wise aggregation loops. */ + for (uint8_t a = 0; a < n_aggs; a++) { + if (!agg_vecs[a] || RAY_IS_ERR(agg_vecs[a])) continue; + if (ext->agg_ops[a] == OP_COUNT) continue; /* value is ignored for COUNT */ + + bool needs_broadcast = ray_is_atom(agg_vecs[a]) || + (agg_vecs[a]->type > 0 && agg_vecs[a]->len == 1 && nrows > 1); + if (!needs_broadcast) continue; + + ray_t* bcast = materialize_broadcast_input(agg_vecs[a], nrows); + if (!bcast || RAY_IS_ERR(bcast)) { + for (uint8_t i = 0; i < n_aggs; i++) { + if (agg_owned[i] && agg_vecs[i]) ray_release(agg_vecs[i]); + } + for (uint8_t k = 0; k < n_keys; k++) { + if (key_owned[k] && key_vecs[k]) ray_release(key_vecs[k]); + } + return bcast && RAY_IS_ERR(bcast) ? 
bcast : ray_error("oom", NULL); + } + + if (agg_owned[a]) ray_release(agg_vecs[a]); + agg_vecs[a] = bcast; + agg_owned[a] = 1; + } + + /* Pre-compute key metadata (VLA — n_keys ≤ 8; vla_keys ≥ 1) */ + void* key_data[vla_keys]; + int8_t key_types[vla_keys]; + uint8_t key_attrs[vla_keys]; + for (uint8_t k = 0; k < n_keys; k++) { + if (key_vecs[k]) { + key_data[k] = ray_data(key_vecs[k]); + key_types[k] = key_vecs[k]->type; + key_attrs[k] = key_vecs[k]->attrs; + } else { + key_data[k] = NULL; + key_types[k] = 0; + key_attrs[k] = 0; + } + } + + /* ---- Scalar aggregate fast path (n_keys == 0): flat vector scan ---- */ + if (n_keys == 0 && nrows > 0) { + uint8_t need_flags = DA_NEED_COUNT; + for (uint8_t a = 0; a < n_aggs; a++) { + uint16_t aop = ext->agg_ops[a]; + if (aop == OP_SUM || aop == OP_AVG || aop == OP_FIRST || aop == OP_LAST) + need_flags |= DA_NEED_SUM; + else if (aop == OP_STDDEV || aop == OP_STDDEV_POP || aop == OP_VAR || aop == OP_VAR_POP) + { need_flags |= DA_NEED_SUM; need_flags |= DA_NEED_SUMSQ; } + else if (aop == OP_MIN) need_flags |= DA_NEED_MIN; + else if (aop == OP_MAX) need_flags |= DA_NEED_MAX; + } + + void* agg_ptrs[vla_aggs]; + int8_t agg_types[vla_aggs]; + for (uint8_t a = 0; a < n_aggs; a++) { + if (agg_vecs[a]) { + agg_ptrs[a] = ray_data(agg_vecs[a]); + agg_types[a] = agg_vecs[a]->type; + } else { + agg_ptrs[a] = NULL; + agg_types[a] = 0; + } + } + + ray_pool_t* sc_pool = ray_pool_get(); + uint32_t sc_n = (sc_pool && nrows >= RAY_PARALLEL_THRESHOLD) + ? 
ray_pool_total_workers(sc_pool) : 1; + + ray_t* sc_hdr; + da_accum_t* sc_acc = (da_accum_t*)scratch_calloc(&sc_hdr, + sc_n * sizeof(da_accum_t)); + if (!sc_acc) goto da_path; + + /* Allocate 1-slot accumulators per worker (n_aggs entries) */ + bool alloc_ok = true; + for (uint32_t w = 0; w < sc_n; w++) { + if (need_flags & DA_NEED_SUM) { + sc_acc[w].sum = (da_val_t*)scratch_calloc(&sc_acc[w]._h_sum, + n_aggs * sizeof(da_val_t)); + if (!sc_acc[w].sum) { alloc_ok = false; break; } + } + if (need_flags & DA_NEED_MIN) { + sc_acc[w].min_val = (da_val_t*)scratch_alloc(&sc_acc[w]._h_min, + n_aggs * sizeof(da_val_t)); + if (!sc_acc[w].min_val) { alloc_ok = false; break; } + for (uint8_t a = 0; a < n_aggs; a++) { + if (agg_types[a] == RAY_F64) sc_acc[w].min_val[a].f = DBL_MAX; + else sc_acc[w].min_val[a].i = INT64_MAX; + } + } + if (need_flags & DA_NEED_MAX) { + sc_acc[w].max_val = (da_val_t*)scratch_alloc(&sc_acc[w]._h_max, + n_aggs * sizeof(da_val_t)); + if (!sc_acc[w].max_val) { alloc_ok = false; break; } + for (uint8_t a = 0; a < n_aggs; a++) { + if (agg_types[a] == RAY_F64) sc_acc[w].max_val[a].f = -DBL_MAX; + else sc_acc[w].max_val[a].i = INT64_MIN; + } + } + if (need_flags & DA_NEED_SUMSQ) { + sc_acc[w].sumsq_f64 = (double*)scratch_calloc(&sc_acc[w]._h_sumsq, + n_aggs * sizeof(double)); + if (!sc_acc[w].sumsq_f64) { alloc_ok = false; break; } + } + sc_acc[w].count = (int64_t*)scratch_calloc(&sc_acc[w]._h_count, + 1 * sizeof(int64_t)); + if (!sc_acc[w].count) { alloc_ok = false; break; } + } + if (!alloc_ok) { + for (uint32_t w = 0; w < sc_n; w++) da_accum_free(&sc_acc[w]); + scratch_free(sc_hdr); + goto da_path; + } + + scalar_ctx_t sc_ctx = { + .agg_ptrs = agg_ptrs, + .agg_types = agg_types, + .agg_ops = ext->agg_ops, + .agg_linear = agg_linear, + .n_aggs = n_aggs, + .need_flags = need_flags, + .match_idx = match_idx, + .accums = sc_acc, + .n_accums = sc_n, + }; + + /* Pick specialized tight loop when possible, else generic. 
+ * The specialized scalar_sum_*_fn variants don't honour + * match_idx — they read data[r] directly — so they're only + * safe when no selection is in flight. */ + typedef void (*scalar_fn_t)(void*, uint32_t, int64_t, int64_t); + scalar_fn_t sc_fn = scalar_accum_fn; + if (n_aggs == 1 && !match_idx && agg_ptrs[0] != NULL) { + uint16_t op0 = ext->agg_ops[0]; + int8_t t0 = agg_types[0]; + if ((op0 == OP_SUM || op0 == OP_AVG) && + (t0 == RAY_I64 || t0 == RAY_SYM || t0 == RAY_TIMESTAMP)) + sc_fn = scalar_sum_i64_fn; + else if ((op0 == OP_SUM || op0 == OP_AVG) && t0 == RAY_F64) + sc_fn = scalar_sum_f64_fn; + } else if (n_aggs == 1 && !match_idx && agg_linear[0].enabled) { + uint16_t op0 = ext->agg_ops[0]; + if (op0 == OP_SUM || op0 == OP_AVG) + sc_fn = scalar_sum_linear_i64_fn; + } + + if (sc_n > 1) + ray_pool_dispatch(sc_pool, sc_fn, &sc_ctx, n_scan); + else + sc_fn(&sc_ctx, 0, 0, n_scan); + + /* Merge per-worker accumulators into sc_acc[0] */ + da_accum_t* m = &sc_acc[0]; + for (uint32_t w = 1; w < sc_n; w++) { + da_accum_t* wa = &sc_acc[w]; + if (need_flags & DA_NEED_SUM) { + for (uint8_t a = 0; a < n_aggs; a++) { + uint16_t merge_op = ext->agg_ops[a]; + if (merge_op == OP_FIRST) { + if (m->count[0] == 0 && wa->count[0] > 0) + m->sum[a] = wa->sum[a]; + } else if (merge_op == OP_LAST) { + if (wa->count[0] > 0) + m->sum[a] = wa->sum[a]; + } else { + if (agg_types[a] == RAY_F64) + m->sum[a].f += wa->sum[a].f; + else + m->sum[a].i += wa->sum[a].i; + } + } + } + if (need_flags & DA_NEED_SUMSQ) { + for (uint8_t a = 0; a < n_aggs; a++) + m->sumsq_f64[a] += wa->sumsq_f64[a]; + } + if (need_flags & DA_NEED_MIN) { + for (uint8_t a = 0; a < n_aggs; a++) { + if (agg_types[a] == RAY_F64) { + if (wa->min_val[a].f < m->min_val[a].f) + m->min_val[a].f = wa->min_val[a].f; + } else { + if (wa->min_val[a].i < m->min_val[a].i) + m->min_val[a].i = wa->min_val[a].i; + } + } + } + if (need_flags & DA_NEED_MAX) { + for (uint8_t a = 0; a < n_aggs; a++) { + if (agg_types[a] == RAY_F64) { + if 
(wa->max_val[a].f > m->max_val[a].f) + m->max_val[a].f = wa->max_val[a].f; + } else { + if (wa->max_val[a].i > m->max_val[a].i) + m->max_val[a].i = wa->max_val[a].i; + } + } + } + m->count[0] += wa->count[0]; + } + for (uint32_t w = 1; w < sc_n; w++) da_accum_free(&sc_acc[w]); + + /* Emit 1-row result with no key columns */ + ray_t* result = ray_table_new(n_aggs); + if (!result || RAY_IS_ERR(result)) { + da_accum_free(&sc_acc[0]); scratch_free(sc_hdr); + for (uint8_t a = 0; a < n_aggs; a++) + if (agg_owned[a] && agg_vecs[a]) ray_release(agg_vecs[a]); + for (uint8_t k = 0; k < n_keys; k++) + if (key_owned[k] && key_vecs[k]) ray_release(key_vecs[k]); + if (match_idx_block) ray_release(match_idx_block); + return result ? result : ray_error("oom", NULL); + } + + emit_agg_columns(&result, g, ext, agg_vecs, 1, n_aggs, + (double*)m->sum, (int64_t*)m->sum, + (double*)m->min_val, (double*)m->max_val, + (int64_t*)m->min_val, (int64_t*)m->max_val, + m->count, agg_affine, m->sumsq_f64); + + da_accum_free(&sc_acc[0]); scratch_free(sc_hdr); + for (uint8_t a = 0; a < n_aggs; a++) + if (agg_owned[a] && agg_vecs[a]) ray_release(agg_vecs[a]); + for (uint8_t k = 0; k < n_keys; k++) + if (key_owned[k] && key_vecs[k]) ray_release(key_vecs[k]); + if (match_idx_block) ray_release(match_idx_block); + return result; + } + +da_path:; + /* ---- Direct-array fast path for low-cardinality integer keys ---- */ + /* Supports multi-key via composite index: product of ranges <= MAX */ + #define DA_MAX_COMPOSITE_SLOTS 262144 /* 256K slots max */ + #define DA_MEM_BUDGET (256ULL << 20) /* 256 MB total across all workers */ + #define DA_PER_WORKER_MAX (6ULL << 20) /* 6 MB per-worker max */ + { + bool da_eligible = (nrows > 0 && n_keys > 0 && n_keys <= 8); + for (uint8_t k = 0; k < n_keys && da_eligible; k++) { + if (!key_data[k]) { da_eligible = false; break; } + int8_t t = key_types[k]; + if (t != RAY_I64 && t != RAY_SYM && t != RAY_I32 + && t != RAY_TIMESTAMP && t != RAY_DATE && t != RAY_TIME + && t 
!= RAY_BOOL && t != RAY_U8 && t != RAY_I16) { + da_eligible = false; + } + /* DA path cannot represent nulls — fall back to HT path. */ + if (key_vecs[k]) { + ray_t* src = (key_vecs[k]->attrs & RAY_ATTR_SLICE) + ? key_vecs[k]->slice_parent : key_vecs[k]; + if (src && (src->attrs & RAY_ATTR_HAS_NULLS)) + da_eligible = false; + } + } + + int64_t da_key_min[8], da_key_range[8], da_key_stride[8]; + uint64_t total_slots = 1; + bool da_fits = false; + + + if (da_eligible) { + da_fits = true; + ray_pool_t* mm_pool = ray_pool_get(); + uint32_t mm_n = (mm_pool && nrows >= RAY_PARALLEL_THRESHOLD) + ? ray_pool_total_workers(mm_pool) : 1; + /* VLA bounded by worker count — max ~2KB per key even on 256-core systems. */ + int64_t mm_mins[mm_n], mm_maxs[mm_n]; + for (uint8_t k = 0; k < n_keys && da_fits; k++) { + int64_t kmin, kmax; + for (uint32_t w = 0; w < mm_n; w++) { + mm_mins[w] = INT64_MAX; + mm_maxs[w] = INT64_MIN; + } + minmax_ctx_t mm_ctx = { + .key_data = key_data[k], + .key_type = key_types[k], + .key_attrs = key_attrs[k], + .per_worker_min = mm_mins, + .per_worker_max = mm_maxs, + .n_workers = mm_n, + .match_idx = match_idx, + }; + if (mm_n > 1) { + ray_pool_dispatch(mm_pool, minmax_scan_fn, &mm_ctx, n_scan); + } else { + minmax_scan_fn(&mm_ctx, 0, 0, n_scan); + } + kmin = INT64_MAX; kmax = INT64_MIN; + for (uint32_t w = 0; w < mm_n; w++) { + if (mm_mins[w] < kmin) kmin = mm_mins[w]; + if (mm_maxs[w] > kmax) kmax = mm_maxs[w]; + } + da_key_min[k] = kmin; + /* kmax - kmin may overflow i64 when keys span full range. + * Compute in uint64_t and reject if the span exceeds i64. 
*/ + uint64_t span = (uint64_t)kmax - (uint64_t)kmin + 1; + if (span > (uint64_t)INT64_MAX) { da_fits = false; break; } + da_key_range[k] = (int64_t)span; + if (da_key_range[k] <= 0) { da_fits = false; break; } + total_slots *= (uint64_t)da_key_range[k]; + if (total_slots > DA_MAX_COMPOSITE_SLOTS) da_fits = false; + } + } + + if (da_fits) { + /* Compute which accumulator arrays we actually need */ + uint8_t need_flags = DA_NEED_COUNT; /* always need count */ + for (uint8_t a = 0; a < n_aggs; a++) { + uint16_t aop = ext->agg_ops[a]; + if (aop == OP_SUM || aop == OP_AVG || aop == OP_FIRST || aop == OP_LAST) need_flags |= DA_NEED_SUM; + else if (aop == OP_STDDEV || aop == OP_STDDEV_POP || aop == OP_VAR || aop == OP_VAR_POP) + { need_flags |= DA_NEED_SUM; need_flags |= DA_NEED_SUMSQ; } + else if (aop == OP_MIN) need_flags |= DA_NEED_MIN; + else if (aop == OP_MAX) need_flags |= DA_NEED_MAX; + } + + /* Compute per-worker memory budget. Actual allocation is 1 union + * array per type, but MIN/MAX use conditional random writes that + * perform worse than radix-partitioned HT at high group counts. + * Weight MIN/MAX at 2x to keep those queries on the HT path. 
*/ + uint32_t arrays_per_agg = 0; + if (need_flags & DA_NEED_SUM) arrays_per_agg += 1; + if (need_flags & DA_NEED_MIN) arrays_per_agg += 2; /* 2x: DA MIN slow at high cardinality */ + if (need_flags & DA_NEED_MAX) arrays_per_agg += 2; /* 2x: DA MAX slow at high cardinality */ + if (need_flags & DA_NEED_SUMSQ) arrays_per_agg += 1; + uint64_t per_worker = total_slots * (arrays_per_agg * n_aggs + 1u) * 8u; + if (per_worker > DA_PER_WORKER_MAX) + da_fits = false; + } + + if (da_fits) { + /* Recompute need_flags (da_fits may have changed scope) */ + uint8_t need_flags = DA_NEED_COUNT; + bool all_sum = true; + for (uint8_t a = 0; a < n_aggs; a++) { + uint16_t aop = ext->agg_ops[a]; + if (aop == OP_SUM || aop == OP_AVG || aop == OP_FIRST || aop == OP_LAST) need_flags |= DA_NEED_SUM; + else if (aop == OP_STDDEV || aop == OP_STDDEV_POP || aop == OP_VAR || aop == OP_VAR_POP) + { need_flags |= DA_NEED_SUM; need_flags |= DA_NEED_SUMSQ; } + else if (aop == OP_MIN) need_flags |= DA_NEED_MIN; + else if (aop == OP_MAX) need_flags |= DA_NEED_MAX; + if (aop != OP_SUM && aop != OP_AVG && aop != OP_COUNT) + all_sum = false; + } + + /* Compute strides: stride[k] = product of ranges[k+1..n_keys-1] + * Guard against overflow: if any product exceeds INT64_MAX, + * fall through to HT path. 
*/ + bool stride_overflow = false; + for (uint8_t k = 0; k < n_keys; k++) { + int64_t s = 1; + for (uint8_t j = k + 1; j < n_keys; j++) { + if (da_key_range[j] != 0 && s > INT64_MAX / da_key_range[j]) { + stride_overflow = true; break; + } + s *= da_key_range[j]; + } + if (stride_overflow) break; + da_key_stride[k] = s; + } + if (stride_overflow) da_fits = false; + + uint32_t n_slots = (uint32_t)total_slots; + size_t total = (size_t)n_slots * n_aggs; + + void* agg_ptrs[vla_aggs]; + int8_t agg_types[vla_aggs]; + uint32_t agg_f64_mask = 0; + for (uint8_t a = 0; a < n_aggs; a++) { + if (agg_vecs[a]) { + agg_ptrs[a] = ray_data(agg_vecs[a]); + agg_types[a] = agg_vecs[a]->type; + if (agg_vecs[a]->type == RAY_F64) + agg_f64_mask |= (1u << a); + } else { + agg_ptrs[a] = NULL; + agg_types[a] = 0; + } + } + + ray_pool_t* da_pool = ray_pool_get(); + uint32_t da_n_workers = (da_pool && nrows >= RAY_PARALLEL_THRESHOLD) + ? ray_pool_total_workers(da_pool) : 1; + + /* Check memory budget — need one accumulator set per worker. + * Weight MIN/MAX at 2x in budget (same as eligibility check) to + * keep MIN/MAX-heavy queries on the faster radix-HT path. 
*/ + uint32_t arrays_per_agg = 0; + if (need_flags & DA_NEED_SUM) arrays_per_agg += 1; + if (need_flags & DA_NEED_MIN) arrays_per_agg += 2; + if (need_flags & DA_NEED_MAX) arrays_per_agg += 2; + if (need_flags & DA_NEED_SUMSQ) arrays_per_agg += 1; + uint64_t per_worker_bytes = (uint64_t)n_slots * (arrays_per_agg * n_aggs + 1u) * 8u; + if ((uint64_t)da_n_workers * per_worker_bytes > DA_MEM_BUDGET) + da_n_workers = 1; + + ray_t* accums_hdr; + da_accum_t* accums = (da_accum_t*)scratch_calloc(&accums_hdr, + da_n_workers * sizeof(da_accum_t)); + if (!accums) goto ht_path; + + bool alloc_ok = true; + for (uint32_t w = 0; w < da_n_workers; w++) { + if (need_flags & DA_NEED_SUM) { + accums[w].sum = (da_val_t*)scratch_calloc(&accums[w]._h_sum, + total * sizeof(da_val_t)); + if (!accums[w].sum) { alloc_ok = false; break; } + } + if (need_flags & DA_NEED_SUMSQ) { + accums[w].sumsq_f64 = (double*)scratch_calloc(&accums[w]._h_sumsq, + total * sizeof(double)); + if (!accums[w].sumsq_f64) { alloc_ok = false; break; } + } + if (need_flags & DA_NEED_MIN) { + accums[w].min_val = (da_val_t*)scratch_alloc(&accums[w]._h_min, + total * sizeof(da_val_t)); + if (!accums[w].min_val) { alloc_ok = false; break; } + for (size_t i = 0; i < total; i++) { + uint8_t a = (uint8_t)(i % n_aggs); + if (agg_types[a] == RAY_F64) accums[w].min_val[i].f = DBL_MAX; + else accums[w].min_val[i].i = INT64_MAX; + } + } + if (need_flags & DA_NEED_MAX) { + accums[w].max_val = (da_val_t*)scratch_alloc(&accums[w]._h_max, + total * sizeof(da_val_t)); + if (!accums[w].max_val) { alloc_ok = false; break; } + for (size_t i = 0; i < total; i++) { + uint8_t a = (uint8_t)(i % n_aggs); + if (agg_types[a] == RAY_F64) accums[w].max_val[i].f = -DBL_MAX; + else accums[w].max_val[i].i = INT64_MIN; + } + } + accums[w].count = (int64_t*)scratch_calloc(&accums[w]._h_count, + n_slots * sizeof(int64_t)); + if (!accums[w].count) { alloc_ok = false; break; } + } + if (!alloc_ok) { + for (uint32_t w = 0; w < da_n_workers; w++) + 
da_accum_free(&accums[w]); + scratch_free(accums_hdr); + goto ht_path; + } + + + /* Pre-compute per-key element sizes for fast DA reads */ + uint8_t da_key_esz[n_keys]; + for (uint8_t k = 0; k < n_keys; k++) + da_key_esz[k] = ray_sym_elem_size(key_types[k], key_attrs[k]); + + da_ctx_t da_ctx = { + .accums = accums, + .n_accums = da_n_workers, + .key_ptrs = key_data, + .key_types = key_types, + .key_attrs = key_attrs, + .key_esz = da_key_esz, + .key_mins = da_key_min, + .key_strides = da_key_stride, + .n_keys = n_keys, + .agg_ptrs = agg_ptrs, + .agg_types = agg_types, + .agg_ops = ext->agg_ops, + .n_aggs = n_aggs, + .need_flags = need_flags, + .agg_f64_mask = agg_f64_mask, + .all_sum = all_sum, + .n_slots = n_slots, + .match_idx = match_idx, + }; + + if (da_n_workers > 1) + ray_pool_dispatch(da_pool, da_accum_fn, &da_ctx, n_scan); + else + da_accum_fn(&da_ctx, 0, 0, n_scan); + + /* Merge target is always accums[0] */ + da_accum_t* merged = &accums[0]; + + /* Check if any agg is FIRST/LAST (needs ordered per-worker merge) */ + bool has_first_last = false; + for (uint8_t a = 0; a < n_aggs; a++) { + uint16_t aop = ext->agg_ops[a]; + if (aop == OP_FIRST || aop == OP_LAST) { has_first_last = true; break; } + } + + /* Merge per-worker accumulators into accums[0]. + * FIRST/LAST require worker-order-dependent merge (sequential). + * All other ops are commutative — dispatch over disjoint slot + * ranges for parallel merge. 
*/ + if (has_first_last) { + for (uint32_t w = 1; w < da_n_workers; w++) { + da_accum_t* wa = &accums[w]; + if (need_flags & DA_NEED_SUMSQ) { + for (size_t i = 0; i < total; i++) + merged->sumsq_f64[i] += wa->sumsq_f64[i]; + } + if (need_flags & DA_NEED_SUM) { + for (uint32_t s = 0; s < n_slots; s++) { + size_t base = (size_t)s * n_aggs; + for (uint8_t a = 0; a < n_aggs; a++) { + size_t idx = base + a; + uint16_t aop = ext->agg_ops[a]; + if (aop == OP_SUM || aop == OP_AVG || aop == OP_STDDEV || aop == OP_STDDEV_POP || aop == OP_VAR || aop == OP_VAR_POP) { + if (agg_types[a] == RAY_F64) merged->sum[idx].f += wa->sum[idx].f; + else merged->sum[idx].i += wa->sum[idx].i; + } else if (aop == OP_FIRST) { + if (merged->count[s] == 0 && wa->count[s] > 0) + merged->sum[idx] = wa->sum[idx]; + } else if (aop == OP_LAST) { + if (wa->count[s] > 0) + merged->sum[idx] = wa->sum[idx]; + } + } + } + } + if (need_flags & DA_NEED_MIN) { + for (size_t i = 0; i < total; i++) { + uint8_t a = (uint8_t)(i % n_aggs); + if (agg_types[a] == RAY_F64) { + if (wa->min_val[i].f < merged->min_val[i].f) + merged->min_val[i].f = wa->min_val[i].f; + } else { + if (wa->min_val[i].i < merged->min_val[i].i) + merged->min_val[i].i = wa->min_val[i].i; + } + } + } + if (need_flags & DA_NEED_MAX) { + for (size_t i = 0; i < total; i++) { + uint8_t a = (uint8_t)(i % n_aggs); + if (agg_types[a] == RAY_F64) { + if (wa->max_val[i].f > merged->max_val[i].f) + merged->max_val[i].f = wa->max_val[i].f; + } else { + if (wa->max_val[i].i > merged->max_val[i].i) + merged->max_val[i].i = wa->max_val[i].i; + } + } + } + for (uint32_t s = 0; s < n_slots; s++) + merged->count[s] += wa->count[s]; + } + } else if (da_n_workers > 1 && n_slots >= 1024 && da_pool) { + /* Parallel merge: dispatch over disjoint slot ranges */ + da_merge_ctx_t merge_ctx = { + .accums = accums, + .n_src_workers = da_n_workers, + .need_flags = need_flags, + .n_aggs = n_aggs, + .agg_types = agg_types, + .agg_ops = ext->agg_ops, + }; + 
ray_pool_dispatch(da_pool, da_merge_fn, &merge_ctx, (int64_t)n_slots); + } else { + /* Sequential merge for small slot counts */ + for (uint32_t w = 1; w < da_n_workers; w++) { + da_accum_t* wa = &accums[w]; + if (need_flags & DA_NEED_SUMSQ) { + for (size_t i = 0; i < total; i++) + merged->sumsq_f64[i] += wa->sumsq_f64[i]; + } + if (need_flags & DA_NEED_SUM) { + for (uint32_t s = 0; s < n_slots; s++) { + size_t base = (size_t)s * n_aggs; + for (uint8_t a = 0; a < n_aggs; a++) { + size_t idx = base + a; + uint16_t aop = ext->agg_ops[a]; + if (aop == OP_FIRST) { + if (merged->count[s] == 0 && wa->count[s] > 0) + merged->sum[idx] = wa->sum[idx]; + } else if (aop == OP_LAST) { + if (wa->count[s] > 0) + merged->sum[idx] = wa->sum[idx]; + } else if (agg_types[a] == RAY_F64) + merged->sum[idx].f += wa->sum[idx].f; + else + merged->sum[idx].i += wa->sum[idx].i; + } + } + } + if (need_flags & DA_NEED_MIN) { + for (size_t i = 0; i < total; i++) { + uint8_t a = (uint8_t)(i % n_aggs); + if (agg_types[a] == RAY_F64) { + if (wa->min_val[i].f < merged->min_val[i].f) + merged->min_val[i].f = wa->min_val[i].f; + } else { + if (wa->min_val[i].i < merged->min_val[i].i) + merged->min_val[i].i = wa->min_val[i].i; + } + } + } + if (need_flags & DA_NEED_MAX) { + for (size_t i = 0; i < total; i++) { + uint8_t a = (uint8_t)(i % n_aggs); + if (agg_types[a] == RAY_F64) { + if (wa->max_val[i].f > merged->max_val[i].f) + merged->max_val[i].f = wa->max_val[i].f; + } else { + if (wa->max_val[i].i > merged->max_val[i].i) + merged->max_val[i].i = wa->max_val[i].i; + } + } + } + for (uint32_t s = 0; s < n_slots; s++) + merged->count[s] += wa->count[s]; + } + } + + + + for (uint32_t w = 1; w < da_n_workers; w++) + da_accum_free(&accums[w]); + + da_val_t* da_sum = merged->sum; /* may be NULL if !DA_NEED_SUM */ + da_val_t* da_min_val = merged->min_val; /* may be NULL if !DA_NEED_MIN */ + da_val_t* da_max_val = merged->max_val; /* may be NULL if !DA_NEED_MAX */ + double* da_sumsq = merged->sumsq_f64; 
/* may be NULL if !DA_NEED_SUMSQ */ + int64_t* da_count = merged->count; + + uint32_t grp_count = 0; + for (uint32_t s = 0; s < n_slots; s++) + if (da_count[s] > 0) grp_count++; + + int64_t total_cols = n_keys + n_aggs; + ray_t* result = ray_table_new(total_cols); + if (!result || RAY_IS_ERR(result)) { + da_accum_free(&accums[0]); scratch_free(accums_hdr); + for (uint8_t a = 0; a < n_aggs; a++) + if (agg_owned[a] && agg_vecs[a]) ray_release(agg_vecs[a]); + for (uint8_t k = 0; k < n_keys; k++) + if (key_owned[k] && key_vecs[k]) ray_release(key_vecs[k]); + if (match_idx_block) ray_release(match_idx_block); + return result ? result : ray_error("oom", NULL); + } + + /* Key columns — decompose composite slot back to per-key values */ + for (uint8_t k = 0; k < n_keys; k++) { + ray_t* src_col = key_vecs[k]; + if (!src_col) continue; + ray_t* key_col = col_vec_new(src_col, (int64_t)grp_count); + if (!key_col || RAY_IS_ERR(key_col)) continue; + key_col->len = (int64_t)grp_count; + uint32_t gi = 0; + for (uint32_t s = 0; s < n_slots; s++) { + if (da_count[s] == 0) continue; + int64_t offset = ((int64_t)s / da_key_stride[k]) % da_key_range[k]; + int64_t key_val = da_key_min[k] + offset; + write_col_i64(ray_data(key_col), gi, key_val, src_col->type, key_col->attrs); + gi++; + } + ray_op_ext_t* key_ext = find_ext(g, ext->keys[k]->id); + int64_t name_id = key_ext ? key_ext->sym : (int64_t)k; + result = ray_table_add_col(result, name_id, key_col); + ray_release(key_col); + } + + /* Agg columns — compact sparse DA arrays into dense, then emit */ + size_t dense_total = (size_t)grp_count * n_aggs; + ray_t *_h_dsum = NULL, *_h_dmin = NULL, *_h_dmax = NULL; + ray_t *_h_dsq = NULL, *_h_dcnt = NULL; + da_val_t* dense_sum = da_sum ? (da_val_t*)scratch_alloc(&_h_dsum, dense_total * sizeof(da_val_t)) : NULL; + da_val_t* dense_min_val = da_min_val ? (da_val_t*)scratch_alloc(&_h_dmin, dense_total * sizeof(da_val_t)) : NULL; + da_val_t* dense_max_val = da_max_val ? 
(da_val_t*)scratch_alloc(&_h_dmax, dense_total * sizeof(da_val_t)) : NULL; + double* dense_sumsq = da_sumsq ? (double*)scratch_alloc(&_h_dsq, dense_total * sizeof(double)) : NULL; + int64_t* dense_counts = (int64_t*)scratch_alloc(&_h_dcnt, grp_count * sizeof(int64_t)); + + uint32_t gi = 0; + for (uint32_t s = 0; s < n_slots; s++) { + if (da_count[s] == 0) continue; + dense_counts[gi] = da_count[s]; + for (uint8_t a = 0; a < n_aggs; a++) { + size_t si = (size_t)s * n_aggs + a; + size_t di = (size_t)gi * n_aggs + a; + if (dense_sum) dense_sum[di] = da_sum[si]; + if (dense_min_val) dense_min_val[di] = da_min_val[si]; + if (dense_max_val) dense_max_val[di] = da_max_val[si]; + if (dense_sumsq) dense_sumsq[di] = da_sumsq[si]; + } + gi++; + } + + emit_agg_columns(&result, g, ext, agg_vecs, grp_count, n_aggs, + (double*)dense_sum, (int64_t*)dense_sum, + (double*)dense_min_val, (double*)dense_max_val, + (int64_t*)dense_min_val, (int64_t*)dense_max_val, + dense_counts, agg_affine, dense_sumsq); + + scratch_free(_h_dsum); scratch_free(_h_dmin); + scratch_free(_h_dmax); + scratch_free(_h_dsq); scratch_free(_h_dcnt); + + da_accum_free(&accums[0]); scratch_free(accums_hdr); + for (uint8_t a = 0; a < n_aggs; a++) + if (agg_owned[a] && agg_vecs[a]) ray_release(agg_vecs[a]); + for (uint8_t k = 0; k < n_keys; k++) + if (key_owned[k] && key_vecs[k]) ray_release(key_vecs[k]); + if (match_idx_block) ray_release(match_idx_block); + return result; + } + } + +ht_path:; + /* Compute which accumulator arrays the HT needs based on agg ops. + * COUNT only reads group row's count field — no accumulator needed. 
*/ + uint8_t ght_need = 0; + for (uint8_t a = 0; a < n_aggs; a++) { + uint16_t aop = ext->agg_ops[a]; + if (aop == OP_SUM || aop == OP_AVG || aop == OP_FIRST || aop == OP_LAST) + ght_need |= GHT_NEED_SUM; + if (aop == OP_STDDEV || aop == OP_STDDEV_POP || aop == OP_VAR || aop == OP_VAR_POP) + { ght_need |= GHT_NEED_SUM; ght_need |= GHT_NEED_SUMSQ; } + if (aop == OP_MIN) ght_need |= GHT_NEED_MIN; + if (aop == OP_MAX) ght_need |= GHT_NEED_MAX; + } + + /* RAY_STR keys still need the eval-level path (variable-width + * with a pool). RAY_GUID uses the wide-key row-indirection + * support in the layout; see ght_layout_t.wide_key_mask. */ + for (uint8_t k = 0; k < n_keys; k++) { + if (key_types[k] == RAY_STR) { + for (uint8_t kk = 0; kk < n_keys; kk++) + if (key_owned[kk] && key_vecs[kk]) ray_release(key_vecs[kk]); + for (uint8_t a = 0; a < n_aggs; a++) + if (agg_owned[a] && agg_vecs[a]) ray_release(agg_vecs[a]); + if (match_idx_block) ray_release(match_idx_block); + return ray_error("nyi", NULL); + } + } + + /* Compute row-layout: keys + agg values inline */ + ght_layout_t ght_layout = ght_compute_layout(n_keys, n_aggs, agg_vecs, ght_need, ext->agg_ops, key_types); + + /* Right-sized hash table: start small, rehash on load > 0.5 */ + uint32_t ht_cap = 256; + { + uint64_t target = (uint64_t)nrows < 65536 ? (uint64_t)nrows : 65536; + if (target < 256) target = 256; + while (ht_cap < target) ht_cap *= 2; + } + + /* Parallel path: radix-partitioned group-by */ + ray_pool_t* pool = ray_pool_get(); + uint32_t n_total = pool ? 
ray_pool_total_workers(pool) : 1; + + group_ht_t single_ht; + group_ht_t* final_ht = NULL; + ray_t* result = NULL; + + ray_t* radix_bufs_hdr = NULL; + radix_buf_t* radix_bufs = NULL; + ray_t* part_hts_hdr = NULL; + group_ht_t* part_hts = NULL; + + if (pool && nrows >= RAY_PARALLEL_THRESHOLD && n_total > 1) { + size_t n_bufs = (size_t)n_total * RADIX_P; + radix_bufs = (radix_buf_t*)scratch_calloc(&radix_bufs_hdr, + n_bufs * sizeof(radix_buf_t)); + if (!radix_bufs) goto sequential_fallback; + + /* Pre-size each buffer: 1.5x expected, capped so total ≤ 2 GB. + * Buffers grow on demand via radix_buf_push doubling. */ + uint32_t buf_init = (uint32_t)((uint64_t)nrows / (RADIX_P * n_total)); + if (buf_init < 64) buf_init = 64; + buf_init = buf_init + buf_init / 2; /* 1.5x headroom */ + uint16_t estride = ght_layout.entry_stride; + { + /* Cap: total pre-alloc ≤ 2 GB */ + size_t total_pre = (size_t)n_bufs * buf_init * estride; + if (total_pre > (size_t)2 << 30) { + buf_init = (uint32_t)(((size_t)2 << 30) / ((size_t)n_bufs * estride)); + if (buf_init < 64) buf_init = 64; + } + } + for (size_t i = 0; i < n_bufs; i++) { + radix_bufs[i].data = (char*)scratch_alloc( + &radix_bufs[i]._hdr, (size_t)buf_init * estride); + radix_bufs[i].count = 0; + radix_bufs[i].cap = buf_init; + } + + /* Compute per-key nullability — lets phase1 skip null checks on + * key columns with no nulls (the common case). */ + uint8_t p1_nullable = 0; + for (uint8_t k = 0; k < n_keys; k++) { + if (!key_vecs[k]) continue; + ray_t* src = (key_vecs[k]->attrs & RAY_ATTR_SLICE) + ? 
key_vecs[k]->slice_parent : key_vecs[k]; + if (src && (src->attrs & RAY_ATTR_HAS_NULLS)) + p1_nullable |= (uint8_t)(1u << k); + } + + /* Phase 1: parallel hash + copy keys/agg values into fat entries */ + radix_phase1_ctx_t p1ctx = { + .key_data = key_data, + .key_types = key_types, + .key_attrs = key_attrs, + .key_vecs = key_vecs, + .nullable_mask = p1_nullable, + .agg_vecs = agg_vecs, + .n_workers = n_total, + .bufs = radix_bufs, + .layout = ght_layout, + .match_idx = match_idx, + }; + ray_pool_dispatch(pool, radix_phase1_fn, &p1ctx, n_scan); + CHECK_CANCEL_GOTO(pool, cleanup); + + /* Check for OOM during phase 1 radix buffer growth */ + { + bool phase1_oom = false; + for (size_t i = 0; i < n_bufs; i++) { + if (radix_bufs[i].oom) { phase1_oom = true; break; } + } + if (phase1_oom) { + for (size_t i = 0; i < n_bufs; i++) scratch_free(radix_bufs[i]._hdr); + scratch_free(radix_bufs_hdr); + radix_bufs = NULL; + goto sequential_fallback; + } + } + + /* Phase 2: parallel per-partition aggregation (no column access) */ + part_hts = (group_ht_t*)scratch_calloc(&part_hts_hdr, + RADIX_P * sizeof(group_ht_t)); + if (!part_hts) { + for (size_t i = 0; i < n_bufs; i++) scratch_free(radix_bufs[i]._hdr); + scratch_free(radix_bufs_hdr); + radix_bufs = NULL; + goto sequential_fallback; + } + + radix_phase2_ctx_t p2ctx = { + .key_types = key_types, + .n_keys = n_keys, + .n_workers = n_total, + .bufs = radix_bufs, + .part_hts = part_hts, + .layout = ght_layout, + .key_data = key_data, + }; + ray_pool_dispatch_n(pool, radix_phase2_fn, &p2ctx, RADIX_P); + CHECK_CANCEL_GOTO(pool, cleanup); + + /* Prefix offsets */ + uint32_t part_offsets[RADIX_P + 1]; + part_offsets[0] = 0; + for (uint32_t p = 0; p < RADIX_P; p++) + part_offsets[p + 1] = part_offsets[p] + part_hts[p].grp_count; + uint32_t total_grps = part_offsets[RADIX_P]; + + /* Build result directly from partition HTs */ + int64_t total_cols = n_keys + n_aggs; + result = ray_table_new(total_cols); + if (!result || 
RAY_IS_ERR(result)) goto cleanup; + + /* Pre-allocate key columns */ + ray_t* key_cols[n_keys]; + char* key_dsts[n_keys]; + int8_t key_out_types[n_keys]; + uint8_t key_esizes[n_keys]; + for (uint8_t k = 0; k < n_keys; k++) { + ray_t* src_col = key_vecs[k]; + key_cols[k] = NULL; + key_dsts[k] = NULL; + key_out_types[k] = 0; + key_esizes[k] = 0; + if (!src_col) continue; + uint8_t esz = ray_sym_elem_size(src_col->type, src_col->attrs); + ray_t* new_col; + if (src_col->type == RAY_SYM) + new_col = ray_sym_vec_new(src_col->attrs & RAY_SYM_W_MASK, (int64_t)total_grps); + else + new_col = ray_vec_new(src_col->type, (int64_t)total_grps); + if (!new_col || RAY_IS_ERR(new_col)) continue; + new_col->len = (int64_t)total_grps; + key_cols[k] = new_col; + key_dsts[k] = (char*)ray_data(new_col); + key_out_types[k] = src_col->type; + key_esizes[k] = esz; + } + + /* Pre-allocate agg result vectors */ + agg_out_t agg_outs[n_aggs]; + ray_t* agg_cols[n_aggs]; + for (uint8_t a = 0; a < n_aggs; a++) { + uint16_t agg_op = ext->agg_ops[a]; + ray_t* agg_col = agg_vecs[a]; + bool is_f64 = agg_col && agg_col->type == RAY_F64; + int8_t out_type; + switch (agg_op) { + case OP_AVG: + case OP_STDDEV: case OP_STDDEV_POP: + case OP_VAR: case OP_VAR_POP: + out_type = RAY_F64; break; + case OP_COUNT: out_type = RAY_I64; break; + case OP_SUM: case OP_PROD: + out_type = is_f64 ? RAY_F64 : RAY_I64; break; + default: + out_type = agg_col ? 
agg_col->type : RAY_I64; break; + } + ray_t* new_col = ray_vec_new(out_type, (int64_t)total_grps); + if (!new_col || RAY_IS_ERR(new_col)) { + agg_cols[a] = NULL; + memset(&agg_outs[a], 0, sizeof(agg_outs[a])); + continue; + } + new_col->len = (int64_t)total_grps; + agg_cols[a] = new_col; + agg_outs[a] = (agg_out_t){ + .out_type = out_type, .src_f64 = is_f64, + .agg_op = agg_op, + .affine = agg_affine[a].enabled, + .bias_f64 = agg_affine[a].bias_f64, + .bias_i64 = agg_affine[a].bias_i64, + .dst = ray_data(new_col), + .vec = new_col, + }; + } + + /* Pre-allocate nullmaps for agg result vectors (parallel safety) */ + bool nullmap_prep_ok[n_aggs]; + for (uint8_t a = 0; a < n_aggs; a++) + nullmap_prep_ok[a] = agg_cols[a] && (grp_prepare_nullmap(agg_outs[a].vec) == RAY_OK); + + /* Pre-prepare nullmaps on output key columns for parallel null writes */ + for (uint8_t k = 0; k < n_keys; k++) + if (key_cols[k]) grp_prepare_nullmap(key_cols[k]); + + /* Phase 3: parallel key gather + agg result building from inline rows */ + { + radix_phase3_ctx_t p3ctx = { + .part_hts = part_hts, + .part_offsets = part_offsets, + .key_dsts = key_dsts, + .key_types = key_out_types, + .key_attrs = key_attrs, + .key_esizes = key_esizes, + .key_cols = key_cols, + .n_keys = n_keys, + .agg_outs = agg_outs, + .n_aggs = n_aggs, + .key_src_data = key_data, + }; + ray_pool_dispatch_n(pool, radix_phase3_fn, &p3ctx, RADIX_P); + } + + /* Fixup: if nullmap prep failed for any VAR/STDDEV agg, re-scan + * hash tables sequentially to ensure all null bits were set */ + for (uint8_t a = 0; a < n_aggs; a++) { + if (nullmap_prep_ok[a] || !agg_cols[a]) continue; + uint16_t op = agg_outs[a].agg_op; + if (op != OP_VAR && op != OP_VAR_POP && + op != OP_STDDEV && op != OP_STDDEV_POP) continue; + for (uint32_t p = 0; p < RADIX_P; p++) { + group_ht_t* ph = &part_hts[p]; + uint32_t gc = ph->grp_count; + uint32_t off = part_offsets[p]; + uint16_t rs = ph->layout.row_stride; + for (uint32_t gi = 0; gi < gc; gi++) { + const 
char* row = ph->rows + (size_t)gi * rs; + int64_t cnt = *(const int64_t*)(const void*)row; + bool insuf = (op == OP_VAR || op == OP_STDDEV) ? cnt <= 1 : cnt <= 0; + if (insuf) ray_vec_set_null(agg_outs[a].vec, off + gi, true); + } + } + } + + /* Finalize null flags after parallel execution */ + for (uint8_t a = 0; a < n_aggs; a++) { + if (!agg_cols[a]) continue; + grp_finalize_nulls(agg_outs[a].vec); + } + for (uint8_t k = 0; k < n_keys; k++) { + if (!key_cols[k]) continue; + grp_finalize_nulls(key_cols[k]); + } + + /* Add key columns to result */ + for (uint8_t k = 0; k < n_keys; k++) { + if (!key_cols[k]) continue; + ray_op_ext_t* key_ext = find_ext(g, ext->keys[k]->id); + int64_t name_id = key_ext ? key_ext->sym : k; + result = ray_table_add_col(result, name_id, key_cols[k]); + ray_release(key_cols[k]); + } + + /* Add agg columns to result */ + for (uint8_t a = 0; a < n_aggs; a++) { + if (!agg_cols[a]) continue; + uint16_t agg_op = ext->agg_ops[a]; + ray_op_ext_t* agg_ext = find_ext(g, ext->agg_ins[a]->id); + int64_t name_id; + if (agg_ext && agg_ext->base.opcode == OP_SCAN) { + ray_t* name_atom = ray_sym_str(agg_ext->sym); + const char* base = name_atom ? ray_str_ptr(name_atom) : NULL; + size_t blen = base ? 
ray_str_len(name_atom) : 0; + const char* sfx = ""; + size_t slen = 0; + switch (agg_op) { + case OP_SUM: sfx = "_sum"; slen = 4; break; + case OP_COUNT: sfx = "_count"; slen = 6; break; + case OP_AVG: sfx = "_mean"; slen = 5; break; + case OP_MIN: sfx = "_min"; slen = 4; break; + case OP_MAX: sfx = "_max"; slen = 4; break; + case OP_FIRST: sfx = "_first"; slen = 6; break; + case OP_LAST: sfx = "_last"; slen = 5; break; + case OP_STDDEV: sfx = "_stddev"; slen = 7; break; + case OP_STDDEV_POP: sfx = "_stddev_pop"; slen = 11; break; + case OP_VAR: sfx = "_var"; slen = 4; break; + case OP_VAR_POP: sfx = "_var_pop"; slen = 8; break; + } + char buf[256]; + ray_t* name_dyn_hdr = NULL; + char* nbp = buf; + size_t nbc = sizeof(buf); + if (base && blen + slen >= sizeof(buf)) { + nbp = (char*)scratch_alloc(&name_dyn_hdr, blen + slen + 1); + if (nbp) nbc = blen + slen + 1; + else { nbp = buf; nbc = sizeof(buf); } + } + if (base && blen + slen < nbc) { + memcpy(nbp, base, blen); + memcpy(nbp + blen, sfx, slen); + name_id = ray_sym_intern(nbp, blen + slen); + } else { + name_id = agg_ext->sym; + } + scratch_free(name_dyn_hdr); + } else { + name_id = (int64_t)(n_keys + a); + } + result = ray_table_add_col(result, name_id, agg_cols[a]); + ray_release(agg_cols[a]); + } + + goto cleanup; + } + +sequential_fallback:; + /* Sequential path using row-layout HT */ + if (!group_ht_init(&single_ht, ht_cap, &ght_layout)) { + result = ray_error("oom", NULL); + goto cleanup; + } + group_rows_range(&single_ht, key_data, key_types, key_attrs, key_vecs, agg_vecs, + 0, n_scan, match_idx); + final_ht = &single_ht; + if (ray_interrupted()) { result = ray_error("cancel", "interrupted"); goto cleanup; } + if (single_ht.oom) { result = ray_error("oom", NULL); goto cleanup; } + + /* Build result from sequential HT (inline row layout) */ + { + uint32_t grp_count = final_ht->grp_count; + const ght_layout_t* ly = &final_ht->layout; + int64_t total_cols = n_keys + n_aggs; + result = 
ray_table_new(total_cols); + if (!result || RAY_IS_ERR(result)) goto cleanup; + + /* Key columns: read from inline group rows, narrow to original type. + * Wide keys store a source row index in the HT slot; resolve it + * through the original key column (key_data[k]) and copy bytes. */ + for (uint8_t k = 0; k < n_keys; k++) { + ray_t* src_col = key_vecs[k]; + if (!src_col) continue; + uint8_t esz = col_esz(src_col); + int8_t kt = src_col->type; + + ray_t* new_col = col_vec_new(src_col, (int64_t)grp_count); + if (!new_col || RAY_IS_ERR(new_col)) continue; + new_col->len = (int64_t)grp_count; + + bool is_wide = (ly->wide_key_mask & (1u << k)) != 0; + const char* src_base = is_wide ? (const char*)key_data[k] : NULL; + + for (uint32_t gi = 0; gi < grp_count; gi++) { + const char* row = final_ht->rows + (size_t)gi * ly->row_stride; + const int64_t* rkeys = (const int64_t*)(row + 8); + int64_t kv = rkeys[k]; + int64_t null_mask = rkeys[n_keys]; + if (null_mask & (int64_t)(1u << k)) { + ray_vec_set_null(new_col, (int64_t)gi, true); + continue; + } + if (is_wide) { + char* dst = (char*)ray_data(new_col) + (size_t)gi * esz; + memcpy(dst, src_base + (size_t)kv * esz, esz); + } else if (kt == RAY_F64) { + char* dst = (char*)ray_data(new_col) + (size_t)gi * esz; + memcpy(dst, &kv, 8); + } else { + write_col_i64(ray_data(new_col), gi, kv, kt, new_col->attrs); + } + } + + ray_op_ext_t* key_ext = find_ext(g, ext->keys[k]->id); + int64_t name_id = key_ext ? 
key_ext->sym : k; + result = ray_table_add_col(result, name_id, new_col); + ray_release(new_col); + } + + /* Agg columns from inline accumulators */ + for (uint8_t a = 0; a < n_aggs; a++) { + uint16_t agg_op = ext->agg_ops[a]; + ray_t* agg_col = agg_vecs[a]; + bool is_f64 = agg_col && agg_col->type == RAY_F64; + int8_t out_type; + switch (agg_op) { + case OP_AVG: + case OP_STDDEV: case OP_STDDEV_POP: + case OP_VAR: case OP_VAR_POP: + out_type = RAY_F64; break; + case OP_COUNT: out_type = RAY_I64; break; + case OP_SUM: case OP_PROD: + out_type = is_f64 ? RAY_F64 : RAY_I64; break; + default: + out_type = agg_col ? agg_col->type : RAY_I64; break; + } + ray_t* new_col = ray_vec_new(out_type, (int64_t)grp_count); + if (!new_col || RAY_IS_ERR(new_col)) continue; + new_col->len = (int64_t)grp_count; + + int8_t s = ly->agg_val_slot[a]; /* unified accum slot */ + for (uint32_t gi = 0; gi < grp_count; gi++) { + const char* row = final_ht->rows + (size_t)gi * ly->row_stride; + int64_t cnt = *(const int64_t*)(const void*)row; + if (out_type == RAY_F64) { + double v; + switch (agg_op) { + case OP_SUM: + v = is_f64 ? ROW_RD_F64(row, ly->off_sum, s) + : (double)ROW_RD_I64(row, ly->off_sum, s); + if (agg_affine[a].enabled) v += agg_affine[a].bias_f64 * cnt; + break; + case OP_AVG: + v = is_f64 ? ROW_RD_F64(row, ly->off_sum, s) / cnt + : (double)ROW_RD_I64(row, ly->off_sum, s) / cnt; + if (agg_affine[a].enabled) v += agg_affine[a].bias_f64; + break; + case OP_MIN: + v = is_f64 ? ROW_RD_F64(row, ly->off_min, s) + : (double)ROW_RD_I64(row, ly->off_min, s); + break; + case OP_MAX: + v = is_f64 ? ROW_RD_F64(row, ly->off_max, s) + : (double)ROW_RD_I64(row, ly->off_max, s); + break; + case OP_FIRST: case OP_LAST: + v = is_f64 ? ROW_RD_F64(row, ly->off_sum, s) + : (double)ROW_RD_I64(row, ly->off_sum, s); + break; + case OP_VAR: case OP_VAR_POP: + case OP_STDDEV: case OP_STDDEV_POP: { + bool insuf = (agg_op == OP_VAR || agg_op == OP_STDDEV) ? 
cnt <= 1 : cnt <= 0; + if (insuf) { v = 0.0; ray_vec_set_null(new_col, gi, true); break; } + double sum_val = is_f64 ? ROW_RD_F64(row, ly->off_sum, s) + : (double)ROW_RD_I64(row, ly->off_sum, s); + double sq_val = ly->off_sumsq ? ROW_RD_F64(row, ly->off_sumsq, s) : 0.0; + double mean = sum_val / cnt; + double var_pop = sq_val / cnt - mean * mean; + if (var_pop < 0) var_pop = 0; + if (agg_op == OP_VAR_POP) v = var_pop; + else if (agg_op == OP_VAR) v = var_pop * cnt / (cnt - 1); + else if (agg_op == OP_STDDEV_POP) v = sqrt(var_pop); + else v = sqrt(var_pop * cnt / (cnt - 1)); + break; + } + default: v = 0.0; break; + } + ((double*)ray_data(new_col))[gi] = v; + } else { + int64_t v; + switch (agg_op) { + case OP_SUM: + v = ROW_RD_I64(row, ly->off_sum, s); + if (agg_affine[a].enabled) v += agg_affine[a].bias_i64 * cnt; + break; + case OP_COUNT: v = cnt; break; + case OP_MIN: v = ROW_RD_I64(row, ly->off_min, s); break; + case OP_MAX: v = ROW_RD_I64(row, ly->off_max, s); break; + case OP_FIRST: case OP_LAST: v = ROW_RD_I64(row, ly->off_sum, s); break; + default: v = 0; break; + } + ((int64_t*)ray_data(new_col))[gi] = v; + } + } + + /* Generate unique column name */ + ray_op_ext_t* agg_ext = find_ext(g, ext->agg_ins[a]->id); + int64_t name_id; + if (agg_ext && agg_ext->base.opcode == OP_SCAN) { + ray_t* name_atom = ray_sym_str(agg_ext->sym); + const char* base = name_atom ? ray_str_ptr(name_atom) : NULL; + size_t blen = base ? 
ray_str_len(name_atom) : 0; + const char* sfx = ""; + size_t slen = 0; + switch (agg_op) { + case OP_SUM: sfx = "_sum"; slen = 4; break; + case OP_COUNT: sfx = "_count"; slen = 6; break; + case OP_AVG: sfx = "_mean"; slen = 5; break; + case OP_MIN: sfx = "_min"; slen = 4; break; + case OP_MAX: sfx = "_max"; slen = 4; break; + case OP_FIRST: sfx = "_first"; slen = 6; break; + case OP_LAST: sfx = "_last"; slen = 5; break; + case OP_STDDEV: sfx = "_stddev"; slen = 7; break; + case OP_STDDEV_POP: sfx = "_stddev_pop"; slen = 11; break; + case OP_VAR: sfx = "_var"; slen = 4; break; + case OP_VAR_POP: sfx = "_var_pop"; slen = 8; break; + } + char buf[256]; + if (base && blen + slen < sizeof(buf)) { + memcpy(buf, base, blen); + memcpy(buf + blen, sfx, slen); + name_id = ray_sym_intern(buf, blen + slen); + } else { + name_id = agg_ext->sym; + } + } else { + /* Expression agg input — synthetic name like "_e0_sum" */ + char nbuf[32]; + int np = 0; + nbuf[np++] = '_'; nbuf[np++] = 'e'; + /* Multi-digit agg index */ + { uint8_t v = a; char dig[3]; int nd = 0; + do { dig[nd++] = (char)('0' + v % 10); v /= 10; } while (v); + while (nd--) nbuf[np++] = dig[nd]; } + const char* nsfx = ""; + size_t nslen = 0; + switch (agg_op) { + case OP_SUM: nsfx = "_sum"; nslen = 4; break; + case OP_COUNT: nsfx = "_count"; nslen = 6; break; + case OP_AVG: nsfx = "_mean"; nslen = 5; break; + case OP_MIN: nsfx = "_min"; nslen = 4; break; + case OP_MAX: nsfx = "_max"; nslen = 4; break; + case OP_FIRST: nsfx = "_first"; nslen = 6; break; + case OP_LAST: nsfx = "_last"; nslen = 5; break; + case OP_STDDEV: nsfx = "_stddev"; nslen = 7; break; + case OP_STDDEV_POP: nsfx = "_stddev_pop"; nslen = 11; break; + case OP_VAR: nsfx = "_var"; nslen = 4; break; + case OP_VAR_POP: nsfx = "_var_pop"; nslen = 8; break; + } + memcpy(nbuf + np, nsfx, nslen); + name_id = ray_sym_intern(nbuf, (size_t)np + nslen); + } + result = ray_table_add_col(result, name_id, new_col); + ray_release(new_col); + } + } + +cleanup: + if 
(final_ht == &single_ht) {
+        group_ht_free(&single_ht);
+    }
+    if (radix_bufs) {
+        size_t n_bufs = (size_t)n_total * RADIX_P;
+        for (size_t i = 0; i < n_bufs; i++) scratch_free(radix_bufs[i]._hdr);
+        scratch_free(radix_bufs_hdr);
+    }
+    if (part_hts) {
+        for (uint32_t p = 0; p < RADIX_P; p++) {
+            if (part_hts[p].rows) group_ht_free(&part_hts[p]);
+        }
+        scratch_free(part_hts_hdr);
+    }
+    for (uint8_t a = 0; a < n_aggs; a++)
+        if (agg_owned[a] && agg_vecs[a]) ray_release(agg_vecs[a]);
+    for (uint8_t k = 0; k < n_keys; k++)
+        if (key_owned[k] && key_vecs[k]) ray_release(key_vecs[k]);
+    if (match_idx_block) ray_release(match_idx_block);
+
+    return result;
+}
+
+/* --------------------------------------------------------------------------
+ * exec_group_per_partition — per-partition GROUP BY with merge
+ *
+ * Runs exec_group on each partition independently (zero-copy mmap segments),
+ * then merges the small partial results via a second exec_group pass.
+ *
+ * Merge ops: SUM→SUM, COUNT→SUM, MIN→MIN, MAX→MAX, FIRST→FIRST, LAST→LAST.
+ * AVG: decomposed into SUM+COUNT per partition, merged, then divided.
+ * STDDEV/VAR: decomposed into SUM(x)+SUM(x²)+COUNT(x) per partition,
+ * merged with SUM, then final variance/stddev computed from merged totals.
+ *
+ * Parameters:
+ *   parted_tbl  — source table; key/agg columns are looked up by sym ID and
+ *                 must be PARTED (or MAPCOMMON for keys).
+ *   ext         — GROUP op descriptor; n_keys, n_aggs and agg_ops are read.
+ *   n_parts     — number of partitions to process (may be reduced by the
+ *                 LIMIT pushdown below).
+ *   key_syms    — n_keys column sym IDs of the GROUP BY keys.
+ *   agg_syms    — n_aggs column sym IDs of the aggregate inputs.
+ *   has_avg / has_stddev — non-zero enables the AVG / STDDEV-VAR
+ *                 post-processing pass that trims the decomposed columns.
+ *   group_limit — >0 enables LIMIT pushdown when every key is MAPCOMMON.
+ *
+ * Layout of the decomposed per-partition agg slots (part_ops[]):
+ *   [0 .. n_aggs)                      original aggs (AVG/STDDEV/VAR → SUM)
+ *   [n_aggs .. n_aggs+n_std)           SUM(x²) for each STDDEV/VAR
+ *   [.. +n_avg)                        COUNT for each AVG
+ *   [.. +n_std)                        COUNT for each STDDEV/VAR
+ *
+ * Returns NULL if any step fails (caller falls through to concat path).
+ * Also returns NULL when more than 8 keys / 8 aggs are requested, when the
+ * decomposed slot count exceeds 24, or when n_parts is 0 (no batch runs).
+ * -------------------------------------------------------------------------- */
+static ray_t* __attribute__((noinline))
+exec_group_per_partition(ray_t* parted_tbl, ray_op_ext_t* ext,
+                         int32_t n_parts, const int64_t* key_syms,
+                         const int64_t* agg_syms, int has_avg,
+                         int has_stddev, int64_t group_limit) {
+
+    uint8_t n_keys = ext->n_keys;
+    uint8_t n_aggs = ext->n_aggs;
+
+    /* Guard: fixed-size arrays below cap at 24 agg ops.
+     * Each AVG adds 1 extra (COUNT), each STDDEV/VAR adds 2 (SUM_SQ + COUNT).
+     * n_aggs + n_avg + 2*n_std must stay within 24. */
+    if (n_aggs > 8 || n_keys > 8) return NULL;
+
+    /* Identify MAPCOMMON vs PARTED keys. MAPCOMMON keys are constant
+     * within a partition, so they are excluded from per-partition GROUP BY
+     * and reconstructed after concat. */
+    uint8_t n_mc_keys = 0;
+    int64_t mc_sym_ids[8];
+    uint8_t n_part_keys = 0;
+    int64_t pk_syms[8];  /* non-MAPCOMMON key sym IDs */
+
+    for (uint8_t k = 0; k < n_keys; k++) {
+        ray_t* pcol = ray_table_get_col(parted_tbl, key_syms[k]);
+        if (pcol && pcol->type == RAY_MAPCOMMON) {
+            mc_sym_ids[n_mc_keys++] = key_syms[k];
+        } else {
+            pk_syms[n_part_keys++] = key_syms[k];
+        }
+    }
+
+    /* LIMIT pushdown: when all GROUP BY keys are MAPCOMMON (n_part_keys==0),
+     * each partition produces exactly 1 group. Limit the partition loop. */
+    if (group_limit > 0 && n_part_keys == 0 && group_limit < n_parts)
+        n_parts = (int32_t)group_limit;
+
+    /* Decomposition: AVG(x) → SUM(x) + COUNT(x).
+     * STDDEV/VAR(x) → SUM(x) + SUM(x²) + COUNT(x).
+     * Build per-partition agg_ops with decomposed ops, then merge ops. */
+    uint16_t part_ops[24];     /* per-partition agg ops */
+    uint16_t merge_ops[24];    /* merge agg ops */
+    uint8_t  avg_idx[8];       /* which original agg slots are AVG */
+    uint8_t  std_idx[8];       /* which original agg slots are STDDEV/VAR */
+    uint16_t std_orig_op[8];   /* original op for each std slot */
+    uint8_t  n_avg = 0;
+    uint8_t  n_std = 0;
+    uint8_t  part_n_aggs = n_aggs;
+    /* std_sq_slot[i] / std_cnt_slot[i]: index into part_ops of the SUM(x²)
+     * and COUNT(x) slots belonging to the i-th STDDEV/VAR aggregate */
+    uint8_t  std_sq_slot[8];
+    uint8_t  std_cnt_slot[8];
+
+    for (uint8_t a = 0; a < n_aggs; a++) {
+        uint16_t aop = ext->agg_ops[a];
+        if (aop == OP_AVG) {
+            part_ops[a] = OP_SUM;  /* partition: compute SUM */
+            avg_idx[n_avg++] = a;
+        } else if (aop == OP_STDDEV || aop == OP_STDDEV_POP ||
+                   aop == OP_VAR || aop == OP_VAR_POP) {
+            part_ops[a] = OP_SUM;  /* partition: compute SUM(x) */
+            std_orig_op[n_std] = aop;
+            std_idx[n_std++] = a;
+        } else {
+            part_ops[a] = aop;
+        }
+    }
+    /* Guard: total decomposed slots must fit */
+    if (n_aggs + n_avg + 2 * n_std > 24) return NULL;
+
+    /* Append SUM(x²) for each STDDEV/VAR slot */
+    for (uint8_t i = 0; i < n_std; i++) {
+        std_sq_slot[i] = part_n_aggs;
+        part_ops[part_n_aggs++] = OP_SUM;  /* SUM(x²) */
+    }
+    /* Append COUNT for each AVG column */
+    for (uint8_t i = 0; i < n_avg; i++)
+        part_ops[part_n_aggs++] = OP_COUNT;
+    /* Append COUNT for each STDDEV/VAR column */
+    for (uint8_t i = 0; i < n_std; i++) {
+        std_cnt_slot[i] = part_n_aggs;
+        part_ops[part_n_aggs++] = OP_COUNT;
+    }
+
+    /* Merge ops: SUM→SUM, COUNT→SUM, MIN→MIN, MAX→MAX,
+     * FIRST→FIRST, LAST→LAST, all appended slots → SUM */
+    for (uint8_t a = 0; a < part_n_aggs; a++) {
+        merge_ops[a] = part_ops[a];
+        if (merge_ops[a] == OP_COUNT) merge_ops[a] = OP_SUM;
+    }
+
+    /* Agg input syms for the decomposed ops.
+     * AVG's COUNT uses same input column as the AVG itself.
+     * STDDEV's SUM(x²) and COUNT use same input column as the STDDEV. */
+    int64_t part_agg_syms[24];
+    /* Flag: slot needs x*x graph node (for SUM(x²)) */
+    int part_needs_sq[24];
+    memset(part_needs_sq, 0, sizeof(part_needs_sq));
+
+    for (uint8_t a = 0; a < n_aggs; a++)
+        part_agg_syms[a] = agg_syms[a];
+    /* SUM(x²) slots for STDDEV/VAR */
+    for (uint8_t i = 0; i < n_std; i++) {
+        part_agg_syms[std_sq_slot[i]] = agg_syms[std_idx[i]];
+        part_needs_sq[std_sq_slot[i]] = 1;
+    }
+    /* COUNT slots for AVG (they follow the n_std SUM(x²) slots) */
+    for (uint8_t i = 0; i < n_avg; i++)
+        part_agg_syms[n_aggs + n_std + i] = agg_syms[avg_idx[i]];
+    /* COUNT slots for STDDEV/VAR */
+    for (uint8_t i = 0; i < n_std; i++)
+        part_agg_syms[std_cnt_slot[i]] = agg_syms[std_idx[i]];
+
+    /* ---- Batched incremental merge ----
+     * Process partitions in batches of MERGE_BATCH.  After each batch:
+     *   Phase 1: exec_group each partition in batch → batch_partials[]
+     *   Phase 2: concat (running + batch_partials + MAPCOMMON) → merge_tbl
+     *   Phase 3: merge GROUP BY → new running
+     * Bounds peak memory to O(MERGE_BATCH × groups_per_partition). */
+#define MERGE_BATCH 8
+
+    /* Capture agg column name IDs from first partition result */
+    int64_t agg_name_ids[24];
+    int agg_names_captured = 0;
+
+    ray_t* running = NULL;
+    ray_t* merge_tbl = NULL;  /* last merge table (for column name fixup) */
+
+    for (int32_t batch_start = 0; batch_start < n_parts;
+         batch_start += MERGE_BATCH) {
+
+        int32_t batch_end = batch_start + MERGE_BATCH;
+        if (batch_end > n_parts) batch_end = n_parts;
+        int32_t batch_n = batch_end - batch_start;
+
+        /* Phase 1: exec_group each partition in this batch */
+        ray_t* bp[MERGE_BATCH];
+        memset(bp, 0, sizeof(bp));  /* NULL entries let batch_fail skip unset slots */
+
+        for (int32_t bi = 0; bi < batch_n; bi++) {
+            int32_t p = batch_start + bi;
+
+            /* Collect unique agg input sym IDs (avoid duplicate columns).
+             * Syms that are also GROUP BY keys are skipped here as well. */
+            int64_t unique_agg[24];
+            int n_unique_agg = 0;
+            for (uint8_t a = 0; a < part_n_aggs; a++) {
+                int dup = 0;
+                for (int j = 0; j < n_unique_agg; j++)
+                    if (unique_agg[j] == part_agg_syms[a]) { dup = 1; break; }
+                if (!dup) {
+                    for (uint8_t k = 0; k < n_keys; k++)
+                        if (key_syms[k] == part_agg_syms[a]) { dup = 1; break; }
+                    if (!dup) unique_agg[n_unique_agg++] = part_agg_syms[a];
+                }
+            }
+
+            /* Sub-table for partition p: its key segments + agg input segments */
+            ray_t* sub = ray_table_new((int64_t)(n_part_keys + n_unique_agg));
+            if (!sub || RAY_IS_ERR(sub)) goto batch_fail;
+
+            for (uint8_t k = 0; k < n_part_keys; k++) {
+                ray_t* pcol = ray_table_get_col(parted_tbl, pk_syms[k]);
+                if (!pcol || !RAY_IS_PARTED(pcol->type)) {
+                    ray_release(sub); goto batch_fail;
+                }
+                ray_t* seg = ((ray_t**)ray_data(pcol))[p];
+                if (!seg) { ray_release(sub); goto batch_fail; }
+                ray_retain(seg);
+                sub = ray_table_add_col(sub, pk_syms[k], seg);
+                ray_release(seg);
+            }
+            for (int j = 0; j < n_unique_agg; j++) {
+                ray_t* pcol = ray_table_get_col(parted_tbl, unique_agg[j]);
+                if (!pcol || !RAY_IS_PARTED(pcol->type)) {
+                    ray_release(sub); goto batch_fail;
+                }
+                ray_t* seg = ((ray_t**)ray_data(pcol))[p];
+                if (!seg) { ray_release(sub); goto batch_fail; }
+                ray_retain(seg);
+                sub = ray_table_add_col(sub, unique_agg[j], seg);
+                ray_release(seg);
+            }
+
+            ray_graph_t* pg = ray_graph_new(sub);
+            if (!pg) { ray_release(sub); goto batch_fail; }
+
+            ray_op_t* pkeys[8];
+            for (uint8_t k = 0; k < n_part_keys; k++) {
+                ray_t* sym_atom = ray_sym_str(pk_syms[k]);
+                pkeys[k] = ray_scan(pg, ray_str_ptr(sym_atom));
+            }
+            ray_op_t* pagg_ins[24];
+            for (uint8_t a = 0; a < part_n_aggs; a++) {
+                ray_t* sym_atom = ray_sym_str(part_agg_syms[a]);
+                pagg_ins[a] = ray_scan(pg, ray_str_ptr(sym_atom));
+            }
+            /* Replace the scan feeding each SUM(x²) slot with x*x */
+            for (uint8_t j = 0; j < n_std; j++) {
+                uint8_t sq = std_sq_slot[j];
+                ray_op_t* x = pagg_ins[sq];
+                pagg_ins[sq] = ray_mul(pg, x, x);
+            }
+
+            ray_op_t* proot = ray_group(pg, pkeys, n_part_keys,
+                                         part_ops, pagg_ins, part_n_aggs);
+            proot = ray_optimize(pg, proot);
+            bp[bi] = ray_execute(pg, proot);
+            ray_graph_free(pg);
+            ray_release(sub);
+
+            if (!bp[bi] || RAY_IS_ERR(bp[bi])) goto batch_fail;
+
+            /* Capture agg column name IDs once (all partials share names) */
+            if (!agg_names_captured) {
+                for (uint8_t a = 0; a < part_n_aggs; a++)
+                    agg_name_ids[a] = ray_table_col_name(
+                        bp[bi], (int64_t)n_part_keys + a);
+                agg_names_captured = 1;
+            }
+        }
+
+        /* Phase 2: concat (running + batch_partials + MAPCOMMON) */
+        int64_t mrows = running ? ray_table_nrows(running) : 0;
+        for (int32_t i = 0; i < batch_n; i++)
+            mrows += ray_table_nrows(bp[i]);
+
+        if (merge_tbl) { ray_release(merge_tbl); merge_tbl = NULL; }
+        merge_tbl = ray_table_new((int64_t)(n_keys + part_n_aggs));
+        if (!merge_tbl || RAY_IS_ERR(merge_tbl)) {
+            merge_tbl = NULL; goto batch_fail;
+        }
+
+        /* Key columns */
+        for (uint8_t k = 0; k < n_keys; k++) {
+            int is_mc = 0;
+            for (uint8_t m = 0; m < n_mc_keys; m++)
+                if (mc_sym_ids[m] == key_syms[k]) { is_mc = 1; break; }
+
+            /* Type reference for column allocation */
+            ray_t* tref = NULL;
+            if (running) {
+                tref = ray_table_get_col(running, key_syms[k]);
+            } else if (is_mc) {
+                ray_t* mc_col = ray_table_get_col(parted_tbl, key_syms[k]);
+                tref = ((ray_t**)ray_data(mc_col))[0];
+            } else {
+                tref = ray_table_get_col(bp[0], key_syms[k]);
+            }
+            if (!tref) goto batch_fail;
+
+            size_t esz = (size_t)col_esz(tref);
+            ray_t* flat = col_vec_new(tref, mrows);
+            if (!flat || RAY_IS_ERR(flat)) goto batch_fail;
+            flat->len = mrows;
+            char* out = (char*)ray_data(flat);
+            int64_t off = 0;
+
+            /* Copy from running result */
+            if (running) {
+                ray_t* rc = ray_table_get_col(running, key_syms[k]);
+                if (rc && rc->len > 0) {
+                    memcpy(out, ray_data(rc), (size_t)rc->len * esz);
+                    off = rc->len;
+                }
+            }
+
+            /* Copy from batch partials */
+            for (int32_t i = 0; i < batch_n; i++) {
+                int64_t pnrows = ray_table_nrows(bp[i]);
+                if (is_mc) {
+                    /* MAPCOMMON: replicate this partition's key value.
+                     * Element 0 of the MAPCOMMON column is indexed by
+                     * partition number p to fetch that value. */
+                    int32_t p = batch_start + i;
+                    ray_t* mc_col = ray_table_get_col(parted_tbl, key_syms[k]);
+                    ray_t* mc_kv = ((ray_t**)ray_data(mc_col))[0];
+                    const char* kdata = (const char*)ray_data(mc_kv);
+                    for (int64_t r = 0; r < pnrows; r++)
+                        memcpy(out + (size_t)(off + r) * esz,
+                               kdata + (size_t)p * esz, esz);
+                    off += pnrows;
+                } else {
+                    ray_t* pc = ray_table_get_col(bp[i], key_syms[k]);
+                    if (pc && pc->len > 0) {
+                        memcpy(out + (size_t)off * esz,
+                               ray_data(pc), (size_t)pc->len * esz);
+                        off += pc->len;
+                    }
+                }
+            }
+
+            merge_tbl = ray_table_add_col(merge_tbl, key_syms[k], flat);
+            ray_release(flat);
+        }
+
+        /* Agg columns: addressed by position — after n_keys columns in
+         * `running`, after n_part_keys columns in each batch partial. */
+        for (uint8_t a = 0; a < part_n_aggs; a++) {
+            ray_t* tref = running
+                ? ray_table_get_col_idx(running, (int64_t)n_keys + a)
+                : ray_table_get_col_idx(bp[0], (int64_t)n_part_keys + a);
+            if (!tref) goto batch_fail;
+
+            size_t esz = (size_t)col_esz(tref);
+            ray_t* flat = col_vec_new(tref, mrows);
+            if (!flat || RAY_IS_ERR(flat)) goto batch_fail;
+            flat->len = mrows;
+            char* out = (char*)ray_data(flat);
+            int64_t off = 0;
+
+            if (running) {
+                ray_t* rc = ray_table_get_col_idx(running, (int64_t)n_keys + a);
+                if (rc && rc->len > 0) {
+                    memcpy(out, ray_data(rc), (size_t)rc->len * esz);
+                    off = rc->len;
+                }
+            }
+
+            for (int32_t i = 0; i < batch_n; i++) {
+                ray_t* pc = ray_table_get_col_idx(bp[i],
+                                                   (int64_t)n_part_keys + a);
+                if (pc && pc->len > 0) {
+                    memcpy(out + (size_t)off * esz,
+                           ray_data(pc), (size_t)pc->len * esz);
+                    off += pc->len;
+                }
+            }
+
+            merge_tbl = ray_table_add_col(merge_tbl, agg_name_ids[a], flat);
+            ray_release(flat);
+        }
+
+        /* Free batch partials */
+        for (int32_t i = 0; i < batch_n; i++) {
+            ray_release(bp[i]);
+            bp[i] = NULL;
+        }
+
+        /* Phase 3: merge GROUP BY */
+        ray_graph_t* mg = ray_graph_new(merge_tbl);
+        if (!mg) goto batch_fail;
+
+        ray_op_t* mkeys[8];
+        for (uint8_t k = 0; k < n_keys; k++) {
+            ray_t* sym_atom = ray_sym_str(key_syms[k]);
+            mkeys[k] = ray_scan(mg, ray_str_ptr(sym_atom));
+        }
+
+        ray_op_t* magg_ins[24];
+        for (uint8_t a = 0; a < part_n_aggs; a++) {
+            ray_t* agg_name = ray_sym_str(agg_name_ids[a]);
+            magg_ins[a] = ray_scan(mg, ray_str_ptr(agg_name));
+        }
+
+        ray_op_t* mroot = ray_group(mg, mkeys, n_keys,
+                                     merge_ops, magg_ins, part_n_aggs);
+        mroot = ray_optimize(mg, mroot);
+        ray_t* new_running = ray_execute(mg, mroot);
+        ray_graph_free(mg);
+
+        if (running) ray_release(running);
+        running = new_running;
+
+        if (!running || RAY_IS_ERR(running)) {
+            ray_release(merge_tbl);
+            return NULL;
+        }
+
+        /* Rename running's agg columns back to the original partial names.
+         * Without this, each merge adds an extra suffix (e.g. v1_sum → v1_sum_sum). */
+        for (uint8_t a = 0; a < part_n_aggs; a++)
+            ray_table_set_col_name(running, (int64_t)n_keys + a, agg_name_ids[a]);
+
+        continue;
+
+        /* Shared failure exit for this batch: drop any partials still held,
+         * the running result and the merge table, then give up. */
+batch_fail:
+        for (int32_t i = 0; i < batch_n; i++)
+            if (bp[i]) ray_release(bp[i]);
+        if (running) ray_release(running);
+        if (merge_tbl) ray_release(merge_tbl);
+        return NULL;
+    }
+
+    ray_t* result = running;
+
+    /* running is still NULL here when n_parts was 0 */
+    if (!result || RAY_IS_ERR(result)) {
+        if (merge_tbl) ray_release(merge_tbl);
+        return NULL;
+    }
+
+    int64_t rncols = ray_table_ncols(result);
+
+    /* AVG/STDDEV post-processing: build trimmed table (n_keys + n_aggs cols),
+     * computing final AVG = SUM/COUNT and STDDEV/VAR from SUM, SUM_SQ, COUNT. */
+    if (has_avg || has_stddev) {
+        ray_t* trimmed = ray_table_new((int64_t)(n_keys + n_aggs));
+        if (!trimmed || RAY_IS_ERR(trimmed)) {
+            ray_release(result);
+            if (merge_tbl) ray_release(merge_tbl);
+            return NULL;
+        }
+
+        for (int64_t c = 0; c < (int64_t)(n_keys + n_aggs) && c < rncols; c++) {
+            int64_t nm = ray_table_col_name(result, c);
+
+            /* Check if this agg column is an AVG or STDDEV/VAR slot */
+            int is_avg_slot = 0, is_std_slot = 0;
+            uint8_t avg_i = 0, std_i = 0;
+            if (c >= n_keys) {
+                uint8_t a = (uint8_t)(c - n_keys);
+                for (uint8_t j = 0; j < n_avg; j++) {
+                    if (avg_idx[j] == a) { is_avg_slot = 1; avg_i = j; break; }
+                }
+                for (uint8_t j = 0; j < n_std; j++) {
+                    if (std_idx[j] == a) { is_std_slot = 1; std_i = j; break; }
+                }
+            }
+
+            if (is_avg_slot) {
+                /* AVG = SUM(x) / COUNT(x) */
+                int64_t sum_ci = c;
+                /* AVG COUNT slots: after n_aggs + n_std SUM_SQ slots */
+                int64_t cnt_ci = (int64_t)n_keys + n_aggs + n_std + avg_i;
+                ray_t* sum_col = ray_table_get_col_idx(result, sum_ci);
+                ray_t* cnt_col = (cnt_ci < rncols) ? ray_table_get_col_idx(result, cnt_ci) : NULL;
+                if (!sum_col || !cnt_col) {
+                    /* Helper column missing: pass the raw SUM through unchanged */
+                    if (sum_col) {
+                        ray_retain(sum_col);
+                        trimmed = ray_table_add_col(trimmed, nm, sum_col);
+                        ray_release(sum_col);
+                    }
+                    continue;
+                }
+
+                int64_t nrows = sum_col->len;
+                ray_t* avg_col = ray_vec_new(RAY_F64, nrows);
+                if (!avg_col || RAY_IS_ERR(avg_col)) {
+                    ray_release(trimmed); ray_release(result);
+                    if (merge_tbl) ray_release(merge_tbl);
+                    return NULL;
+                }
+                avg_col->len = nrows;
+
+                /* Groups with a non-positive count yield 0.0 (no null is set) */
+                double* out = (double*)ray_data(avg_col);
+                if (sum_col->type == RAY_F64) {
+                    const double* sv = (const double*)ray_data(sum_col);
+                    const int64_t* cv = (const int64_t*)ray_data(cnt_col);
+                    for (int64_t r = 0; r < nrows; r++)
+                        out[r] = cv[r] > 0 ? sv[r] / (double)cv[r] : 0.0;
+                } else {
+                    const int64_t* sv = (const int64_t*)ray_data(sum_col);
+                    const int64_t* cv = (const int64_t*)ray_data(cnt_col);
+                    for (int64_t r = 0; r < nrows; r++)
+                        out[r] = cv[r] > 0 ? (double)sv[r] / (double)cv[r] : 0.0;
+                }
+                trimmed = ray_table_add_col(trimmed, nm, avg_col);
+                ray_release(avg_col);
+            } else if (is_std_slot) {
+                /* STDDEV/VAR from merged SUM(x), SUM(x²), COUNT(x):
+                 *   var_pop = SUM_SQ/N - (SUM/N)²
+                 *   var_samp = var_pop * N/(N-1)
+                 *   stddev_pop = sqrt(var_pop), stddev_samp = sqrt(var_samp) */
+                int64_t sum_ci = c;
+                int64_t sq_ci  = (int64_t)n_keys + std_sq_slot[std_i];
+                int64_t cnt_ci = (int64_t)n_keys + std_cnt_slot[std_i];
+                ray_t* sum_col = ray_table_get_col_idx(result, sum_ci);
+                ray_t* sq_col  = (sq_ci < rncols) ? ray_table_get_col_idx(result, sq_ci) : NULL;
+                ray_t* cnt_col = (cnt_ci < rncols) ? ray_table_get_col_idx(result, cnt_ci) : NULL;
+                if (!sum_col || !sq_col || !cnt_col) {
+                    /* Helper column missing: pass the raw SUM through unchanged */
+                    if (sum_col) {
+                        ray_retain(sum_col);
+                        trimmed = ray_table_add_col(trimmed, nm, sum_col);
+                        ray_release(sum_col);
+                    }
+                    continue;
+                }
+
+                int64_t nrows = sum_col->len;
+                ray_t* out_col = ray_vec_new(RAY_F64, nrows);
+                if (!out_col || RAY_IS_ERR(out_col)) {
+                    ray_release(trimmed); ray_release(result);
+                    if (merge_tbl) ray_release(merge_tbl);
+                    return NULL;
+                }
+                out_col->len = nrows;
+                double* out = (double*)ray_data(out_col);
+
+                uint16_t orig_op = std_orig_op[std_i];
+                /* Merged SUM(x) may be F64 or I64; the two branches below
+                 * dispatch on sum_col->type.  SUM(x²) is read as F64 and
+                 * COUNT as I64 unconditionally.
+                 * NOTE(review): this relies on ray_mul(x, x) yielding F64
+                 * even for integer x — not verifiable from this file; confirm. */
+                const double* sq = (const double*)ray_data(sq_col);
+                const int64_t* cv = (const int64_t*)ray_data(cnt_col);
+                if (sum_col->type == RAY_F64) {
+                    const double* sv = (const double*)ray_data(sum_col);
+                    for (int64_t r = 0; r < nrows; r++) {
+                        double n = (double)cv[r];
+                        if (n <= 0) { out[r] = 0.0; ray_vec_set_null(out_col, r, true); continue; }
+                        double mean = sv[r] / n;
+                        double var_pop = sq[r] / n - mean * mean;
+                        if (var_pop < 0) var_pop = 0;  /* clamp rounding error */
+                        bool insuf = (orig_op == OP_VAR || orig_op == OP_STDDEV) && n <= 1;
+                        if (insuf) { out[r] = 0.0; ray_vec_set_null(out_col, r, true); continue; }
+                        if (orig_op == OP_VAR_POP) out[r] = var_pop;
+                        else if (orig_op == OP_VAR) out[r] = var_pop * n / (n - 1);
+                        else if (orig_op == OP_STDDEV_POP) out[r] = sqrt(var_pop);
+                        else /* OP_STDDEV */ out[r] = sqrt(var_pop * n / (n - 1));
+                    }
+                } else {
+                    const int64_t* sv = (const int64_t*)ray_data(sum_col);
+                    for (int64_t r = 0; r < nrows; r++) {
+                        double n = (double)cv[r];
+                        if (n <= 0) { out[r] = 0.0; ray_vec_set_null(out_col, r, true); continue; }
+                        double mean = (double)sv[r] / n;
+                        double var_pop = sq[r] / n - mean * mean;
+                        if (var_pop < 0) var_pop = 0;  /* clamp rounding error */
+                        bool insuf = (orig_op == OP_VAR || orig_op == OP_STDDEV) && n <= 1;
+                        if (insuf) { out[r] = 0.0; ray_vec_set_null(out_col, r, true); continue; }
+                        if (orig_op == OP_VAR_POP) out[r] = var_pop;
+                        else if (orig_op == OP_VAR) out[r] = var_pop * n / (n - 1);
+                        else if (orig_op == OP_STDDEV_POP) out[r] = sqrt(var_pop);
+                        else /* OP_STDDEV */ out[r] = sqrt(var_pop * n / (n - 1));
+                    }
+                }
+                trimmed = ray_table_add_col(trimmed, nm, out_col);
+                ray_release(out_col);
+            } else {
+                /* Key column or non-decomposed aggregate: share as-is */
+                ray_t* col = ray_table_get_col_idx(result, c);
+                if (col) {
+                    ray_retain(col);
+                    trimmed = ray_table_add_col(trimmed, nm, col);
+                    ray_release(col);
+                }
+            }
+        }
+        ray_release(result);
+        result = trimmed;
+        rncols = ray_table_ncols(result);
+    }
+
+    /* Agg column names already fixed by ray_table_set_col_name inside batch loop.
+     * Apply final name fixup for the user-facing n_aggs columns (trim decomposed extras). */
+    for (uint8_t a = 0; a < n_aggs && (int64_t)(n_keys + a) < rncols; a++)
+        ray_table_set_col_name(result, (int64_t)n_keys + a, agg_name_ids[a]);
+
+    if (merge_tbl) ray_release(merge_tbl);
+    return result;
+}
+
+/* ══════════════════════════════════════════════════════════════════════
+ * pivot_ingest_run — shared parallel hash-aggregate for pivot
+ *
+ * Mirrors the phase1+phase2 radix pipeline exec_group uses, leaving
+ * the result in per-partition HTs with prefix offsets so the caller
+ * can iterate grouped rows without knowing about the radix internals.
+ * Falls back to a single sequential HT for tiny inputs or when no
+ * pool is available — the caller iterates n_parts ∈ {1, RADIX_P}.
+ * ══════════════════════════════════════════════════════════════════════ */
+
+/* Sequential ingest: aggregate rows [0, n_scan) into the caller-provided,
+ * already-initialised hash table and publish it through `out` as a single
+ * partition (n_parts == 1, part_offsets == {0, grp_count}).
+ * `out->part_offsets` must already point at a buffer of at least 2 entries. */
+static void pivot_ingest_sequential(pivot_ingest_t* out, const ght_layout_t* ly,
+                                    void** key_data, int8_t* key_types,
+                                    uint8_t* key_attrs, ray_t** key_vecs,
+                                    ray_t** agg_vecs, int64_t n_scan,
+                                    group_ht_t* scratch_ht) {
+    out->part_hts   = scratch_ht;
+    out->n_parts    = 1;
+    out->row_stride = ly->row_stride;
+    group_rows_range(scratch_ht, key_data, key_types, key_attrs, key_vecs,
+                     agg_vecs, 0, n_scan, NULL);
+    out->total_grps      = scratch_ht->grp_count;
+    out->part_offsets[0] = 0;
+    out->part_offsets[1] = scratch_ht->grp_count;
+}
+
+/* Hash-aggregate n_scan rows into `out`.
+ *
+ * Takes the parallel radix path when a pool with more than one worker is
+ * available and n_scan >= RAY_PARALLEL_THRESHOLD; otherwise a single
+ * sequential hash table is used.
+ *
+ * Returns false on allocation failure or hash-table OOM.  Returns true on
+ * success AND when the run was interrupted — in the latter case `out` may be
+ * only partially populated, so the caller must check ray_interrupted().
+ * In every case the caller releases `out` with pivot_ingest_free(); partial
+ * allocations are recorded in `out` before each early return. */
+bool pivot_ingest_run(pivot_ingest_t* out,
+                      const ght_layout_t* ly,
+                      void** key_data, int8_t* key_types, uint8_t* key_attrs,
+                      ray_t** key_vecs, ray_t** agg_vecs,
+                      int64_t n_scan) {
+    memset(out, 0, sizeof(*out));
+    out->row_stride = ly->row_stride;
+
+    /* Allocate a small offsets buffer up front (RADIX_P+1 is the max). */
+    out->part_offsets = (uint32_t*)scratch_alloc(&out->_offsets_hdr,
+                            (size_t)(RADIX_P + 1) * sizeof(uint32_t));
+    if (!out->part_offsets) return false;
+
+    uint8_t n_keys = ly->n_keys;
+
+    ray_pool_t* pool = ray_pool_get();
+    uint32_t n_total = pool ? ray_pool_total_workers(pool) : 1;
+    bool parallel_ok = (pool && n_scan >= RAY_PARALLEL_THRESHOLD && n_total > 1);
+
+    if (!parallel_ok) {
+        /* Sequential single-HT path — allocate the HT in its own scratch
+         * block and wire part_hts/n_parts immediately so every failure
+         * below funnels through pivot_ingest_free for cleanup. */
+        group_ht_t* seq = (group_ht_t*)scratch_calloc(&out->_part_hts_hdr,
+                                                       sizeof(group_ht_t));
+        if (!seq) return false;
+        out->part_hts = seq;
+        out->n_parts = 1;
+        /* Capacity: next power of two >= 2*n_scan, clamped to [1024, 2^24] */
+        uint32_t seq_cap = 1024;
+        uint64_t target = (uint64_t)n_scan * 2;
+        while ((uint64_t)seq_cap < target && seq_cap < (1u << 24)) seq_cap <<= 1;
+        if (!group_ht_init(seq, seq_cap, ly)) return false;
+        pivot_ingest_sequential(out, ly, key_data, key_types, key_attrs,
+                                key_vecs, agg_vecs, n_scan, seq);
+        /* Surface grow-path OOM from group_probe_entry so callers don't
+         * silently see a truncated result. */
+        if (seq->oom) return false;
+        return true;
+    }
+
+    /* ═════ Parallel radix path ═════ */
+    size_t n_bufs = (size_t)n_total * RADIX_P;
+    out->_n_bufs = n_bufs;
+    radix_buf_t* radix_bufs = (radix_buf_t*)scratch_calloc(&out->_radix_bufs_hdr,
+                                                            n_bufs * sizeof(radix_buf_t));
+    if (!radix_bufs) return false;
+    out->_radix_bufs = radix_bufs;
+
+    /* Initial per-buffer capacity: even share of the rows plus 50% slack,
+     * at least 64 entries, with the total pre-allocation capped at 2 GiB. */
+    uint32_t buf_init = (uint32_t)((uint64_t)n_scan / (RADIX_P * n_total));
+    if (buf_init < 64) buf_init = 64;
+    buf_init = buf_init + buf_init / 2;
+    uint16_t estride = ly->entry_stride;
+    {
+        size_t total_pre = (size_t)n_bufs * buf_init * estride;
+        if (total_pre > (size_t)2 << 30) {
+            buf_init = (uint32_t)(((size_t)2 << 30) / ((size_t)n_bufs * estride));
+            if (buf_init < 64) buf_init = 64;
+        }
+    }
+    for (size_t i = 0; i < n_bufs; i++) {
+        radix_bufs[i].data = (char*)scratch_alloc(&radix_bufs[i]._hdr,
+                                                   (size_t)buf_init * estride);
+        /* A failed allocation must not reach phase 1 with a non-zero cap
+         * and a NULL data pointer.  Buffers allocated so far are recorded
+         * in `out` and released by pivot_ingest_free. */
+        if (!radix_bufs[i].data) return false;
+        radix_bufs[i].count = 0;
+        radix_bufs[i].cap = buf_init;
+    }
+
+    /* Bit k set => key column k (or its slice parent) carries nulls */
+    uint8_t p1_nullable = 0;
+    for (uint8_t k = 0; k < n_keys; k++) {
+        if (!key_vecs[k]) continue;
+        ray_t* src = (key_vecs[k]->attrs & RAY_ATTR_SLICE)
+                     ? key_vecs[k]->slice_parent : key_vecs[k];
+        if (src && (src->attrs & RAY_ATTR_HAS_NULLS))
+            p1_nullable |= (uint8_t)(1u << k);
+    }
+
+    radix_phase1_ctx_t p1ctx = {
+        .key_data      = key_data,
+        .key_types     = key_types,
+        .key_attrs     = key_attrs,
+        .key_vecs      = key_vecs,
+        .nullable_mask = p1_nullable,
+        .agg_vecs      = agg_vecs,
+        .n_workers     = n_total,
+        .bufs          = radix_bufs,
+        .layout        = *ly,
+        .match_idx     = NULL,
+    };
+    ray_pool_dispatch(pool, radix_phase1_fn, &p1ctx, n_scan);
+    if (ray_interrupted()) return true;  /* caller checks ray_interrupted() */
+    /* Sync point — phase1 drained all rows, so rows_done == n_scan. */
+    ray_progress_update(NULL, "hash-partition", (uint64_t)n_scan, (uint64_t)n_scan);
+
+    for (size_t i = 0; i < n_bufs; i++)
+        if (radix_bufs[i].oom) return false;
+
+    group_ht_t* part_hts = (group_ht_t*)scratch_calloc(&out->_part_hts_hdr,
+                                                        RADIX_P * sizeof(group_ht_t));
+    if (!part_hts) return false;
+
+    radix_phase2_ctx_t p2ctx = {
+        .key_types   = key_types,
+        .n_keys      = n_keys,
+        .n_workers   = n_total,
+        .bufs        = radix_bufs,
+        .part_hts    = part_hts,
+        .layout      = *ly,
+        .key_data    = key_data,
+    };
+    ray_pool_dispatch_n(pool, radix_phase2_fn, &p2ctx, RADIX_P);
+    /* Publish the HTs before any further early return so
+     * pivot_ingest_free can release them. */
+    out->part_hts = part_hts;
+    out->n_parts = RADIX_P;
+    if (ray_interrupted()) return true;
+    /* Sync point — partitions materialized; show RADIX_P/RADIX_P. */
+    ray_progress_update(NULL, "per-partition aggregate", RADIX_P, RADIX_P);
+
+    /* OOM detection for the parallel path.  Two distinct failure modes
+     * must be caught here so callers never see a silently-truncated
+     * result:
+     *   (a) phase2 init failed — radix_phase2_fn `continue`s when
+     *       group_ht_init_sized returns false, leaving the partition
+     *       HT with NULL rows despite a non-zero buffer count.  Every
+     *       entry routed into that partition would be dropped.
+     *   (b) grow-path OOM — group_probe_entry sets part_hts[p].oom
+     *       on scratch_realloc failure and returns without inserting
+     *       the key, silently truncating later groups. */
+    for (uint32_t p = 0; p < RADIX_P; p++) {
+        if (part_hts[p].oom) return false;
+        if (part_hts[p].rows) continue;
+        uint32_t pcount = 0;
+        for (uint32_t w = 0; w < n_total; w++)
+            pcount += radix_bufs[(size_t)w * RADIX_P + p].count;
+        if (pcount) return false;
+    }
+
+    /* Exclusive prefix sum of per-partition group counts */
+    out->part_offsets[0] = 0;
+    for (uint32_t p = 0; p < RADIX_P; p++)
+        out->part_offsets[p + 1] = out->part_offsets[p] + part_hts[p].grp_count;
+    out->total_grps = out->part_offsets[RADIX_P];
+    return true;
+}
+
+/* Release everything pivot_ingest_run recorded in `out` (hash tables,
+ * radix buffers, offsets) and zero the struct.  Accepts NULL and
+ * partially-populated structs left behind by a failed run. */
+void pivot_ingest_free(pivot_ingest_t* out) {
+    if (!out) return;
+    if (out->part_hts) {
+        for (uint32_t p = 0; p < out->n_parts; p++) {
+            if (out->part_hts[p].rows || out->part_hts[p].slots)
+                group_ht_free(&out->part_hts[p]);
+        }
+        scratch_free(out->_part_hts_hdr);
+    }
+    if (out->_radix_bufs) {
+        radix_buf_t* bufs = (radix_buf_t*)out->_radix_bufs;
+        for (size_t i = 0; i < out->_n_bufs; i++) scratch_free(bufs[i]._hdr);
+        scratch_free(out->_radix_bufs_hdr);
+    }
+    scratch_free(out->_offsets_hdr);
+    memset(out, 0, sizeof(*out));
+}
diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/hash.h b/crates/rayforce-sys/vendor/rayforce/src/ops/hash.h
new file mode 100644
index 0000000..814b7bd
--- /dev/null
+++ b/crates/rayforce-sys/vendor/rayforce/src/ops/hash.h
@@ -0,0 +1,252 @@
+/*
+ *   Copyright (c) 2025-2026 Anton Kundenko
+ *   All rights reserved.
+
+ *   Permission is hereby granted, free of charge, to any person obtaining a copy
+ *   of this software and associated documentation files (the "Software"), to deal
+ *   in the Software without restriction, including without limitation the rights
+ *   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *   copies of the Software, and to permit persons to whom the Software is
+ *   furnished to do so, subject to the following conditions:
+
+ *   The above copyright notice and this permission notice shall be included in all
+ *   copies or substantial portions of the Software.
+ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * hash.h — Fast wyhash-based hashing for Rayforce + * + * Based on wyhash final version 4.2 by Wang Yi + * Original: https://github.com/wangyi-fudan/wyhash + * + * This is free and unencumbered software released into the public domain + * under The Unlicense (https://unlicense.org). + * See the original repository for full license text. + */ + +#ifndef RAY_HASH_H +#define RAY_HASH_H + +#include +#include +#include + +/* ---- Platform detection ------------------------------------------------- */ + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) + #define RAY_HASH_LIKELY(x) __builtin_expect(!!(x), 1) + #define RAY_HASH_UNLIKELY(x) __builtin_expect(!!(x), 0) +#else + #define RAY_HASH_LIKELY(x) (x) + #define RAY_HASH_UNLIKELY(x) (x) +#endif + +#if defined(_MSC_VER) && defined(_M_X64) + #include + #pragma intrinsic(_umul128) +#endif + +#ifndef RAY_HASH_LITTLE_ENDIAN + #if defined(RAY_OS_WINDOWS) || defined(__LITTLE_ENDIAN__) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + #define RAY_HASH_LITTLE_ENDIAN 1 + #elif defined(__BIG_ENDIAN__) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) + #define RAY_HASH_LITTLE_ENDIAN 0 + #else + #define RAY_HASH_LITTLE_ENDIAN 1 + #endif +#endif + +/* ---- Internal primitives ------------------------------------------------ */ + +/* 128-bit multiply: *A and *B become the low and high 64 bits of A*B */ +static inline void ray__wymum(uint64_t *A, uint64_t *B) { 
+#if defined(__SIZEOF_INT128__) + __uint128_t r = (__uint128_t)*A * *B; + *A = (uint64_t)r; + *B = (uint64_t)(r >> 64); +#elif defined(_MSC_VER) && defined(_M_X64) + *A = _umul128(*A, *B, B); +#else + uint64_t ha = *A >> 32, la = (uint32_t)*A; + uint64_t hb = *B >> 32, lb = (uint32_t)*B; + uint64_t rh = ha * hb, rm0 = ha * lb, rm1 = hb * la, rl = la * lb; + uint64_t t = rl + (rm0 << 32), c = t < rl; + uint64_t lo = t + (rm1 << 32); + c += lo < t; + uint64_t hi = rh + (rm0 >> 32) + (rm1 >> 32) + c; + *A = lo; + *B = hi; +#endif +} + +/* Mix two 64-bit values via multiply-then-xor */ +static inline uint64_t ray__wymix(uint64_t A, uint64_t B) { + ray__wymum(&A, &B); + return A ^ B; +} + +/* ---- Byte readers (endian-aware) ---------------------------------------- */ + +static inline uint64_t ray__wyr8(const uint8_t *p) { + uint64_t v; + memcpy(&v, p, 8); +#if RAY_HASH_LITTLE_ENDIAN + return v; +#elif defined(__GNUC__) || defined(__clang__) + return __builtin_bswap64(v); +#elif defined(_MSC_VER) + return _byteswap_uint64(v); +#else + return ((v >> 56) & 0xff) | ((v >> 40) & 0xff00) | + ((v >> 24) & 0xff0000) | ((v >> 8) & 0xff000000) | + ((v << 8) & 0xff00000000ULL) | ((v << 24) & 0xff0000000000ULL) | + ((v << 40) & 0xff000000000000ULL) | ((v << 56) & 0xff00000000000000ULL); +#endif +} + +static inline uint64_t ray__wyr4(const uint8_t *p) { + uint32_t v; + memcpy(&v, p, 4); +#if RAY_HASH_LITTLE_ENDIAN + return v; +#elif defined(__GNUC__) || defined(__clang__) + return __builtin_bswap32(v); +#elif defined(_MSC_VER) + return _byteswap_ulong(v); +#else + return ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) | + ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000); +#endif +} + +static inline uint64_t ray__wyr3(const uint8_t *p, size_t k) { + return ((uint64_t)p[0] << 16) | ((uint64_t)p[k >> 1] << 8) | p[k - 1]; +} + +/* ---- Secret constants (from wyhash final4.2) ---------------------------- */ + +static const uint64_t ray__wyp[4] = { + 0x2d358dccaa6c78a5ULL, + 
0x8bb84b93962eacc9ULL, + 0x4b33a62ed433d4a3ULL, + 0x4d5a2da51de1aa47ULL, +}; + +/* ---- Core: hash arbitrary bytes ----------------------------------------- */ + +/* + * ray_hash_bytes -- hash a byte buffer of length `len`. + * + * This is the full wyhash final4.2 algorithm: ~3 cycles/8 bytes on + * modern x86-64. Seed is fixed at 0 for deterministic, repeatable hashing + * within a single process lifetime. + */ +/* L2: Fixed seed=0 is acceptable for in-process dataframe operations; + * use a random seed if processing adversarial input (e.g., untrusted + * CSV with crafted hash collisions). */ +static inline uint64_t ray_hash_bytes(const void *data, size_t len) { + const uint8_t *p = (const uint8_t *)data; + uint64_t seed = 0; + seed ^= ray__wymix(seed ^ ray__wyp[0], ray__wyp[1]); + + uint64_t a, b; + if (RAY_HASH_LIKELY(len <= 16)) { + if (RAY_HASH_LIKELY(len >= 4)) { + a = (ray__wyr4(p) << 32) | ray__wyr4(p + ((len >> 3) << 2)); + b = (ray__wyr4(p + len - 4) << 32) | ray__wyr4(p + len - 4 - ((len >> 3) << 2)); + } else if (RAY_HASH_LIKELY(len > 0)) { + a = ray__wyr3(p, len); + b = 0; + } else { + a = b = 0; + } + } else { + size_t i = len; + if (RAY_HASH_UNLIKELY(i >= 48)) { + uint64_t see1 = seed, see2 = seed; + do { + seed = ray__wymix(ray__wyr8(p) ^ ray__wyp[1], ray__wyr8(p + 8) ^ seed); + see1 = ray__wymix(ray__wyr8(p + 16) ^ ray__wyp[2], ray__wyr8(p + 24) ^ see1); + see2 = ray__wymix(ray__wyr8(p + 32) ^ ray__wyp[3], ray__wyr8(p + 40) ^ see2); + p += 48; + i -= 48; + } while (RAY_HASH_LIKELY(i >= 48)); + seed ^= see1 ^ see2; + } + while (RAY_HASH_UNLIKELY(i > 16)) { + seed = ray__wymix(ray__wyr8(p) ^ ray__wyp[1], ray__wyr8(p + 8) ^ seed); + i -= 16; + p += 16; + } + a = ray__wyr8(p + i - 16); + b = ray__wyr8(p + i - 8); + } + a ^= ray__wyp[1]; + b ^= seed; + ray__wymum(&a, &b); + return ray__wymix(a ^ ray__wyp[0] ^ len, b ^ ray__wyp[1]); +} + +/* ---- Convenience: hash a single int64 ----------------------------------- */ + +/* + * ray_hash_i64 -- hash a 
64-bit integer.
+ *
+ * Uses wyhash64 two-round mixing which is faster than feeding 8 bytes
+ * through the generic path while retaining excellent distribution.
+ */
+static inline uint64_t ray_hash_i64(int64_t val) {
+    uint64_t A = (uint64_t)val ^ 0x2d358dccaa6c78a5ULL; /* same literal as ray__wyp[0] */
+    uint64_t B = (uint64_t)val ^ 0x8bb84b93962eacc9ULL; /* same literal as ray__wyp[1] */
+    ray__wymum(&A, &B); /* round 1: A,B become the low/high halves of the 128-bit product */
+    return ray__wymix(A ^ 0x2d358dccaa6c78a5ULL, B ^ 0x8bb84b93962eacc9ULL); /* round 2: multiply again and fold with xor */
+}
+
+/* ---- Convenience: hash a double ----------------------------------------- */
+
+/*
+ * ray_hash_f64 -- hash a 64-bit float by its bit pattern.
+ *
+ * Normalizes negative zero to positive zero so that -0.0 and +0.0
+ * hash identically (they compare equal via ==).
+ *
+ * Note: different NaN bit patterns hash differently; SQL NULL is
+ * handled separately at a higher level and never reaches this path.
+ */
+static inline uint64_t ray_hash_f64(double val) {
+    uint64_t bits;
+    if (val == 0.0) { uint64_t z = 0; memcpy(&val, &z, sizeof(val)); } /* normalize -0.0 → +0.0 */
+    memcpy(&bits, &val, sizeof(bits)); /* reinterpret the double's bytes as an integer (no numeric conversion) */
+    uint64_t A = bits ^ 0x2d358dccaa6c78a5ULL; /* from here on: the same two-round mix as ray_hash_i64 */
+    uint64_t B = bits ^ 0x8bb84b93962eacc9ULL;
+    ray__wymum(&A, &B);
+    return ray__wymix(A ^ 0x2d358dccaa6c78a5ULL, B ^ 0x8bb84b93962eacc9ULL);
+}
+
+/* ---- Combine two hashes ------------------------------------------------- */
+
+/*
+ * ray_hash_combine -- mix two hash values into one.
+ *
+ * Uses the wyhash64 two-input mixer. This is order-dependent:
+ * combine(a,b) != combine(b,a), which is the desired behaviour for
+ * multi-column key hashing where column order matters.
+ */ +static inline uint64_t ray_hash_combine(uint64_t h1, uint64_t h2) { + uint64_t A = h1 ^ 0x2d358dccaa6c78a5ULL; + uint64_t B = h2 ^ 0x8bb84b93962eacc9ULL; + ray__wymum(&A, &B); + return ray__wymix(A ^ 0x2d358dccaa6c78a5ULL, B ^ 0x8bb84b93962eacc9ULL); +} + +#endif /* RAY_HASH_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/idxop.c b/crates/rayforce-sys/vendor/rayforce/src/ops/idxop.c new file mode 100644 index 0000000..b3817a6 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/idxop.c @@ -0,0 +1,734 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "idxop.h" +#include "mem/heap.h" +#include "mem/cow.h" +#include "vec/vec.h" +#include "table/table.h" +#include "table/sym.h" +#include "lang/eval.h" +#include "ops/ops.h" +#include +#include + +/* Width of one element of a numeric vector type, or 0 if unsupported. 
*/ +static int numeric_elem_size(int8_t t) { + switch (t) { + case RAY_BOOL: case RAY_U8: return 1; + case RAY_I16: return 2; + case RAY_I32: case RAY_DATE: case RAY_F32: return 4; + case RAY_I64: case RAY_TIME: case RAY_TIMESTAMP: + case RAY_F64: return 8; + default: return 0; + } +} + +/* Read row i of a numeric vector as a 64-bit hash-input word. Mirrors the + * canonical-equality semantics in the rest of the codebase: -0.0 / +0.0 + * collapse, NaNs route per-row (caller treats NaN as its own bucket). */ +static uint64_t numeric_key_word(const uint8_t* base, int8_t type, int64_t i) { + int es = numeric_elem_size(type); + if (type == RAY_F32 || type == RAY_F64) { + double v; + if (es == 4) { float t; memcpy(&t, base + i*4, 4); v = (double)t; } + else { memcpy(&v, base + i*8, 8); } + if (v == 0.0) v = 0.0; /* canonicalise -0.0 -> +0.0 */ + if (v != v) { /* NaN: per-row bucket via row hash */ + return (uint64_t)i * 0x9E3779B97F4A7C15ULL; + } + uint64_t bits; + memcpy(&bits, &v, 8); + return bits; + } + int64_t k = 0; + switch (es) { + case 1: k = (int64_t)base[i]; break; + case 2: { int16_t t; memcpy(&t, base + i*2, 2); k = (int64_t)t; break; } + case 4: { int32_t t; memcpy(&t, base + i*4, 4); k = (int64_t)t; break; } + case 8: { int64_t t; memcpy(&t, base + i*8, 8); k = t; break; } + } + return (uint64_t)k; +} + +/* 64-bit avalanche mix (splittable hash from Stafford / xxhash). */ +static inline uint64_t mix64(uint64_t x) { + x ^= x >> 30; + x *= 0xbf58476d1ce4e5b9ULL; + x ^= x >> 27; + x *= 0x94d049bb133111ebULL; + x ^= x >> 31; + return x; +} + +/* Smallest power of two >= n, clamped to >= 1. */ +static uint64_t next_pow2(uint64_t n) { + if (n <= 1) return 1; + uint64_t p = 1; + while (p < n) p <<= 1; + return p; +} + +/* -------------------------------------------------------------------------- + * Index ray_t allocation / destruction helpers + * + * The block layout: 32-byte ray_t header + ray_index_t payload in data[]. 
+ * type = RAY_INDEX, attrs = 0 (the index itself is never sliced or aliased),
+ * len = sizeof(ray_index_t) (so callers can sanity-check the payload size).
+ * -------------------------------------------------------------------------- */
+
+static ray_t* ray_index_alloc(ray_idx_kind_t kind, int8_t parent_type, int64_t parent_len) {
+    ray_t* idx = ray_alloc(sizeof(ray_index_t));
+    if (!idx || RAY_IS_ERR(idx)) return idx; /* NULL or an error object is passed through unchanged */
+    idx->type = RAY_INDEX;
+    idx->attrs = 0;
+    idx->len = (int64_t)sizeof(ray_index_t);
+    memset(idx->data, 0, sizeof(ray_index_t)); /* zero the payload: per-kind union and saved snapshot start cleared */
+    ray_index_t* ix = ray_index_payload(idx);
+    ix->kind = (uint8_t)kind;
+    ix->parent_type = parent_type;
+    ix->built_for_len = parent_len; /* parent length the index was built for */
+    return idx;
+}
+
+/* Reading saved-nullmap pointers: typed views into the 16-byte snapshot. */
+static inline ray_t* saved_lo_ptr(ray_index_t* ix) { /* pointer stored at snapshot offset 0 */
+    ray_t* p; memcpy(&p, &ix->saved_nullmap[0], sizeof(p)); return p;
+}
+static inline ray_t* saved_hi_ptr(ray_index_t* ix) { /* pointer stored at snapshot offset 8 */
+    ray_t* p; memcpy(&p, &ix->saved_nullmap[8], sizeof(p)); return p;
+}
+static inline void saved_lo_clear(ray_index_t* ix) { /* zero the low 8 bytes so a released pointer cannot be reused */
+    memset(&ix->saved_nullmap[0], 0, 8);
+}
+static inline void saved_hi_clear(ray_index_t* ix) { /* zero the high 8 bytes, same reason */
+    memset(&ix->saved_nullmap[8], 0, 8);
+}
+
+/* --------------------------------------------------------------------------
+ * Saved-nullmap retain / release
+ *
+ * The saved 16 bytes hold pointers iff (parent_type, saved_attrs) say so:
+ *   - saved_attrs & NULLMAP_EXT  => low 8 bytes are an owning ray_t* (ext nullmap)
+ *                                   *except* RAY_STR uses the same slot for
+ *                                   str_ext_null (also an owning ref) — same
+ *                                   semantics, same ownership.
+ *   - parent_type == RAY_STR     => high 8 bytes are str_pool (owning ref)
+ *   - parent_type == RAY_SYM and saved_attrs & NULLMAP_EXT
+ *                                => high 8 bytes are sym_dict (owning ref)
+ *
+ * For all other type/attr combos the bytes are inline bitmap data, not
+ * pointers, and we leave them alone.
+ * -------------------------------------------------------------------------- */
+
+void ray_index_release_saved(ray_index_t* ix) { /* drop the references held by the saved snapshot and clear their slots */
+    if (ix->saved_attrs & RAY_ATTR_NULLMAP_EXT) {
+        ray_t* lo = saved_lo_ptr(ix);
+        if (lo && !RAY_IS_ERR(lo)) ray_release(lo);
+        saved_lo_clear(ix);
+    }
+    if (ix->parent_type == RAY_STR) { /* for RAY_STR the high slot is released regardless of NULLMAP_EXT */
+        ray_t* hi = saved_hi_ptr(ix);
+        if (hi && !RAY_IS_ERR(hi)) ray_release(hi);
+        saved_hi_clear(ix);
+    } else if (ix->parent_type == RAY_SYM &&
+               (ix->saved_attrs & RAY_ATTR_NULLMAP_EXT)) {
+        /* RAY_SYM stores sym_dict at high 8 bytes only when an ext nullmap
+         * is present (otherwise the inline bitmap occupies both halves and
+         * sym_dict isn't materialized in the union slot). */
+        ray_t* hi = saved_hi_ptr(ix);
+        if (hi && !RAY_IS_ERR(hi)) ray_release(hi);
+        saved_hi_clear(ix);
+    }
+}
+
+void ray_index_retain_saved(ray_index_t* ix) { /* mirror of ray_index_release_saved: same conditions, retain instead, slots untouched */
+    if (ix->saved_attrs & RAY_ATTR_NULLMAP_EXT) {
+        ray_t* lo = saved_lo_ptr(ix);
+        if (lo && !RAY_IS_ERR(lo)) ray_retain(lo);
+    }
+    if (ix->parent_type == RAY_STR) {
+        ray_t* hi = saved_hi_ptr(ix);
+        if (hi && !RAY_IS_ERR(hi)) ray_retain(hi);
+    } else if (ix->parent_type == RAY_SYM &&
+               (ix->saved_attrs & RAY_ATTR_NULLMAP_EXT)) {
+        ray_t* hi = saved_hi_ptr(ix);
+        if (hi && !RAY_IS_ERR(hi)) ray_retain(hi);
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Per-kind payload retain / release
+ * -------------------------------------------------------------------------- */
+
+void ray_index_release_payload(ray_index_t* ix) { /* release the child vectors owned by the index kind and NULL the fields */
+    switch ((ray_idx_kind_t)ix->kind) {
+    case RAY_IDX_HASH:
+        if (ix->u.hash.table && !RAY_IS_ERR(ix->u.hash.table))
+            ray_release(ix->u.hash.table);
+        if (ix->u.hash.chain && !RAY_IS_ERR(ix->u.hash.chain))
+            ray_release(ix->u.hash.chain);
+        ix->u.hash.table = ix->u.hash.chain = NULL;
+        break;
+    case RAY_IDX_SORT:
+        if (ix->u.sort.perm && !RAY_IS_ERR(ix->u.sort.perm))
+            ray_release(ix->u.sort.perm);
+        ix->u.sort.perm = NULL;
+        break;
+    case RAY_IDX_BLOOM:
+        if (ix->u.bloom.bits && !RAY_IS_ERR(ix->u.bloom.bits))
+            ray_release(ix->u.bloom.bits);
+        ix->u.bloom.bits = NULL;
+        break;
+    case RAY_IDX_ZONE: /* zone and none release nothing here */
+    case RAY_IDX_NONE:
+        break;
+    }
+}
+
+void ray_index_retain_payload(ray_index_t* ix) { /* mirror of ray_index_release_payload: retain each child, fields untouched */
+    switch ((ray_idx_kind_t)ix->kind) {
+    case RAY_IDX_HASH:
+        if (ix->u.hash.table && !RAY_IS_ERR(ix->u.hash.table))
+            ray_retain(ix->u.hash.table);
+        if (ix->u.hash.chain && !RAY_IS_ERR(ix->u.hash.chain))
+            ray_retain(ix->u.hash.chain);
+        break;
+    case RAY_IDX_SORT:
+        if (ix->u.sort.perm && !RAY_IS_ERR(ix->u.sort.perm))
+            ray_retain(ix->u.sort.perm);
+        break;
+    case RAY_IDX_BLOOM:
+        if (ix->u.bloom.bits && !RAY_IS_ERR(ix->u.bloom.bits))
+            ray_retain(ix->u.bloom.bits);
+        break;
+    case RAY_IDX_ZONE:
+    case RAY_IDX_NONE:
+        break;
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Zone scan -- compute min/max + null count
+ *
+ * Reads the parent vector before the nullmap is displaced. Integer paths
+ * cover BOOL/U8/I16/I32/I64/DATE/TIME/TIMESTAMP (all stored in int slots);
+ * float paths cover F32/F64. RAY_SYM/STR/GUID return RAY_ERR_NYI for now;
+ * those types will get string-aware min/max in the P4 zone work.
+ * -------------------------------------------------------------------------- */ + +static ray_err_t zone_scan_int(ray_t* v, ray_index_t* ix, int elem_size) { + int64_t n = v->len; + int64_t mn = INT64_MAX, mx = INT64_MIN; + int64_t nn = 0; + bool any_value = false; + const uint8_t* base = (const uint8_t*)ray_data(v); + + for (int64_t i = 0; i < n; i++) { + if (ray_vec_is_null(v, i)) { nn++; continue; } + int64_t val = 0; + switch (elem_size) { + case 1: val = (int64_t)base[i]; break; + case 2: { int16_t t; memcpy(&t, base + i*2, 2); val = (int64_t)t; break; } + case 4: { int32_t t; memcpy(&t, base + i*4, 4); val = (int64_t)t; break; } + case 8: { int64_t t; memcpy(&t, base + i*8, 8); val = t; break; } + default: return RAY_ERR_TYPE; + } + if (val < mn) mn = val; + if (val > mx) mx = val; + any_value = true; + } + if (!any_value) { mn = 0; mx = 0; } + ix->u.zone.min_i = mn; + ix->u.zone.max_i = mx; + ix->u.zone.n_nulls = nn; + return RAY_OK; +} + +static ray_err_t zone_scan_float(ray_t* v, ray_index_t* ix, int elem_size) { + int64_t n = v->len; + double mn = INFINITY, mx = -INFINITY; + int64_t nn = 0; + bool any_value = false; + const uint8_t* base = (const uint8_t*)ray_data(v); + + for (int64_t i = 0; i < n; i++) { + if (ray_vec_is_null(v, i)) { nn++; continue; } + double val = 0.0; + if (elem_size == 4) { + float t; memcpy(&t, base + i*4, 4); val = (double)t; + } else { + memcpy(&val, base + i*8, 8); + } + if (isnan(val)) continue; /* NaNs don't participate in min/max */ + if (val < mn) mn = val; + if (val > mx) mx = val; + any_value = true; + } + if (!any_value) { mn = 0.0; mx = 0.0; } + ix->u.zone.min_f = mn; + ix->u.zone.max_f = mx; + ix->u.zone.n_nulls = nn; + return RAY_OK; +} + +static ray_err_t zone_scan(ray_t* v, ray_index_t* ix) { + switch (v->type) { + case RAY_BOOL: + case RAY_U8: return zone_scan_int(v, ix, 1); + case RAY_I16: return zone_scan_int(v, ix, 2); + case RAY_I32: + case RAY_DATE: return zone_scan_int(v, ix, 4); + case RAY_I64: + case 
RAY_TIME: + case RAY_TIMESTAMP: return zone_scan_int(v, ix, 8); + case RAY_F32: return zone_scan_float(v, ix, 4); + case RAY_F64: return zone_scan_float(v, ix, 8); + default: return RAY_ERR_NYI; + } +} + +/* -------------------------------------------------------------------------- + * Attach + * + * The 16-byte snapshot must be taken AFTER the scan (so the scan reads the + * parent's normal nullmap) but BEFORE we overwrite parent->nullmap with the + * index pointer. Ownership transfer: pointers in the snapshot (ext_nullmap, + * str_pool, sym_dict) move from parent to ix. We do NOT retain them here — + * the existing refs simply move. Symmetrically, when we install the index + * pointer in parent->nullmap, we transfer that single ref to the parent + * (no extra retain). + * -------------------------------------------------------------------------- */ + +static ray_t* attach_finalize(ray_t* parent, ray_t* idx) { + ray_index_t* ix = ray_index_payload(idx); + /* Snapshot the parent's 16 raw bytes verbatim. */ + memcpy(ix->saved_nullmap, parent->nullmap, 16); + ix->saved_attrs = parent->attrs & (RAY_ATTR_HAS_NULLS | RAY_ATTR_NULLMAP_EXT); + + /* Install the index pointer — overwrites bytes 0-7 with the index ptr. + * Bytes 8-15 carry link_target when HAS_LINK is set; preserve them. + * Otherwise zero _idx_pad as a tidy default. */ + parent->index = idx; + if (!(parent->attrs & RAY_ATTR_HAS_LINK)) parent->_idx_pad = NULL; + parent->attrs |= RAY_ATTR_HAS_INDEX; + /* Clear NULLMAP_EXT on the parent: vec->ext_nullmap is now the index + * pointer, not a U8 nullmap vec, so naive readers that gate on + * NULLMAP_EXT and dereference ext_nullmap would read garbage. The + * displaced ext-nullmap pointer is preserved inside ix->saved_nullmap[0..7] + * and accessed via the HAS_INDEX-aware helpers in vec.c / morsel.c. + * + * IMPORTANT: HAS_NULLS is *preserved* on the parent so the many call + * sites that use it as a cheap "do I need null logic at all?" 
gate + * continue to give correct answers. The actual null bits are read + * via ray_vec_is_null / ray_morsel_next, both of which check + * HAS_INDEX first and route through the saved snapshot. */ + parent->attrs &= (uint8_t)~RAY_ATTR_NULLMAP_EXT; + return parent; +} + +/* Validate + COW + drop existing index. Returns the (possibly new) parent + * pointer and updates *vp. On error returns a RAY_ERROR; caller must + * propagate without further modifying *vp. */ +static ray_t* prepare_attach(ray_t** vp, const char* what) { + if (!vp || !*vp || RAY_IS_ERR(*vp)) + return ray_error("type", "%s: null/error vector", what); + ray_t* v = *vp; + if (!ray_is_vec(v)) + return ray_error("type", "%s: index can only attach to a vector", what); + if (v->attrs & RAY_ATTR_SLICE) + return ray_error("type", "%s: cannot index a slice; materialize first", what); + if (v->attrs & RAY_ATTR_HAS_INDEX) { + ray_index_drop(&v); + if (RAY_IS_ERR(v)) return v; + *vp = v; + } + v = ray_cow(v); + if (!v || RAY_IS_ERR(v)) return v; + *vp = v; + if (numeric_elem_size(v->type) == 0) { + return ray_error("nyi", "%s: only numeric vectors supported in v1 (got type %d)", + what, (int)v->type); + } + return v; +} + +ray_t* ray_index_attach_zone(ray_t** vp) { + ray_t* v = prepare_attach(vp, "zone"); + if (RAY_IS_ERR(v)) return v; + + ray_t* idx = ray_index_alloc(RAY_IDX_ZONE, v->type, v->len); + if (!idx || RAY_IS_ERR(idx)) return idx; + + ray_err_t err = zone_scan(v, ray_index_payload(idx)); + if (err != RAY_OK) { + ray_release(idx); + return ray_error(ray_err_code_str(err), "zone scan failed for type %d", (int)v->type); + } + return attach_finalize(v, idx); +} + +/* -------------------------------------------------------------------------- + * Hash index — chained open addressing + * + * table[capacity]: each slot is rid+1 of the most recent row that hashed + * into the bucket (0 = empty bucket). 
+ * chain[parent->len]: each slot is rid+1 of the next-older row in the same + * bucket's chain (0 = end of chain). + * + * Lookup `k`: rid = table[hash(k) & mask] - 1; while rid >= 0 compare + * parent->data[rid] == k, on miss step rid = chain[rid] - 1. + * -------------------------------------------------------------------------- */ + +ray_t* ray_index_attach_hash(ray_t** vp) { + ray_t* v = prepare_attach(vp, "hash"); + if (RAY_IS_ERR(v)) return v; + + int64_t n = v->len; + /* Capacity: at least 8, at most 2*n. Power of two for cheap masking. */ + uint64_t cap = next_pow2((uint64_t)(n < 4 ? 8 : 2 * n)); + if (cap < 8) cap = 8; + + ray_t* table = ray_vec_new(RAY_I64, (int64_t)cap); + if (!table || RAY_IS_ERR(table)) return table ? table : ray_error("oom", NULL); + table->len = (int64_t)cap; + memset(ray_data(table), 0, (size_t)cap * sizeof(int64_t)); + + ray_t* chain = ray_vec_new(RAY_I64, n > 0 ? n : 1); + if (!chain || RAY_IS_ERR(chain)) { + ray_release(table); + return chain ? chain : ray_error("oom", NULL); + } + chain->len = n; + if (n > 0) memset(ray_data(chain), 0, (size_t)n * sizeof(int64_t)); + + int64_t* tbl = (int64_t*)ray_data(table); + int64_t* chn = (int64_t*)ray_data(chain); + const uint8_t* base = (const uint8_t*)ray_data(v); + int64_t n_keys = 0; + uint64_t mask = cap - 1; + + for (int64_t i = 0; i < n; i++) { + if (ray_vec_is_null(v, i)) continue; + uint64_t h = mix64(numeric_key_word(base, v->type, i)); + uint64_t slot = h & mask; + chn[i] = tbl[slot]; /* link previous head into chain */ + tbl[slot] = i + 1; /* this row becomes new head */ + n_keys++; + } + + ray_t* idx = ray_index_alloc(RAY_IDX_HASH, v->type, n); + if (!idx || RAY_IS_ERR(idx)) { + ray_release(table); + ray_release(chain); + return idx ? 
idx : ray_error("oom", NULL); + } + ray_index_t* ix = ray_index_payload(idx); + ix->u.hash.table = table; + ix->u.hash.chain = chain; + ix->u.hash.mask = mask; + ix->u.hash.n_keys = n_keys; + + return attach_finalize(v, idx); +} + +/* -------------------------------------------------------------------------- + * Sort index — ascending permutation of row ids + * + * Delegates to the existing parallel sort builder. Result is an I64 vec of + * length parent->len with default null-handling (nulls last for asc). + * -------------------------------------------------------------------------- */ + +ray_t* ray_index_attach_sort(ray_t** vp) { + ray_t* v = prepare_attach(vp, "sort"); + if (RAY_IS_ERR(v)) return v; + + ray_t* col = v; + ray_t* perm = ray_sort_indices(&col, NULL, NULL, 1, v->len); + if (!perm || RAY_IS_ERR(perm)) return perm ? perm : ray_error("oom", NULL); + + ray_t* idx = ray_index_alloc(RAY_IDX_SORT, v->type, v->len); + if (!idx || RAY_IS_ERR(idx)) { + ray_release(perm); + return idx ? idx : ray_error("oom", NULL); + } + ray_index_t* ix = ray_index_payload(idx); + ix->u.sort.perm = perm; + + return attach_finalize(v, idx); +} + +/* -------------------------------------------------------------------------- + * Bloom filter — m bits, k=3 hashes via double-hashing + * + * Layout: m is rounded to the next power of two >= max(64, 8*n_non_null). + * Each row sets bits at positions (h1 + i*h2) mod m for i in [0..k). + * h1, h2 are derived from a single 64-bit mix of the key word. + * -------------------------------------------------------------------------- */ + +ray_t* ray_index_attach_bloom(ray_t** vp) { + ray_t* v = prepare_attach(vp, "bloom"); + if (RAY_IS_ERR(v)) return v; + + int64_t n = v->len; + /* Count non-null rows for sizing. */ + int64_t n_set = 0; + for (int64_t i = 0; i < n; i++) { + if (!ray_vec_is_null(v, i)) n_set++; + } + uint64_t target_bits = (uint64_t)(n_set < 8 ? 
64 : 8 * n_set); + uint64_t m = next_pow2(target_bits); + if (m < 64) m = 64; + uint64_t mbytes = m / 8; + uint32_t k = 3; + + ray_t* bits = ray_vec_new(RAY_U8, (int64_t)mbytes); + if (!bits || RAY_IS_ERR(bits)) return bits ? bits : ray_error("oom", NULL); + bits->len = (int64_t)mbytes; + memset(ray_data(bits), 0, (size_t)mbytes); + + uint8_t* bbuf = (uint8_t*)ray_data(bits); + uint64_t mask = m - 1; + const uint8_t* base = (const uint8_t*)ray_data(v); + + for (int64_t i = 0; i < n; i++) { + if (ray_vec_is_null(v, i)) continue; + uint64_t h = mix64(numeric_key_word(base, v->type, i)); + uint64_t h1 = h; + uint64_t h2 = mix64(h ^ 0xc6a4a7935bd1e995ULL) | 1ULL; /* ensure odd */ + for (uint32_t kk = 0; kk < k; kk++) { + uint64_t pos = (h1 + (uint64_t)kk * h2) & mask; + bbuf[pos >> 3] |= (uint8_t)(1u << (pos & 7)); + } + } + + ray_t* idx = ray_index_alloc(RAY_IDX_BLOOM, v->type, n); + if (!idx || RAY_IS_ERR(idx)) { + ray_release(bits); + return idx ? idx : ray_error("oom", NULL); + } + ray_index_t* ix = ray_index_payload(idx); + ix->u.bloom.bits = bits; + ix->u.bloom.m_mask = mask; + ix->u.bloom.k = k; + ix->u.bloom.n_keys = n_set; + + return attach_finalize(v, idx); +} + +/* -------------------------------------------------------------------------- + * Detach (drop) + * + * Restore the parent's 16-byte nullmap union from the saved snapshot, then + * release the index ray_t. The release path of RAY_INDEX would otherwise + * also try to release the saved-nullmap pointers, so we clear the saved + * snapshot and saved_attrs first to neutralize that — ownership is moving + * back to the parent. + * -------------------------------------------------------------------------- */ + +ray_t* ray_index_drop(ray_t** vp) { + if (!vp || !*vp || RAY_IS_ERR(*vp)) return *vp; + ray_t* v = *vp; + if (!(v->attrs & RAY_ATTR_HAS_INDEX)) return v; + + /* Detach mutates the parent in place; require sole ownership. 
*/ + v = ray_cow(v); + if (!v || RAY_IS_ERR(v)) { *vp = v; return v; } + *vp = v; + + /* After ray_cow, *vp may be a freshly copied block. In ray_alloc_copy, + * the index pointer was retained by ray_retain_owned_refs (via the + * RAY_ATTR_HAS_INDEX branch we add in heap.c), so v->index here is + * still the live, owned index ray_t. */ + ray_t* idx = v->index; + ray_index_t* ix = ray_index_payload(idx); + + /* Shared-index case: another vec may share this RAY_INDEX block via + * ray_alloc_copy (rc>1). Don't clobber the snapshot in that case — + * the other holder still reads it. Copy our own retained refs to + * the saved-pointer slots so the bytes we move into v->nullmap are + * owned by v. See vec_drop_index_inplace for the same pattern. */ + uint8_t saved = ix->saved_attrs; + bool shared = ray_atomic_load(&idx->rc) > 1; + if (shared) { + ray_index_retain_saved(ix); + } + memcpy(v->nullmap, ix->saved_nullmap, 16); + if (!shared) { + memset(ix->saved_nullmap, 0, 16); + ix->saved_attrs = 0; + } + + /* Restore parent attrs. HAS_NULLS was preserved through the attachment + * so we don't need to OR it back in; only NULLMAP_EXT (which we cleared + * at attach time) needs to be reinstated from saved_attrs. */ + v->attrs &= (uint8_t)~RAY_ATTR_HAS_INDEX; + if (saved & RAY_ATTR_NULLMAP_EXT) v->attrs |= RAY_ATTR_NULLMAP_EXT; + + /* Release the index. Per-kind children are released by the RAY_INDEX + * branch of ray_release_owned_refs (added in heap.c). 
*/ + ray_release(idx); + return v; +} + +/* -------------------------------------------------------------------------- + * Info + * -------------------------------------------------------------------------- */ + +static const char* kind_name(ray_idx_kind_t k) { + switch (k) { + case RAY_IDX_HASH: return "hash"; + case RAY_IDX_SORT: return "sort"; + case RAY_IDX_ZONE: return "zone"; + case RAY_IDX_BLOOM: return "bloom"; + default: return "none"; + } +} + +static ray_t* dict_append_sym_i64(ray_t** keys, ray_t** vals, const char* k, int64_t n) { + int64_t kid = ray_sym_intern(k, strlen(k)); + *keys = ray_vec_append(*keys, &kid); + if (RAY_IS_ERR(*keys)) return *keys; + ray_t* nv = ray_i64(n); + *vals = ray_list_append(*vals, nv); + ray_release(nv); + return *vals; +} + +static ray_t* dict_append_sym_sym(ray_t** keys, ray_t** vals, const char* k, const char* s) { + int64_t kid = ray_sym_intern(k, strlen(k)); + *keys = ray_vec_append(*keys, &kid); + if (RAY_IS_ERR(*keys)) return *keys; + int64_t sid = ray_sym_intern(s, strlen(s)); + ray_t* sv = ray_sym(sid); + *vals = ray_list_append(*vals, sv); + ray_release(sv); + return *vals; +} + +ray_t* ray_index_info(ray_t* v) { + if (!ray_index_has(v)) return RAY_NULL_OBJ; + ray_index_t* ix = ray_index_payload(v->index); + + ray_t* keys = ray_sym_vec_new(RAY_SYM_W64, 8); + if (RAY_IS_ERR(keys)) return keys; + ray_t* vals = ray_list_new(8); + if (RAY_IS_ERR(vals)) { ray_release(keys); return vals; } + + ray_t* r; + r = dict_append_sym_sym(&keys, &vals, "kind", kind_name((ray_idx_kind_t)ix->kind)); + if (RAY_IS_ERR(r)) goto fail; + r = dict_append_sym_i64(&keys, &vals, "length", ix->built_for_len); + if (RAY_IS_ERR(r)) goto fail; + r = dict_append_sym_i64(&keys, &vals, "parent_type", (int64_t)ix->parent_type); + if (RAY_IS_ERR(r)) goto fail; + r = dict_append_sym_i64(&keys, &vals, "saved_attrs", (int64_t)ix->saved_attrs); + if (RAY_IS_ERR(r)) goto fail; + + switch ((ray_idx_kind_t)ix->kind) { + case RAY_IDX_ZONE: + if 
(ix->parent_type == RAY_F32 || ix->parent_type == RAY_F64) { + int64_t kmin = ray_sym_intern("min", 3); + keys = ray_vec_append(keys, &kmin); + ray_t* mn = ray_f64(ix->u.zone.min_f); + vals = ray_list_append(vals, mn); ray_release(mn); + int64_t kmax = ray_sym_intern("max", 3); + keys = ray_vec_append(keys, &kmax); + ray_t* mx = ray_f64(ix->u.zone.max_f); + vals = ray_list_append(vals, mx); ray_release(mx); + } else { + r = dict_append_sym_i64(&keys, &vals, "min", ix->u.zone.min_i); + if (RAY_IS_ERR(r)) goto fail; + r = dict_append_sym_i64(&keys, &vals, "max", ix->u.zone.max_i); + if (RAY_IS_ERR(r)) goto fail; + } + r = dict_append_sym_i64(&keys, &vals, "n_nulls", ix->u.zone.n_nulls); + if (RAY_IS_ERR(r)) goto fail; + break; + case RAY_IDX_HASH: + r = dict_append_sym_i64(&keys, &vals, "capacity", (int64_t)(ix->u.hash.mask + 1)); + if (RAY_IS_ERR(r)) goto fail; + r = dict_append_sym_i64(&keys, &vals, "n_keys", ix->u.hash.n_keys); + if (RAY_IS_ERR(r)) goto fail; + break; + case RAY_IDX_SORT: + r = dict_append_sym_i64(&keys, &vals, "perm_len", + ix->u.sort.perm ? 
ix->u.sort.perm->len : 0); + if (RAY_IS_ERR(r)) goto fail; + break; + case RAY_IDX_BLOOM: + r = dict_append_sym_i64(&keys, &vals, "m_bits", (int64_t)(ix->u.bloom.m_mask + 1)); + if (RAY_IS_ERR(r)) goto fail; + r = dict_append_sym_i64(&keys, &vals, "k", (int64_t)ix->u.bloom.k); + if (RAY_IS_ERR(r)) goto fail; + r = dict_append_sym_i64(&keys, &vals, "n_keys", ix->u.bloom.n_keys); + if (RAY_IS_ERR(r)) goto fail; + break; + case RAY_IDX_NONE: + break; + } + + return ray_dict_new(keys, vals); + +fail: + if (!RAY_IS_ERR(keys)) ray_release(keys); + if (!RAY_IS_ERR(vals)) ray_release(vals); + return r; +} + +/* -------------------------------------------------------------------------- + * Rayfall builtins (registered from src/lang/eval.c) + * -------------------------------------------------------------------------- */ + +/* Common entry shape: take a borrowed ref, return an owning ref of the + * (possibly COW-copied) parent. See heap.c:ray_release on rc transfer. */ +static ray_t* attach_via(ray_t* v, ray_t* (*fn)(ray_t**)) { + if (!v || RAY_IS_ERR(v)) return v; + ray_t* w = v; + ray_retain(w); + ray_t* r = fn(&w); + if (RAY_IS_ERR(r)) { ray_release(w); return r; } + return w; +} + +ray_t* ray_idx_zone_fn (ray_t* v) { return attach_via(v, ray_index_attach_zone); } +ray_t* ray_idx_hash_fn (ray_t* v) { return attach_via(v, ray_index_attach_hash); } +ray_t* ray_idx_sort_fn (ray_t* v) { return attach_via(v, ray_index_attach_sort); } +ray_t* ray_idx_bloom_fn(ray_t* v) { return attach_via(v, ray_index_attach_bloom); } + +ray_t* ray_idx_drop_fn(ray_t* v) { + if (!v || RAY_IS_ERR(v)) return v; + ray_t* w = v; + ray_retain(w); + ray_t* r = ray_index_drop(&w); + if (RAY_IS_ERR(r)) { ray_release(w); return r; } + return w; +} + +ray_t* ray_idx_has_fn(ray_t* v) { + return ray_bool(ray_index_has(v) ? 
1 : 0); +} + +ray_t* ray_idx_info_fn(ray_t* v) { + return ray_index_info(v); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/idxop.h b/crates/rayforce-sys/vendor/rayforce/src/ops/idxop.h new file mode 100644 index 0000000..5dcc4c3 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/idxop.h @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_IDXOP_H +#define RAY_IDXOP_H + +/* + * idxop.h -- Per-vector accelerator indices. + * + * A vector with RAY_ATTR_HAS_INDEX set carries a child ray_t of type + * RAY_INDEX in its nullmap[0..7] slot. 
The index ray_t holds: + * - the kind (hash / sort / zone / bloom) + * - kind-specific payload (keys vec, perm vec, min/max, bloom bits) + * - a snapshot of the parent's original 16-byte nullmap union plus + * the relevant attrs bits, so detach can restore the vector to its + * pre-attach state byte-for-byte. + * + * Attach precondition: parent must not be a slice, must not already + * carry an index, must be COW'd to rc==1 by the caller's path. + * + * Mutation invalidates: any in-place write to the parent vector must + * call ray_index_drop() first — a stale index is a wrong-answer bug. + */ + +#include +#include "mem/heap.h" /* RAY_ATTR_HAS_INDEX, RAY_ATTR_NULLMAP_EXT */ + +/* Index kinds. Stored in ray_index_t.kind. */ +typedef enum { + RAY_IDX_NONE = 0, + RAY_IDX_HASH = 1, + RAY_IDX_SORT = 2, + RAY_IDX_ZONE = 3, + RAY_IDX_BLOOM = 4, +} ray_idx_kind_t; + +/* The payload stored inside data[] of a RAY_INDEX ray_t. */ +typedef struct { + uint8_t kind; /* ray_idx_kind_t */ + uint8_t saved_attrs; /* parent attrs & (HAS_NULLS|NULLMAP_EXT) at attach */ + int8_t parent_type; /* parent->type (for restore-time pointer interp) */ + uint8_t reserved; + int64_t built_for_len; /* parent->len at attach (mismatch -> stale) */ + + /* Raw 16-byte snapshot of parent->nullmap union at attach time. + * Restored verbatim on detach. When this contains pointers + * (ext_nullmap, str_pool, sym_dict, str_ext_null) they are owned + * by THIS ray_t for the duration of the attachment; release-side + * of RAY_INDEX walks these based on (parent_type, saved_attrs). */ + uint8_t saved_nullmap[16]; + + /* Kind-specific payload. All ray_t* fields are owning refs. */ + union { + struct { /* RAY_IDX_HASH */ + /* Chained open-addressing. table[mask+1] holds the head rid+1 + * for each bucket (0 = empty bucket). chain[parent->len] holds + * the next rid+1 in the same bucket's chain (0 = end of chain). 
+ * Lookup: hash key, read table[hash & mask] for head, walk chain + * until 0 comparing parent->data[rid] for equality. */ + ray_t* table; /* RAY_I64 vec, capacity entries */ + ray_t* chain; /* RAY_I64 vec, parent->len entries */ + uint64_t mask; /* capacity - 1 (capacity is power of two) */ + int64_t n_keys; /* number of non-null rows indexed */ + } hash; + struct { /* RAY_IDX_SORT */ + ray_t* perm; /* RAY_I64 vec, perm[i] = row id at sorted pos i */ + } sort; + struct { /* RAY_IDX_ZONE */ + int64_t min_i; /* integer min (used when type is int/date/time) */ + int64_t max_i; /* integer max */ + double min_f; /* float min (used when type is f32/f64) */ + double max_f; /* float max */ + int64_t n_nulls; /* number of null rows (0 if no nulls) */ + } zone; + struct { /* RAY_IDX_BLOOM */ + ray_t* bits; /* RAY_U8 vec, m/8 bytes */ + uint64_t m_mask; /* m - 1 (m is power of two, m bits total) */ + uint32_t k; /* number of hash functions */ + uint32_t _pad; + int64_t n_keys; /* number of non-null rows added */ + } bloom; + } u; +} ray_index_t; + +/* Inline accessor — returns ray_index_t* for a RAY_INDEX block. */ +static inline ray_index_t* ray_index_payload(ray_t* idx) { + return (ray_index_t*)idx->data; +} + +/* ===== Attach / Detach ===== */ + +/* Build an accelerator and attach. Numeric types only for v1 + * (BOOL/U8/I16/I32/I64/F32/F64/DATE/TIME/TIMESTAMP — RAY_STR/RAY_SYM/RAY_GUID + * deferred until the str_pool/sym_dict displacement sweep is complete). + * On success, *vp is the (possibly new) parent vector with HAS_INDEX set. + * On failure, *vp is unchanged and a RAY_ERROR is returned. */ +ray_t* ray_index_attach_zone (ray_t** vp); +ray_t* ray_index_attach_hash (ray_t** vp); +ray_t* ray_index_attach_sort (ray_t** vp); +ray_t* ray_index_attach_bloom(ray_t** vp); + +/* Drop any attached index from *vp. No-op if none. Restores the + * pre-attach nullmap state byte-for-byte. Returns *vp. 
*/ +ray_t* ray_index_drop(ray_t** vp); + +/* ===== Introspection ===== */ + +static inline bool ray_index_has(const ray_t* v) { + return v && !RAY_IS_ERR((ray_t*)v) && + (v->attrs & RAY_ATTR_HAS_INDEX) && + v->index != NULL; +} + +/* Returns RAY_IDX_NONE if no index is attached. */ +static inline ray_idx_kind_t ray_index_kind(const ray_t* v) { + if (!ray_index_has(v)) return RAY_IDX_NONE; + return (ray_idx_kind_t)ray_index_payload(v->index)->kind; +} + +/* Returns a fresh RAY_DICT with {kind, length, ...kind-specific...} + * or RAY_NULL_OBJ when no index is attached. */ +ray_t* ray_index_info(ray_t* v); + +/* ===== Internal helpers (used by retain/release/detach in heap.c + * and by mutation paths in vec.c) ===== */ + +/* Release the saved-nullmap pointers carried by a RAY_INDEX ray_t. + * Invoked from ray_release_owned_refs when the index ray_t is freed. */ +void ray_index_release_saved(ray_index_t* ix); + +/* Retain the saved-nullmap pointers carried by a RAY_INDEX ray_t. + * Invoked from ray_retain_owned_refs after a copy of the index ray_t. */ +void ray_index_retain_saved(ray_index_t* ix); + +/* Release per-kind payload children (keys/table/perm/bits...). */ +void ray_index_release_payload(ray_index_t* ix); + +/* Retain per-kind payload children. */ +void ray_index_retain_payload(ray_index_t* ix); + +/* ===== Rayfall builtin entry points (registered from src/lang/eval.c) ===== */ + +ray_t* ray_idx_zone_fn (ray_t* v); /* (.idx.zone v) -> v with zone attached */ +ray_t* ray_idx_hash_fn (ray_t* v); /* (.idx.hash v) -> v with hash attached */ +ray_t* ray_idx_sort_fn (ray_t* v); /* (.idx.sort v) -> v with sort attached */ +ray_t* ray_idx_bloom_fn(ray_t* v); /* (.idx.bloom v) -> v with bloom attached */ +ray_t* ray_idx_drop_fn (ray_t* v); /* (.idx.drop v) -> v with index removed */ +ray_t* ray_idx_has_fn (ray_t* v); /* (.idx.has? 
v) -> 0b/1b */ +ray_t* ray_idx_info_fn (ray_t* v); /* (.idx.info v) -> dict of metadata */ + +#endif /* RAY_IDXOP_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/internal.h b/crates/rayforce-sys/vendor/rayforce/src/ops/internal.h new file mode 100644 index 0000000..bc0dc1b --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/internal.h @@ -0,0 +1,992 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** Shared helpers for exec.c split — included by expr.c, filter.c, join.c, etc. + * Small hot-path helpers are static inline; larger functions that remain in + * exec.c are declared extern. 
+ */ + +#ifndef RAY_EXEC_INTERNAL_H +#define RAY_EXEC_INTERNAL_H + +#if !defined(RAY_OS_WINDOWS) && !defined(_GNU_SOURCE) +#define _GNU_SOURCE +#endif + +#include "exec.h" +#include "hash.h" +#include "core/pool.h" +#include "core/profile.h" +#include "store/csr.h" +#include "store/hnsw.h" +#include "lftj.h" +#include "mem/heap.h" +#include "table/sym.h" +#include "table/table.h" +#include "vec/str.h" +#include "vec/vec.h" +#include +#include +#include +#include +#include +#include + +/* ══════════════════════════════════════════ + * Parted segment helpers + * ══════════════════════════════════════════ */ + +/* Return attrs of the first non-NULL segment (for SYM width). */ +static inline uint8_t parted_first_attrs(ray_t** segs, int64_t n_segs) { + for (int64_t i = 0; i < n_segs; i++) + if (segs[i]) return segs[i]->attrs; + return 0; +} + +/* Check whether a parted segment's SYM width matches the expected esz. + * For non-SYM types this always returns true (attrs don't affect esz). */ +static inline bool parted_seg_esz_ok(ray_t* seg, int8_t base, uint8_t expected_esz) { + if (!seg) return false; + if (base != RAY_SYM) return true; + return ray_sym_elem_size(base, seg->attrs) == expected_esz; +} + +/* ══════════════════════════════════════════ + * Global profiler + * ══════════════════════════════════════════ */ + +extern ray_profile_t g_ray_profile; + +/* ══════════════════════════════════════════ + * Arena-based scratch allocation helpers + * ══════════════════════════════════════════ */ + +/* Allocate zero-initialized scratch buffer, returns data pointer. + * *hdr_out receives the ray_t* header for later ray_free(). */ +static inline void* scratch_calloc(ray_t** hdr_out, size_t nbytes) { + ray_t* h = ray_alloc(nbytes); + if (!h) { *hdr_out = NULL; return NULL; } + void* p = ray_data(h); + memset(p, 0, nbytes); + *hdr_out = h; + return p; +} + +/* Allocate uninitialized scratch buffer. 
*/ +static inline void* scratch_alloc(ray_t** hdr_out, size_t nbytes) { + ray_t* h = ray_alloc(nbytes); + if (!h) { *hdr_out = NULL; return NULL; } + *hdr_out = h; + return ray_data(h); +} + +/* Reallocate: alloc new, copy old, free old. Returns new data pointer. */ +static inline void* scratch_realloc(ray_t** hdr_out, size_t old_bytes, size_t new_bytes) { + ray_t* old_h = *hdr_out; + ray_t* new_h = ray_alloc(new_bytes); + if (!new_h) return NULL; + void* new_p = ray_data(new_h); + if (old_h) { + memcpy(new_p, ray_data(old_h), old_bytes < new_bytes ? old_bytes : new_bytes); + ray_free(old_h); + } + *hdr_out = new_h; + return new_p; +} + +/* Free a scratch buffer (NULL-safe). */ +static inline void scratch_free(ray_t* hdr) { + if (!hdr) return; + ray_free(hdr); +} + +/* ══════════════════════════════════════════ + * Safe sym intern + * ══════════════════════════════════════════ */ + +/* Safe sym intern for constant column names in graph algorithm result tables. + * Falls back to 0 on failure (column name interning should never fail for + * short constant strings unless ray_sym_init failed). */ +static inline int64_t sym_intern_safe(const char* s, size_t len) { + int64_t id = ray_sym_intern(s, len); + return id >= 0 ? 
id : 0; +} + +/* ══════════════════════════════════════════ + * Unified column read/write helpers + * ══════════════════════════════════════════ */ + +static inline int64_t read_col_i64(const void* data, int64_t row, + int8_t type, uint8_t attrs) { + switch (type) { + case RAY_I64: case RAY_TIMESTAMP: + return ((const int64_t*)data)[row]; + case RAY_SYM: + switch (attrs & RAY_SYM_W_MASK) { + case RAY_SYM_W8: return (int64_t)((const uint8_t*)data)[row]; + case RAY_SYM_W16: return (int64_t)((const uint16_t*)data)[row]; + case RAY_SYM_W32: return (int64_t)((const uint32_t*)data)[row]; + default: return ((const int64_t*)data)[row]; + } + case RAY_I32: case RAY_DATE: case RAY_TIME: + return (int64_t)((const int32_t*)data)[row]; + case RAY_I16: + return (int64_t)((const int16_t*)data)[row]; + default: /* RAY_BOOL, RAY_U8 */ + return (int64_t)((const uint8_t*)data)[row]; + } +} + +static inline void write_col_i64(void* data, int64_t row, int64_t val, + int8_t type, uint8_t attrs) { + switch (type) { + case RAY_I64: case RAY_TIMESTAMP: + ((int64_t*)data)[row] = val; return; + case RAY_SYM: + ray_write_sym(data, row, (uint64_t)val, type, attrs); return; + case RAY_I32: case RAY_DATE: case RAY_TIME: + ((int32_t*)data)[row] = (int32_t)val; return; + case RAY_I16: + ((int16_t*)data)[row] = (int16_t)val; return; + default: /* RAY_BOOL, RAY_U8 */ + ((uint8_t*)data)[row] = (uint8_t)val; return; + } +} + +/* ══════════════════════════════════════════ + * RAY_SYM-aware column helpers + * ══════════════════════════════════════════ */ + +static inline uint8_t col_esz(const ray_t* col) { + return ray_sym_elem_size(col->type, col->attrs); +} + +/* Fast key reader for DA/sort hot loops: elem_size is pre-computed and + * loop-invariant, so the switch is always perfectly predicted. Avoids the + * ray_read_sym → type dispatch chain (3+ branches per element). 
*/ +static inline int64_t read_by_esz(const void* data, int64_t row, uint8_t esz) { + switch (esz) { + case 1: return (int64_t)((const uint8_t*)data)[row]; + case 2: return (int64_t)((const uint16_t*)data)[row]; + case 4: return (int64_t)((const uint32_t*)data)[row]; + default: return ((const int64_t*)data)[row]; + } +} + +static inline ray_t* col_vec_new(const ray_t* src, int64_t cap) { + if (src->type == RAY_SYM) + return ray_sym_vec_new(src->attrs & RAY_SYM_W_MASK, cap); + return ray_vec_new(src->type, cap); +} + +/* Propagate str_pool from source to gathered result. + * Source may be a slice — resolve to owner's pool. */ +static inline void col_propagate_str_pool(ray_t* dst, const ray_t* src) { + if (src->type != RAY_STR || dst->type != RAY_STR) return; + const ray_t* owner = (src->attrs & RAY_ATTR_SLICE) ? src->slice_parent : src; + if (owner->str_pool) { + if (dst->str_pool) ray_release(dst->str_pool); + ray_retain(owner->str_pool); + dst->str_pool = owner->str_pool; + } +} + +/* Propagate str_pool from parted segments to gathered result. + * All segments must share the same pool for memcpy-gathered results + * to be valid. For multi-pool cases, callers must use the deep-copy + * gather path (parted_gather_str_col) instead. */ +static inline void col_propagate_str_pool_parted(ray_t* dst, ray_t** segs, int64_t n_segs) { + if (dst->type != RAY_STR) return; + for (int64_t i = 0; i < n_segs; i++) { + if (segs[i] && segs[i]->type == RAY_STR && segs[i]->str_pool) { + col_propagate_str_pool(dst, segs[i]); + return; + } + } +} + +/* Check if all non-NULL STR segments share the same str_pool pointer. 
*/ +static inline bool parted_str_single_pool(ray_t** segs, int64_t n_segs) { + ray_t* pool = NULL; + for (int64_t i = 0; i < n_segs; i++) { + if (!segs[i] || segs[i]->type != RAY_STR || !segs[i]->str_pool) continue; + if (!pool) pool = segs[i]->str_pool; + else if (segs[i]->str_pool != pool) return false; + } + return true; +} + +/* ---- Null bitmap propagation helpers ---- */ + +/* Propagate nulls from src to dst via index array: dst[r] gets src's null + * bit at indices[r]. indices may contain -1 for LEFT/OUTER join fill rows + * (those are set null unconditionally). */ +static inline void col_propagate_nulls_gather(ray_t* dst, const ray_t* src, + const int64_t* indices, + int64_t count) { + bool src_has_nulls = (src->attrs & RAY_ATTR_HAS_NULLS) != 0; + for (int64_t r = 0; r < count; r++) { + if (indices[r] < 0 || + (src_has_nulls && ray_vec_is_null((ray_t*)src, indices[r]))) + ray_vec_set_null(dst, r, true); + } +} + +/* Propagate nulls from src[src_off..src_off+count) to dst[dst_off..), + * for contiguous range copies (HEAD, TAIL, range inserts). */ +static inline void col_propagate_nulls_range(ray_t* dst, int64_t dst_off, + const ray_t* src, int64_t src_off, + int64_t count) { + if (!(src->attrs & RAY_ATTR_HAS_NULLS)) return; + for (int64_t i = 0; i < count; i++) { + if (ray_vec_is_null((ray_t*)src, src_off + i)) + ray_vec_set_null(dst, dst_off + i, true); + } +} + +/* Propagate nulls through a boolean filter mask: for each set bit in + * mask[0..src_len), copy the null bit from src to dst[out_idx++]. */ +static inline void col_propagate_nulls_filter(ray_t* dst, const ray_t* src, + const uint8_t* mask, + int64_t src_len) { + if (!(src->attrs & RAY_ATTR_HAS_NULLS)) return; + int64_t out = 0; + for (int64_t i = 0; i < src_len; i++) { + if (mask[i]) { + if (ray_vec_is_null((ray_t*)src, i)) + ray_vec_set_null(dst, out, true); + out++; + } + } +} + +/* Append one string element from a parted segment, preserving nulls. 
*/ +static inline ray_t* parted_str_append_elem(ray_t* out, ray_t* seg, + int64_t local_idx, + const char* pool_base) { + if ((seg->attrs & RAY_ATTR_HAS_NULLS) && ray_vec_is_null(seg, local_idx)) { + out = ray_str_vec_append(out, "", 0); + if (!RAY_IS_ERR(out)) + ray_vec_set_null(out, out->len - 1, true); + } else { + ray_str_t* elems = (ray_str_t*)ray_data(seg); + const char* str = ray_str_t_ptr(&elems[local_idx], pool_base); + out = ray_str_vec_append(out, str, elems[local_idx].len); + } + return out; +} + +/* Deep-copy gather from parted RAY_STR segments by row index. + * Resolves each string from its source segment's pool and appends + * into the output vector's own pool. Safe for multi-pool segments. */ +static inline ray_t* parted_gather_str_rows(ray_t** segs, int64_t n_segs, + const int64_t* row_indices, + int64_t count) { + /* Build prefix-sum segment boundaries */ + int64_t cumul = 0; + int64_t stack_ends[64]; + int64_t* seg_ends = (n_segs <= 64) ? stack_ends : NULL; + ray_t* ends_hdr = NULL; + if (!seg_ends) { + seg_ends = (int64_t*)scratch_alloc(&ends_hdr, (size_t)n_segs * sizeof(int64_t)); + if (!seg_ends) return ray_error("oom", NULL); + } + for (int64_t i = 0; i < n_segs; i++) { + cumul += (segs[i]) ? segs[i]->len : 0; + seg_ends[i] = cumul; + } + + ray_t* out = ray_vec_new(RAY_STR, count); + if (!out || RAY_IS_ERR(out)) { if (ends_hdr) scratch_free(ends_hdr); return out; } + + int64_t seg = 0; + for (int64_t i = 0; i < count; i++) { + int64_t row = row_indices[i]; + while (seg < n_segs - 1 && row >= seg_ends[seg]) seg++; + if (!segs[seg]) { + out = ray_str_vec_append(out, "", 0); + if (!RAY_IS_ERR(out)) + ray_vec_set_null(out, out->len - 1, true); + } else { + int64_t seg_start = (seg > 0) ? seg_ends[seg - 1] : 0; + int64_t local = row - seg_start; + const char* pool_base = segs[seg]->str_pool + ? 
(const char*)ray_data(segs[seg]->str_pool) : NULL; + out = parted_str_append_elem(out, segs[seg], local, pool_base); + } + if (RAY_IS_ERR(out)) { if (ends_hdr) scratch_free(ends_hdr); return out; } + } + if (ends_hdr) scratch_free(ends_hdr); + return out; +} + +/* Deep-copy head (first n rows) from parted RAY_STR segments. */ +static inline ray_t* parted_head_str(ray_t** segs, int64_t n_segs, int64_t n) { + ray_t* out = ray_vec_new(RAY_STR, n); + if (!out || RAY_IS_ERR(out)) return out; + int64_t remaining = n; + for (int64_t s = 0; s < n_segs && remaining > 0; s++) { + if (!segs[s]) continue; + int64_t seg_len = segs[s]->len; + int64_t take = (seg_len > remaining) ? remaining : seg_len; + const char* pool_base = segs[s]->str_pool + ? (const char*)ray_data(segs[s]->str_pool) : NULL; + for (int64_t i = 0; i < take; i++) { + out = parted_str_append_elem(out, segs[s], i, pool_base); + if (RAY_IS_ERR(out)) return out; + } + remaining -= take; + } + return out; +} + +/* Deep-copy tail (last n rows) from parted RAY_STR segments. */ +static inline ray_t* parted_tail_str(ray_t** segs, int64_t n_segs, int64_t n) { + /* First pass: count total rows to find start offset */ + int64_t total = 0; + for (int64_t s = 0; s < n_segs; s++) + if (segs[s]) total += segs[s]->len; + int64_t skip = total - n; + if (skip < 0) { skip = 0; n = total; } + + ray_t* out = ray_vec_new(RAY_STR, n); + if (!out || RAY_IS_ERR(out)) return out; + int64_t skipped = 0; + for (int64_t s = 0; s < n_segs; s++) { + if (!segs[s]) continue; + int64_t seg_len = segs[s]->len; + int64_t seg_start = 0; + if (skipped + seg_len <= skip) { skipped += seg_len; continue; } + if (skipped < skip) { seg_start = skip - skipped; skipped = skip; } + const char* pool_base = segs[s]->str_pool + ? 
(const char*)ray_data(segs[s]->str_pool) : NULL; + for (int64_t i = seg_start; i < seg_len; i++) { + out = parted_str_append_elem(out, segs[s], i, pool_base); + if (RAY_IS_ERR(out)) return out; + } + skipped += seg_len; + } + return out; +} + +/* Deep-copy flatten all rows from parted RAY_STR segments. */ +static inline ray_t* parted_flatten_str(ray_t** segs, int64_t n_segs, int64_t total) { + ray_t* out = ray_vec_new(RAY_STR, total); + if (!out || RAY_IS_ERR(out)) return out; + for (int64_t s = 0; s < n_segs; s++) { + if (!segs[s] || segs[s]->len <= 0) continue; + const char* pool_base = segs[s]->str_pool + ? (const char*)ray_data(segs[s]->str_pool) : NULL; + for (int64_t i = 0; i < segs[s]->len; i++) { + out = parted_str_append_elem(out, segs[s], i, pool_base); + if (RAY_IS_ERR(out)) return out; + } + } + return out; +} + +/* Same but from explicit type + attrs (for parted base type, etc.) */ +static inline ray_t* typed_vec_new(int8_t type, uint8_t attrs, int64_t cap) { + if (type == RAY_SYM) + return ray_sym_vec_new(attrs & RAY_SYM_W_MASK, cap); + return ray_vec_new(type, cap); +} + +/* ══════════════════════════════════════════ + * Cancellation check + * ══════════════════════════════════════════ */ + +static inline bool pool_cancelled(ray_pool_t* pool) { + if (RAY_UNLIKELY(ray_interrupted())) return true; + return pool && RAY_UNLIKELY(atomic_load_explicit(&pool->cancelled, + memory_order_relaxed)); +} + +#define CHECK_CANCEL(pool) \ + do { if (pool_cancelled(pool)) \ + return ray_error("cancel", NULL); } while(0) + +#define CHECK_CANCEL_GOTO(pool, lbl) \ + do { if (pool_cancelled(pool)) { \ + result = ray_error("cancel", NULL); \ + goto lbl; \ + } \ + } while(0) + +/* ══════════════════════════════════════════ + * Graph helper: find extended node + * ══════════════════════════════════════════ */ + +static inline ray_op_ext_t* find_ext(ray_graph_t* g, uint32_t node_id) { + for (uint32_t i = 0; i < g->ext_count; i++) { + if (g->ext_nodes[i] && 
g->ext_nodes[i]->base.id == node_id) + return g->ext_nodes[i]; + } + return NULL; +} + +/* ══════════════════════════════════════════ + * String helpers + * ══════════════════════════════════════════ */ + +/* Convert an atom (-RAY_STR or RAY_SYM scalar) to ray_str_t for comparison */ +static inline void atom_to_str_t(ray_t* atom, ray_str_t* out, const char** out_pool) { + const char* sp; + size_t sl; + if (atom->type == -RAY_STR) { + sp = ray_str_ptr(atom); + sl = ray_str_len(atom); + } else if (atom->type == RAY_STR) { + /* Length-1 RAY_STR vector used as scalar */ + if (atom->len < 1) { + memset(out, 0, sizeof(ray_str_t)); + *out_pool = NULL; + return; + } + /* Resolve slice to parent data — slices have no data of their own, + * and str_pool shares the union with slice_offset. */ + ray_t* src = atom; + int64_t idx = 0; + if (atom->attrs & RAY_ATTR_SLICE) { + src = atom->slice_parent; + idx = atom->slice_offset; + } + const ray_str_t* elems = (const ray_str_t*)ray_data(src); + *out = elems[idx]; + *out_pool = src->str_pool ? (const char*)ray_data(src->str_pool) : NULL; + return; + } else if (RAY_IS_SYM(atom->type) && ray_is_atom(atom)) { + /* SAFETY: ray_sym_str returns a borrowed pointer into the append-only + * sym table. The pointer is valid for the lifetime of the sym table + * (i.e., the entire query execution). If the sym table ever gains + * eviction, this must retain the returned atom. */ + ray_t* s = ray_sym_str(atom->i64); + sp = s ? ray_str_ptr(s) : ""; + sl = s ? ray_str_len(s) : 0; + } else { + sp = ""; sl = 0; + } + memset(out, 0, sizeof(ray_str_t)); + out->len = (uint32_t)sl; + if (sl <= RAY_STR_INLINE_MAX) { + if (sl > 0) memcpy(out->data, sp, sl); + *out_pool = NULL; + } else { + memcpy(out->prefix, sp, 4); + out->pool_off = 0; + *out_pool = sp; /* point directly at atom's string data */ + } +} + +/* Resolve RAY_STR vec to data owner, accounting for slices. + * Returns element pointer (already offset for slices) and pool pointer. 
*/ +static inline void str_resolve(const ray_t* v, const ray_str_t** elems, + const char** pool) { + const ray_t* owner = (v->attrs & RAY_ATTR_SLICE) ? v->slice_parent : v; + int64_t base = (v->attrs & RAY_ATTR_SLICE) ? v->slice_offset : 0; + *elems = (const ray_str_t*)ray_data((ray_t*)owner) + base; + *pool = owner->str_pool ? (const char*)ray_data(owner->str_pool) : NULL; +} + +/* Helper: resolve sym/enum element to string */ +static inline void sym_elem(const ray_t* input, int64_t i, + const char** out_str, size_t* out_len) { + int64_t sym_id = ray_read_sym(ray_data((ray_t*)input), i, input->type, input->attrs); + ray_t* atom = ray_sym_str(sym_id); + if (!atom) { *out_str = ""; *out_len = 0; return; } + *out_str = ray_str_ptr(atom); + *out_len = ray_str_len(atom); +} + +/* ══════════════════════════════════════════ + * Shared types — used by expr.c and exec.c + * ══════════════════════════════════════════ */ + +typedef struct { + bool enabled; + double bias_f64; + int64_t bias_i64; +} agg_affine_t; + +#define AGG_LINEAR_MAX_TERMS 8 + +typedef struct { + bool enabled; + uint8_t n_terms; + void* term_ptrs[AGG_LINEAR_MAX_TERMS]; + int8_t term_types[AGG_LINEAR_MAX_TERMS]; + int64_t coeff_i64[AGG_LINEAR_MAX_TERMS]; + int64_t bias_i64; +} agg_linear_t; + +typedef struct { + uint8_t n_terms; + int64_t syms[AGG_LINEAR_MAX_TERMS]; + int64_t coeff_i64[AGG_LINEAR_MAX_TERMS]; + int64_t bias_i64; +} linear_expr_i64_t; + +/* ── Expression compiler types ── */ + +#define EXPR_MAX_REGS 16 +#define EXPR_MAX_INS 48 +#define EXPR_MORSEL RAY_MORSEL_ELEMS + +typedef struct { + uint8_t opcode; /* OP_ADD, OP_NEG, OP_CAST, etc. 
*/ + uint8_t dst; /* destination register */ + uint8_t src1; /* source 1 register */ + uint8_t src2; /* source 2 register (0xFF for unary) */ +} expr_ins_t; + +enum { REG_SCAN = 0, REG_CONST = 1, REG_SCRATCH = 2 }; + +typedef struct { + uint8_t n_ins; + uint8_t n_regs; + uint8_t n_scratch; /* scratch registers needed */ + uint8_t out_reg; + int8_t out_type; /* RAY_F64, RAY_I64, or RAY_BOOL */ + bool has_parted; /* true if any REG_SCAN refs a parted column */ + struct { + uint8_t kind; /* REG_SCAN / REG_CONST / REG_SCRATCH */ + int8_t type; /* computational type: RAY_F64 / RAY_I64 / RAY_BOOL */ + int8_t col_type; /* original column type (REG_SCAN only) */ + uint8_t col_attrs; /* column attrs — RAY_SYM width (REG_SCAN only) */ + bool is_parted; /* true if this SCAN refs a parted column */ + const void* data; /* column data pointer (REG_SCAN only) */ + ray_t* parted_col; /* parted wrapper (is_parted only) */ + double const_f64; /* scalar value (REG_CONST) */ + int64_t const_i64; /* scalar value (REG_CONST) */ + } regs[EXPR_MAX_REGS]; + expr_ins_t ins[EXPR_MAX_INS]; +} ray_expr_t; + +/* ══════════════════════════════════════════ + * Shared gather types — used by filter.c, exec.c (sort, join) + * ══════════════════════════════════════════ */ + +#define MGATHER_MAX_COLS 16 + +typedef struct { + const int64_t* idx; + char* srcs[MGATHER_MAX_COLS]; + char* dsts[MGATHER_MAX_COLS]; + uint8_t esz[MGATHER_MAX_COLS]; + int64_t ncols; +} multi_gather_ctx_t; + +typedef struct { + int64_t* idx; + ray_t* src_col; + ray_t* dst_col; + uint8_t esz; + bool nullable; /* true = idx may contain -1 (LEFT JOIN nulls) */ +} gather_ctx_t; + +/* ══════════════════════════════════════════ + * Shared sort types and constants — used by sort_exec.c, exec.c (window) + * ══════════════════════════════════════════ */ + +#define RADIX_SORT_THRESHOLD 4096 /* switch from comparison to radix sort */ +#define SMALL_POOL_THRESHOLD 8192 /* skip pool dispatch below this size */ +#define NEARLY_SORTED_FRAC 
0.05 /* threshold for nearly-sorted detection */ +#define MK_PRESCAN_MAX_KEYS 8 /* max sort keys for stack allocation */ + +typedef struct { + ray_t** vecs; + uint8_t* desc; + uint8_t* nulls_first; + uint8_t n_sort; +} sort_cmp_ctx_t; + +/* Radix pass context (shared across histogram + scatter phases) */ +typedef struct { + const uint64_t* keys; + const int64_t* idx; + uint64_t* keys_out; + int64_t* idx_out; + int64_t n; + uint8_t shift; + uint32_t n_tasks; + uint32_t* hist; /* flat [n_tasks * 256] */ + int64_t* offsets; /* flat [n_tasks * 256] */ +} radix_pass_ctx_t; + +/* Key-encoding context for parallel encode phase */ +typedef struct { + uint64_t* keys; /* output */ + int64_t* indices; /* if non-NULL, initialize indices[i]=i (fused iota) */ + /* Single-key fields: */ + const void* data; /* raw column data */ + ray_t* col; /* source column (for null bitmap access) */ + int8_t type; /* column type */ + uint8_t col_attrs; /* RAY_SYM width attrs */ + bool desc; + bool nulls_first; /* for single-key F64: 1=nulls first */ + /* SYM rank mapping (NULL if not sym): */ + const uint32_t* enum_rank; /* intern_id → sort rank */ + /* Composite-key fields (n_keys > 1): */ + uint8_t n_keys; + ray_t** vecs; + int64_t mins[16]; + int64_t ranges[16]; + uint8_t bit_shifts[16]; /* bit offset for key k in composite */ + uint8_t descs[16]; + const uint32_t* enum_ranks[16]; /* per-key rank mappings */ +} radix_encode_ctx_t; + +/* Parallel multi-key min/max prescan context */ +typedef struct { + ray_t* const* vecs; + uint32_t* const* enum_ranks; + uint8_t n_keys; + int64_t nrows; + uint32_t n_workers; + int64_t* pw_mins; + int64_t* pw_maxs; +} mk_prescan_ctx_t; + +/* Parallel sort phase 1 context */ +typedef struct { + const sort_cmp_ctx_t* cmp_ctx; + int64_t* indices; + int64_t* tmp; + int64_t nrows; + uint32_t n_chunks; +} sort_phase1_ctx_t; + +/* Parallel merge pass context */ +typedef struct { + const sort_cmp_ctx_t* cmp_ctx; + const int64_t* src; + int64_t* dst; + int64_t nrows; 
+    int64_t run_size;
+} sort_merge_ctx_t;
+
+/* Radix key width for a column type.
+ *
+ * Maps a column type tag to the number of significant key bytes, i.e. the
+ * number of byte passes radix_sort_run needs. The result is always 1, 2, 4
+ * or 8; every type not listed explicitly is treated as a full 8-byte key. */
+static inline uint8_t radix_key_bytes(int8_t type) {
+    /* one-byte keys */
+    if (type == RAY_BOOL || type == RAY_U8)
+        return 1;
+    /* two-byte keys */
+    if (type == RAY_I16)
+        return 2;
+    /* four-byte keys */
+    if (type == RAY_I32 || type == RAY_DATE || type == RAY_TIME)
+        return 4;
+    /* I64, F64, TIMESTAMP, SYM and anything else */
+    return 8;
+}
+
+/* ══════════════════════════════════════════
+ * Extern forward declarations — larger functions in exec.c
+ * ══════════════════════════════════════════ */
+
+/* ── exec.c (gather helpers) ── */
+void multi_gather_fn(void* raw, uint32_t wid, int64_t start, int64_t end);
+void gather_fn(void* raw, uint32_t wid, int64_t start, int64_t end);
+void partitioned_gather(ray_pool_t* pool, const int64_t* idx, int64_t n,
+                        int64_t src_rows, char** srcs, char** dsts,
+                        const uint8_t* esz, int64_t ncols);
+
+/* ── filter.c ── */
+ray_t* exec_filter(ray_graph_t* g, ray_op_t* op, ray_t* input, ray_t* pred);
+ray_t* exec_filter_head(ray_t* input, ray_t* pred, int64_t limit);
+ray_t* sel_compact(ray_graph_t* g, ray_t* tbl, ray_t* sel);
+
+/* ── expr.c ── */
+bool try_affine_sumavg_input(ray_graph_t* g, ray_t* tbl, ray_op_t* input_op,
+                             ray_t** out_vec, agg_affine_t* out_affine);
+bool try_linear_sumavg_input_i64(ray_graph_t* g, ray_t* tbl, ray_op_t* input_op,
+                                 agg_linear_t* out_plan);
+bool expr_compile(ray_graph_t* g, ray_t* tbl, ray_op_t* root, ray_expr_t* out);
+ray_t* expr_eval_full(const ray_expr_t* expr, int64_t nrows);
+ray_t* exec_elementwise_unary(ray_graph_t* g, ray_op_t* op, ray_t* input);
+ray_t* exec_elementwise_binary(ray_graph_t* g, ray_op_t* op, ray_t* lhs, ray_t* rhs);
+
+/* ── sort_exec.c ── */
+int sort_cmp(const sort_cmp_ctx_t* ctx, int64_t a, int64_t b);
+void sort_insertion(const sort_cmp_ctx_t* ctx, int64_t* arr, int64_t n);
+void sort_merge_recursive(const sort_cmp_ctx_t* ctx,
+                          int64_t* arr, int64_t* tmp, int64_t n);
+void sort_phase1_fn(void* arg, uint32_t worker_id, int64_t start, int64_t end);
+void sort_merge_fn(void* arg, uint32_t worker_id, int64_t start, int64_t end);
+void key_introsort(uint64_t* keys, int64_t* idx, int64_t n);
+double detect_sortedness(ray_pool_t* pool, const uint64_t* keys, int64_t n);
+uint8_t compute_key_nbytes(ray_pool_t* pool, const uint64_t* keys,
+                           int64_t n, uint8_t type_max);
+int64_t* radix_sort_run(ray_pool_t* pool, uint64_t* keys, int64_t* indices,
+                        uint64_t* keys_tmp, int64_t* idx_tmp,
+                        int64_t n, uint8_t n_bytes,
+                        uint64_t** sorted_keys_out);
+uint64_t* packed_radix_sort_run(ray_pool_t* pool, uint64_t* data,
+                                uint64_t* tmp, int64_t n, uint8_t n_bytes);
+int64_t* msd_radix_sort_run(ray_pool_t* pool, uint64_t* keys, int64_t* indices,
+                            uint64_t* keys_tmp, int64_t* idx_tmp,
+                            int64_t n, uint8_t n_bytes,
+                            uint64_t** sorted_keys_out);
+void radix_encode_fn(void* arg, uint32_t wid, int64_t start, int64_t end);
+void mk_prescan_fn(void* arg, uint32_t wid, int64_t start, int64_t end);
+uint32_t* build_enum_rank(ray_t* col, int64_t nrows, ray_t** hdr_out);
+ray_t* exec_sort(ray_graph_t* g, ray_op_t* op, ray_t* tbl, int64_t limit);
+
+/* ── join.c ── */
+ray_t* exec_join(ray_graph_t* g, ray_op_t* op, ray_t* left_table, ray_t* right_table);
+ray_t* exec_antijoin(ray_graph_t* g, ray_op_t* op,
+                     ray_t* left_table, ray_t* right_table);
+ray_t* exec_window_join(ray_graph_t* g, ray_op_t* op,
+                        ray_t* left_table, ray_t* right_table);
+
+/* ── group.c ── */
+ray_t* exec_reduction(ray_graph_t* g, ray_op_t* op, ray_t* input);
+ray_t* exec_count_distinct(ray_graph_t* g, ray_op_t* op, ray_t* input);
+ray_t* exec_group(ray_graph_t* g, ray_op_t* op, ray_t* tbl, int64_t group_limit);
+
+/* Group HT types and helpers — shared with pivot (exec.c).
+ *
+ * GHT_NEED_* are distinct single-bit values (0x01..0x08), so they can be
+ * OR-ed together. ght_layout_t.need_flags and the need_flags argument of
+ * ght_compute_layout() are presumably built from them — confirm in group.c. */
+#define GHT_NEED_SUM 0x01
+#define GHT_NEED_MIN 0x02
+#define GHT_NEED_MAX 0x04
+#define GHT_NEED_SUMSQ 0x08
+
+typedef struct { /* row layout descriptor of a group hash table; produced by ght_compute_layout() */
+    uint16_t entry_stride; /* NOTE(review): unit and meaning not visible in this header — see group.c */
+    uint16_t row_stride; /* NOTE(review): presumably the size of one stored group row — confirm in group.c */
+    uint8_t n_keys;
+    uint8_t n_aggs;
+    uint8_t n_agg_vals;
+    uint8_t need_flags; /* presumably a GHT_NEED_* combination — confirm in group.c */
+    uint8_t agg_is_f64; /* 8-bit field; presumably one bit per aggregate — confirm in group.c */
+    uint8_t agg_is_first; /* 8-bit field; presumably one bit per aggregate — confirm in group.c */
+    uint8_t agg_is_last; /* 8-bit field; presumably one bit per aggregate — confirm in group.c */
+    int8_t agg_val_slot[8]; /* fixed capacity of 8 entries */
+    uint16_t off_sum; /* off_* are presumably the `off` operands of ROW_RD_* / ROW_WR_* below — confirm */
+    uint16_t off_min;
+    uint16_t off_max;
+    uint16_t off_sumsq;
+    /* Wide-key support: bit k set iff key k does not fit in 8 bytes
+     * (e.g. RAY_GUID = 16 B). For wide keys the 8-byte key slot
+     * stores a source-row index and the actual key bytes live in the
+     * original column, so probe/rehash/scatter must redirect through
+     * key_data[k]. wide_key_esz[k] is the per-element byte size of
+     * the source column. */
+    uint8_t wide_key_mask;
+    uint8_t wide_key_esz[8];
+} ght_layout_t;
+
+typedef struct { /* one group hash table: slot array plus row storage; set up by group_ht_init(), released by group_ht_free() */
+    uint32_t* slots; /* presumably ht_cap entries holding HT_PACK values or HT_EMPTY — confirm in group.c */
+    uint32_t ht_cap;
+    char* rows; /* byte-addressed row storage (the ROW_* macros add `off` to a row pointer) */
+    uint32_t grp_count;
+    uint32_t grp_cap;
+    ght_layout_t layout; /* stored by value */
+    /* Non-NULL only when layout.wide_key_mask != 0. Pointers into
+     * the original key columns (slice-unaware raw data), used by
+     * group_probe_entry / group_ht_rehash to resolve row-indexed
+     * wide keys. */
+    void* key_data[8];
+    ray_t* _h_slots; /* presumably the allocation header behind slots — confirm in group.c */
+    ray_t* _h_rows; /* presumably the allocation header behind rows — confirm in group.c */
+    uint8_t oom; /* set by group_probe_entry on grow failure */
+} group_ht_t;
+
+/* Row-level accessors for group HT rows.
+ *
+ * HT_SALT   — bits 56..63 of h (the top byte when h is a 64-bit hash).
+ * HT_EMPTY  — UINT32_MAX, the empty-slot marker.
+ * HT_PACK   — 32-bit slot word: salt in bits 24..31, gid in bits 0..23.
+ *             gid is masked with 0xFFFFFF, so only 24 bits of it survive.
+ * HT_GID / HT_SALT_V — extract the two halves of a packed slot word.
+ * ROW_RD_* / ROW_WR_* — treat (row + off) as an array of double / int64_t
+ *             and index it with `slot`; `off` is in units of row's pointee
+ *             (bytes when row is a char*). */
+#define HT_SALT(h) ((uint8_t)((h) >> 56))
+#define HT_EMPTY UINT32_MAX
+#define HT_PACK(salt, gid) (((uint32_t)(uint8_t)(salt) << 24) | ((gid) & 0xFFFFFF))
+#define HT_GID(s) ((s) & 0xFFFFFF)
+#define HT_SALT_V(s) ((uint8_t)((s) >> 24))
+
+#define ROW_RD_F64(row, off, slot) (((const double*)((const void*)((row) + (off))))[(slot)])
+#define ROW_RD_I64(row, off, slot) (((const int64_t*)((const void*)((row) + (off))))[(slot)])
+#define ROW_WR_F64(row, off, slot) (((double*)((void*)((row) + (off))))[(slot)])
+#define ROW_WR_I64(row, off, slot) (((int64_t*)((void*)((row) + (off))))[(slot)])
+
+ght_layout_t ght_compute_layout(uint8_t n_keys, uint8_t n_aggs,
+                                ray_t** agg_vecs, uint8_t need_flags,
+                                const uint16_t* agg_ops,
+                                const int8_t* key_types);
+bool group_ht_init(group_ht_t* ht, uint32_t cap, const ght_layout_t*
ly);
+void group_ht_free(group_ht_t* ht);
+/* Hash-aggregate rows [start, end) into ht.
+ *
+ * When match_idx is non-NULL, the loop iterates `i` in [start, end)
+ * and reads `row = match_idx[i]` — start/end index the selection
+ * space (number of passing rows), not the source column length.
+ * When match_idx is NULL, `row = i` — iterating directly over source
+ * column rows (no selection). */
+void group_rows_range(group_ht_t* ht, void** key_data, int8_t* key_types,
+                      uint8_t* key_attrs, ray_t** key_vecs, ray_t** agg_vecs,
+                      int64_t start, int64_t end,
+                      const int64_t* match_idx);
+
+/* ══════════════════════════════════════════
+ * Pivot ingest — shared parallel hash-aggregate path.
+ *
+ * Runs the same radix pipeline exec_group uses (phases 1+2), leaving
+ * the result in a set of per-partition HTs with prefix offsets. Phase
+ * 3 is left to the caller so pivot can restructure the output. For
+ * small inputs or when no thread pool is available, falls back to a
+ * single sequential HT transparently — the caller iterates
+ * part_hts[0..n_parts) the same way either way.
+ * ══════════════════════════════════════════ */
+
+typedef struct { /* result of pivot_ingest_run(); must be released with pivot_ingest_free() */
+    group_ht_t* part_hts; /* n_parts entries */
+    uint32_t* part_offsets; /* n_parts+1 entries (prefix sums of grp_counts) */
+    uint32_t n_parts; /* 1 when sequential, RADIX_P when parallel */
+    uint32_t total_grps; /* presumably equal to part_offsets[n_parts] — confirm in the implementation */
+    uint16_t row_stride; /* presumably copied from the layout's row_stride — confirm in the implementation */
+
+    /* Internal cleanup state — do not touch from callers. */
+    ray_t* _part_hts_hdr;
+    ray_t* _offsets_hdr;
+    void* _radix_bufs; /* radix_buf_t* — allocated only on parallel path */
+    ray_t* _radix_bufs_hdr;
+    size_t _n_bufs;
+} pivot_ingest_t;
+
+/* Run parallel (or sequential-fallback) hash aggregation for pivot.
+ * Returns true on success, false on unrecoverable OOM. On true the
+ * caller must eventually call pivot_ingest_free(). Cancellation is
+ * propagated via ray_interrupted() — callers should check that too.
*/ +bool pivot_ingest_run(pivot_ingest_t* out, + const ght_layout_t* ly, + void** key_data, int8_t* key_types, uint8_t* key_attrs, + ray_t** key_vecs, ray_t** agg_vecs, + int64_t n_scan); + +void pivot_ingest_free(pivot_ingest_t* out); + +/* ── window.c ── */ +ray_t* exec_window(ray_graph_t* g, ray_op_t* op, ray_t* tbl); + +/* ── graph_exec.c ── */ +ray_t* exec_expand(ray_graph_t* g, ray_op_t* op, ray_t* src_vec); +ray_t* exec_var_expand(ray_graph_t* g, ray_op_t* op, ray_t* start_vec); +ray_t* exec_shortest_path(ray_graph_t* g, ray_op_t* op, + ray_t* src_val, ray_t* dst_val); +ray_t* exec_pagerank(ray_graph_t* g, ray_op_t* op); +ray_t* exec_connected_comp(ray_graph_t* g, ray_op_t* op); +ray_t* exec_dijkstra(ray_graph_t* g, ray_op_t* op, + ray_t* src_val, ray_t* dst_val); +ray_t* exec_wco_join(ray_graph_t* g, ray_op_t* op); +ray_t* exec_louvain(ray_graph_t* g, ray_op_t* op); +ray_t* exec_degree_cent(ray_graph_t* g, ray_op_t* op); +ray_t* exec_topsort(ray_graph_t* g, ray_op_t* op); +ray_t* exec_cluster_coeff(ray_graph_t* g, ray_op_t* op); +ray_t* exec_betweenness(ray_graph_t* g, ray_op_t* op); +ray_t* exec_closeness(ray_graph_t* g, ray_op_t* op); +ray_t* exec_mst(ray_graph_t* g, ray_op_t* op); +ray_t* exec_random_walk(ray_graph_t* g, ray_op_t* op, ray_t* src_val); +ray_t* exec_dfs(ray_graph_t* g, ray_op_t* op, ray_t* src_val); +ray_t* exec_astar(ray_graph_t* g, ray_op_t* op, + ray_t* src_val, ray_t* dst_val); +ray_t* exec_k_shortest(ray_graph_t* g, ray_op_t* op, + ray_t* src_val, ray_t* dst_val); + +/* ── pivot_exec.c ── */ +ray_t* exec_if(ray_graph_t* g, ray_op_t* op); +ray_t* exec_pivot(ray_graph_t* g, ray_op_t* op, ray_t* tbl); + +/* ── embedding_exec.c ── */ +ray_t* exec_cosine_sim(ray_graph_t* g, ray_op_t* op, ray_t* emb_vec); +ray_t* exec_euclidean_dist(ray_graph_t* g, ray_op_t* op, ray_t* emb_vec); +ray_t* exec_knn(ray_graph_t* g, ray_op_t* op, ray_t* emb_vec); +ray_t* exec_hnsw_knn(ray_graph_t* g, ray_op_t* op); +ray_t* exec_ann_rerank(ray_graph_t* g, 
ray_op_t* op, ray_t* src); +ray_t* exec_knn_rerank(ray_graph_t* g, ray_op_t* op, ray_t* src); + +/* ── temporal_exec.c ── */ +ray_t* exec_extract(ray_graph_t* g, ray_op_t* op); +ray_t* exec_date_trunc(ray_graph_t* g, ray_op_t* op); + +/* ── string_exec.c ── */ +ray_t* exec_like(ray_graph_t* g, ray_op_t* op); +ray_t* exec_ilike(ray_graph_t* g, ray_op_t* op); +ray_t* exec_string_unary(ray_graph_t* g, ray_op_t* op); +ray_t* exec_strlen(ray_graph_t* g, ray_op_t* op); +ray_t* exec_substr(ray_graph_t* g, ray_op_t* op); +ray_t* exec_replace(ray_graph_t* g, ray_op_t* op); +ray_t* exec_concat(ray_graph_t* g, ray_op_t* op); + +/* ── exec.c ── */ +ray_t* materialize_mapcommon(ray_t* mc); +ray_t* materialize_mapcommon_head(ray_t* mc, int64_t n); +ray_t* materialize_mapcommon_filter(ray_t* mc, ray_t* pred, int64_t pass_count); +ray_t* broadcast_scalar(ray_t* atom, int64_t nrows); +ray_t* exec_node(ray_graph_t* g, ray_op_t* op); + +/* ══════════════════════════════════════════ + * Thread-safe null bitmap helpers (parallel group/window) + * ══════════════════════════════════════════ */ + +/* Atomically set a null bit. For idx >= 128 without ext nullmap, falls back + * to ray_vec_set_null (lazy alloc). Safe because OOM forces sequential path. */ +static inline void par_set_null(ray_t* vec, int64_t idx) { + if (!(vec->attrs & RAY_ATTR_NULLMAP_EXT)) { + if (idx >= 128) { + ray_vec_set_null(vec, idx, true); + return; + } + int byte_idx = (int)(idx / 8); + int bit_idx = (int)(idx % 8); + __atomic_fetch_or(&vec->nullmap[byte_idx], + (uint8_t)(1u << bit_idx), __ATOMIC_RELAXED); + return; + } + ray_t* ext = vec->ext_nullmap; + uint8_t* bits = (uint8_t*)ray_data(ext); + int byte_idx = (int)(idx / 8); + int bit_idx = (int)(idx % 8); + __atomic_fetch_or(&bits[byte_idx], + (uint8_t)(1u << bit_idx), __ATOMIC_RELAXED); +} + +/* Pre-allocate external nullmap so parallel threads can set bits safely. 
*/ +static inline ray_err_t par_prepare_nullmap(ray_t* vec) { + if (vec->len <= 128) return RAY_OK; + ray_err_t err = ray_vec_set_null_checked(vec, 0, true); + if (err != RAY_OK) return err; + ray_vec_set_null_checked(vec, 0, false); + vec->attrs &= (uint8_t)~RAY_ATTR_HAS_NULLS; + return RAY_OK; +} + +/* Scan nullmap after parallel execution; set RAY_ATTR_HAS_NULLS if any bit set. */ +static inline void par_finalize_nulls(ray_t* vec) { + if (vec->attrs & RAY_ATTR_NULLMAP_EXT) { + ray_t* ext = vec->ext_nullmap; + uint8_t* bits = (uint8_t*)ray_data(ext); + int64_t nbytes = (vec->len + 7) / 8; + for (int64_t i = 0; i < nbytes; i++) { + if (bits[i]) { vec->attrs |= RAY_ATTR_HAS_NULLS; return; } + } + } else { + int64_t nbytes = (vec->len + 7) / 8; + if (nbytes > 16) nbytes = 16; + for (int64_t i = 0; i < nbytes; i++) { + if (vec->nullmap[i]) { vec->attrs |= RAY_ATTR_HAS_NULLS; return; } + } + } +} + +#endif /* RAY_EXEC_INTERNAL_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/join.c b/crates/rayforce-sys/vendor/rayforce/src/ops/join.c new file mode 100644 index 0000000..21baa4a --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/join.c @@ -0,0 +1,1972 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ops/internal.h" + +/* ── Hash helper (shared by radix and chained HT join paths) ──────────── */ + +static uint64_t hash_row_keys(ray_t** key_vecs, uint8_t n_keys, int64_t row) { + uint64_t h = 0; + for (uint8_t k = 0; k < n_keys; k++) { + ray_t* col = key_vecs[k]; + if (!col) continue; + /* NULL key — produce unique hash that won't match any other row */ + if (ray_vec_is_null(col, row)) + return h ^ ((uint64_t)row * 0x9E3779B97F4A7C15ULL); + uint64_t kh; + if (col->type == RAY_F64) + kh = ray_hash_f64(((double*)ray_data(col))[row]); + else + kh = ray_hash_i64(read_col_i64(ray_data(col), row, col->type, col->attrs)); + h = (k == 0) ? 
kh : ray_hash_combine(h, kh); + } + return h; +} + +/* ============================================================================ + * Radix-partitioned hash join + * + * Four-phase pipeline: + * Phase 1: Partition both sides by radix bits of hash (parallel) + * Phase 2: Per-partition build + probe with open-addressing HT (parallel) + * Phase 3: Gather output columns from matched pairs (parallel) + * Phase 4: Fallback to chained HT for small joins (< RAY_PARALLEL_THRESHOLD) + * ============================================================================ */ + +/* Partition entry: row index + cached hash */ +typedef struct { + uint32_t row_idx; + uint32_t hash; +} join_radix_entry_t; + +/* Per-partition descriptor */ +typedef struct { + join_radix_entry_t* entries; /* partition buffer (from ray_alloc) */ + ray_t* entries_hdr; /* ray_alloc header for freeing */ + uint32_t count; /* number of entries in partition */ +} join_radix_part_t; + +/* Choose radix bits so each partition's HT working set fits in cache. + * HT working set per partition ≈ 2x right entries × 8B = 16B per right row. 
*/ +static uint8_t radix_join_bits(int64_t right_rows) { + /* HT working set: 2x capacity × 8B slot = 16B per right row */ + size_t right_bytes = (size_t)right_rows * 16; + if (right_bytes <= RAY_JOIN_L2_TARGET) + return RAY_JOIN_MIN_RADIX; + + /* R = ceil(log2(right_bytes / L2_TARGET)) */ + uint8_t r = 0; + size_t target = RAY_JOIN_L2_TARGET; + while (target < right_bytes && r < RAY_JOIN_MAX_RADIX) { + target *= 2; + r++; + } + if (r < RAY_JOIN_MIN_RADIX) r = RAY_JOIN_MIN_RADIX; + return r; +} + +/* Context for parallel hash pre-computation */ +typedef struct { + ray_t** key_vecs; + uint8_t n_keys; + uint32_t* hashes; /* output: hash[row] */ +} join_radix_hash_ctx_t; + +static void join_radix_hash_fn(void* raw, uint32_t wid, int64_t start, int64_t end) { + (void)wid; + join_radix_hash_ctx_t* c = (join_radix_hash_ctx_t*)raw; + for (int64_t r = start; r < end; r++) + c->hashes[r] = (uint32_t)hash_row_keys(c->key_vecs, c->n_keys, r); +} + +/* Context for parallel partition histogram + scatter (pre-computed hashes). + * Uses fixed row assignment: task i processes rows [i*chunk, (i+1)*chunk). + * This ensures histogram and scatter see the same row ranges per task, + * enabling non-atomic per-worker scatter offsets. 
*/
+typedef struct {
+    uint32_t* hashes;
+    uint32_t radix_mask;
+    uint8_t radix_shift;
+    uint32_t n_parts;
+    uint32_t n_workers;
+    int64_t nrows;
+    uint32_t* histograms; /* [n_workers][n_parts] flat array */
+} join_radix_hist_ctx_t;
+
+/* Pool task: count, for one fixed row chunk, how many rows fall into each
+ * radix partition.
+ *
+ * task_start is the task number, not a row index: task t owns rows
+ * [t*chunk, (t+1)*chunk) with chunk = ceil(nrows / n_workers), clipped to
+ * nrows. Counts are added to row t of the flat histograms array, so tasks
+ * never share a counter. wid and task_end are ignored. */
+static void join_radix_hist_fn(void* raw, uint32_t wid, int64_t task_start, int64_t task_end) {
+    (void)wid; (void)task_end;
+    join_radix_hist_ctx_t* ctx = (join_radix_hist_ctx_t*)raw;
+
+    const uint32_t task = (uint32_t)task_start;
+    const int64_t per_task =
+        (ctx->nrows + (int64_t)ctx->n_workers - 1) / (int64_t)ctx->n_workers;
+    const int64_t first = (int64_t)task * per_task;
+    if (first >= ctx->nrows) return; /* trailing task with no rows */
+    int64_t last = first + per_task;
+    if (last > ctx->nrows) last = ctx->nrows;
+
+    uint32_t* counts = ctx->histograms + task * ctx->n_parts;
+    const uint32_t mask = ctx->radix_mask;
+    const uint8_t shift = ctx->radix_shift;
+
+    const uint32_t* cur = ctx->hashes + first;
+    const uint32_t* stop = ctx->hashes + last;
+    for (; cur < stop; cur++)
+        counts[(*cur >> shift) & mask]++;
+}
+
+/* Context for parallel partition scatter with write-combining buffers.
+ * Each worker writes to small local buffers (one per partition). When
+ * a buffer fills, it flushes to the partition in a burst memcpy.
+ * This converts random writes into sequential bursts, dramatically
+ * improving cache utilization.
+ *
+ * Uses fixed per-worker row assignments (dispatch_n with n_workers tasks)
+ * to match histogram phase, eliminating atomic operations.
*/
+#define WCB_SIZE 64 /* entries per write-combine buffer */
+typedef struct {
+    uint32_t* hashes; /* pre-computed 32-bit row hashes, indexed by row */
+    uint32_t radix_mask; /* n_parts - 1 (see join_radix_partition) */
+    uint8_t radix_shift; /* partition id = (hash >> radix_shift) & radix_mask */
+    uint32_t n_parts;
+    join_radix_part_t* parts; /* destination partitions; entries are written, never allocated, here */
+    uint32_t* offsets; /* [n_workers][n_parts] per-worker write positions */
+    int64_t nrows;
+    uint32_t n_workers; /* number of fixed row chunks; has to equal the histogram pass's value */
+    _Atomic(uint8_t) had_error; /* set by any worker on OOM */
+} join_radix_scatter_ctx_t;
+
+static void join_radix_scatter_fn(void* raw, uint32_t wid, int64_t task_start, int64_t task_end) { /* pool task: scatter one fixed row chunk into the partition buffers */
+    (void)wid; (void)task_end; /* only task_start is used; it is the task number */
+    join_radix_scatter_ctx_t* c = (join_radix_scatter_ctx_t*)raw;
+    uint32_t mask = c->radix_mask;
+    uint8_t shift = c->radix_shift;
+    uint32_t n_parts = c->n_parts;
+
+    /* Fixed row range for this task (matches histogram):
+     * rows [tid*chunk, (tid+1)*chunk) with chunk = ceil(nrows / n_workers),
+     * clipped to nrows. */
+    uint32_t tid = (uint32_t)task_start;
+    int64_t chunk = (c->nrows + (int64_t)c->n_workers - 1) / (int64_t)c->n_workers;
+    int64_t ws = (int64_t)tid * chunk;
+    int64_t we = ws + chunk;
+    if (we > c->nrows) we = c->nrows;
+    if (ws >= c->nrows) return; /* trailing task with no rows; nothing allocated yet */
+
+    uint32_t* off = c->offsets + tid * n_parts; /* this task's private row of write positions */
+
+    /* Write-combining: per-partition local buffers, flushed in bursts.
+     * wcb_cnt_p[p] is the fill level of partition p's local buffer; the
+     * counters live on the stack for up to 1024 partitions and in scratch
+     * memory beyond that. */
+    uint32_t wcb_cnt_stack[1024];
+    uint32_t* wcb_cnt_p = wcb_cnt_stack;
+    ray_t* wcb_cnt_hdr = NULL; /* stays NULL while the stack array is in use */
+    if (n_parts > 1024) {
+        wcb_cnt_p = (uint32_t*)scratch_calloc(&wcb_cnt_hdr, (size_t)n_parts * sizeof(uint32_t));
+        if (!wcb_cnt_p) {
+            atomic_store_explicit(&c->had_error, 1, memory_order_relaxed); /* caller checks the flag after dispatch */
+            return;
+        }
+    } else {
+        memset(wcb_cnt_stack, 0, (size_t)n_parts * sizeof(uint32_t)); /* only the first n_parts counters are used */
+    }
+
+    /* Allocate per-partition local buffers
+     * (one run of WCB_SIZE entries per partition, in a single allocation). */
+    ray_t* local_hdr = NULL;
+    join_radix_entry_t* local_buf = (join_radix_entry_t*)scratch_alloc(&local_hdr,
+        (size_t)n_parts * WCB_SIZE * sizeof(join_radix_entry_t));
+    if (!local_buf) {
+        /* Fallback: direct write without buffering.
+         * Entries land at the same off[part] positions the buffered path
+         * would use, so this is not reported as an error. */
+        for (int64_t r = ws; r < we; r++) {
+            uint32_t h = c->hashes[r];
+            uint32_t part = (h >> shift) & mask;
+            uint32_t pos = off[part]++;
+            c->parts[part].entries[pos].row_idx = (uint32_t)r;
+            c->parts[part].entries[pos].hash = h;
+        }
+        if (wcb_cnt_hdr) scratch_free(wcb_cnt_hdr);
+        return;
+    }
+
+    for (int64_t r = ws; r < we; r++) {
+        uint32_t h = c->hashes[r];
+        uint32_t part = (h >> shift) & mask;
+        uint32_t idx = wcb_cnt_p[part]; /* next free position in this partition's local buffer */
+        local_buf[part * WCB_SIZE + idx].row_idx = (uint32_t)r;
+        local_buf[part * WCB_SIZE + idx].hash = h;
+        idx++;
+        if (idx == WCB_SIZE) {
+            /* Flush buffer to partition */
+            memcpy(&c->parts[part].entries[off[part]],
+                   &local_buf[part * WCB_SIZE],
+                   WCB_SIZE * sizeof(join_radix_entry_t));
+            off[part] += WCB_SIZE;
+            idx = 0;
+        }
+        wcb_cnt_p[part] = idx;
+    }
+
+    /* Flush remaining entries (partially filled buffers) */
+    for (uint32_t p = 0; p < n_parts; p++) {
+        uint32_t cnt = wcb_cnt_p[p];
+        if (cnt > 0) {
+            memcpy(&c->parts[p].entries[off[p]],
+                   &local_buf[p * WCB_SIZE],
+                   (size_t)cnt * sizeof(join_radix_entry_t));
+            off[p] += cnt;
+        }
+    }
+
+    scratch_free(local_hdr);
+    if (wcb_cnt_hdr) scratch_free(wcb_cnt_hdr);
+}
+
+/* Partition one side of the join. Returns array of join_radix_part_t[n_parts].
+ * Caller must free each partition's entries_hdr and the parts array itself.
+ *
+ * n_parts is 1 << radix_bits. `hashes` holds one 32-bit hash per row. On
+ * failure the function returns NULL and stores NULL in *parts_hdr_out; on
+ * success *parts_hdr_out is the header to free the returned array with. */
+static join_radix_part_t* join_radix_partition(ray_pool_t* pool, int64_t nrows,
+                                               uint8_t radix_bits,
+                                               uint32_t* hashes,
+                                               ray_t** parts_hdr_out) {
+    uint32_t n_parts = (uint32_t)1 << radix_bits;
+    uint32_t mask = n_parts - 1;
+    /* Use upper bits of hash for radix (lower bits used inside partition HT) */
+    uint8_t shift = 32 - radix_bits; /* NOTE(review): radix_bits == 0 gives shift 32, and the hist/scatter tasks shift a uint32_t by it — assumes callers pass radix_bits >= 1; confirm RAY_JOIN_MIN_RADIX */
+
+    /* Allocate partition descriptor array */
+    ray_t* parts_hdr;
+    join_radix_part_t* parts = (join_radix_part_t*)scratch_calloc(&parts_hdr,
+        (size_t)n_parts * sizeof(join_radix_part_t));
+    if (!parts) { *parts_hdr_out = NULL; return NULL; }
+    *parts_hdr_out = parts_hdr;
+
+    /* Step 1: Histogram — count rows per partition per worker.
+     * n_workers must match dispatch: 1 when running serially so that the
+     * single hist/scatter call covers all rows (chunk = nrows / 1). */
+    uint32_t n_workers = (pool && nrows > RAY_PARALLEL_THRESHOLD) ?
pool->n_workers + 1 : 1; /* parallel case uses pool->n_workers + 1 tasks — presumably the pool workers plus the dispatching thread; confirm in the pool implementation */
+    ray_t* hist_hdr;
+    uint32_t* histograms = (uint32_t*)scratch_calloc(&hist_hdr,
+        (size_t)n_workers * n_parts * sizeof(uint32_t));
+    if (!histograms) { scratch_free(parts_hdr); *parts_hdr_out = NULL; return NULL; }
+
+    join_radix_hist_ctx_t hctx = {
+        .hashes = hashes,
+        .radix_mask = mask, .radix_shift = shift,
+        .n_parts = n_parts, .n_workers = n_workers,
+        .nrows = nrows,
+        .histograms = histograms,
+    };
+    if (pool && nrows > RAY_PARALLEL_THRESHOLD)
+        ray_pool_dispatch_n(pool, join_radix_hist_fn, &hctx, n_workers);
+    else
+        join_radix_hist_fn(&hctx, 0, 0, 1); /* serial: task 0 covers every row because n_workers is 1 */
+
+    /* Compute partition sizes (sum across workers) */
+    for (uint32_t p = 0; p < n_parts; p++) {
+        uint32_t total = 0;
+        for (uint32_t w = 0; w < n_workers; w++)
+            total += histograms[w * n_parts + p];
+        parts[p].count = total;
+    }
+
+    /* Allocate partition buffers.
+     * Empty partitions keep entries == NULL (the array came from
+     * scratch_calloc). A failed allocation is retried once after
+     * ray_heap_gc() + ray_heap_release_pages(). */
+    bool oom = false;
+    for (uint32_t p = 0; p < n_parts; p++) {
+        if (parts[p].count == 0) continue;
+        parts[p].entries = (join_radix_entry_t*)scratch_alloc(&parts[p].entries_hdr,
+            (size_t)parts[p].count * sizeof(join_radix_entry_t));
+        if (!parts[p].entries) {
+            ray_heap_gc();
+            ray_heap_release_pages();
+            parts[p].entries = (join_radix_entry_t*)scratch_alloc(&parts[p].entries_hdr,
+                (size_t)parts[p].count * sizeof(join_radix_entry_t));
+            if (!parts[p].entries) { oom = true; break; }
+        }
+    }
+    if (oom) {
+        for (uint32_t p = 0; p < n_parts; p++)
+            if (parts[p].entries_hdr) scratch_free(parts[p].entries_hdr);
+        scratch_free(hist_hdr);
+        scratch_free(parts_hdr);
+        *parts_hdr_out = NULL;
+        return NULL;
+    }
+
+    /* Step 2: Compute per-worker write offsets (prefix sum of histograms).
+     * For each partition p, worker w's write offset =
+     * sum(histograms[0..w-1][p]) = global prefix for workers before w.
+     * Offsets are relative to the start of partition p's own buffer.
+     */
+    ray_t* off_hdr;
+    uint32_t* offsets = (uint32_t*)scratch_alloc(&off_hdr,
+        (size_t)n_workers * n_parts * sizeof(uint32_t));
+    if (!offsets) {
+        for (uint32_t p = 0; p < n_parts; p++)
+            if (parts[p].entries_hdr) scratch_free(parts[p].entries_hdr);
+        scratch_free(hist_hdr);
+        scratch_free(parts_hdr);
+        *parts_hdr_out = NULL;
+        return NULL;
+    }
+    for (uint32_t p = 0; p < n_parts; p++) {
+        uint32_t running = 0;
+        for (uint32_t w = 0; w < n_workers; w++) {
+            offsets[w * n_parts + p] = running; /* exclusive prefix: where worker w starts writing in partition p */
+            running += histograms[w * n_parts + p];
+        }
+    }
+
+    /* Step 3: Scatter rows into partition buffers (fixed row assignment, no atomics) */
+    join_radix_scatter_ctx_t sctx = {
+        .hashes = hashes,
+        .radix_mask = mask, .radix_shift = shift,
+        .n_parts = n_parts, .parts = parts,
+        .offsets = offsets,
+        .nrows = nrows, .n_workers = n_workers,
+        .had_error = 0,
+    };
+    if (pool && nrows > RAY_PARALLEL_THRESHOLD)
+        ray_pool_dispatch_n(pool, join_radix_scatter_fn, &sctx, n_workers);
+    else
+        join_radix_scatter_fn(&sctx, 0, 0, 1);
+
+    scratch_free(off_hdr); /* offsets and histograms are only needed during the scatter */
+    scratch_free(hist_hdr);
+
+    if (atomic_load_explicit(&sctx.had_error, memory_order_relaxed)) { /* a scatter task ran out of memory: discard everything */
+        for (uint32_t p = 0; p < n_parts; p++)
+            if (parts[p].entries_hdr) scratch_free(parts[p].entries_hdr);
+        scratch_free(parts_hdr);
+        *parts_hdr_out = NULL;
+        return NULL;
+    }
+
+    return parts;
+}
+
+/* ============================================================================
+ * Join execution (parallel hash join)
+ *
+ * Three-phase pipeline:
+ * Phase 1 (sequential): Build chained hash table on right side
+ * Phase 2 (parallel): Two-pass probe — count matches, prefix-sum, fill
+ * Phase 3 (parallel): Column gather — assemble result columns
+ *
+ * NOTE(review): join_build_fn further down inserts with an atomic CAS and
+ * is documented as a parallel build, so "sequential" for phase 1 looks
+ * stale — confirm against the dispatch site in exec_join.
+ * ============================================================================ */
+
+/* Key equality helper — shared by count + fill phases.
+ * Compares row l of the left key columns with row r of the right key
+ * columns and returns true only if every key pair matches. */
+static inline bool join_keys_eq(ray_t* const* l_vecs, ray_t* const* r_vecs, uint8_t n_keys,
+                                int64_t l, int64_t r) {
+    for (uint8_t k = 0; k <
n_keys; k++) {
+        ray_t* lc = l_vecs[k];
+        ray_t* rc = r_vecs[k];
+        if (!lc || !rc) return false; /* a missing key column never matches */
+        /* NULL != NULL in join predicates */
+        if (ray_vec_is_null(lc, l) || ray_vec_is_null(rc, r)) return false;
+        if (lc->type == RAY_F64) { /* only the left column's type is inspected; the right column is read the same way */
+            if (((double*)ray_data(lc))[l] != ((double*)ray_data(rc))[r]) return false; /* plain double comparison, so NaN never equals NaN */
+        } else {
+            if (read_col_i64(ray_data(lc), l, lc->type, lc->attrs) !=
+                read_col_i64(ray_data(rc), r, rc->type, rc->attrs)) return false;
+        }
+    }
+    return true;
+}
+
+/* ── Per-partition open-addressing build + probe ─────────────────────── */
+
+#define RADIX_HT_EMPTY UINT32_MAX /* stored in the row-index half of a slot to mark it empty */
+
+/* Per-partition single-pass build+probe context.
+ * Each partition writes to its own local output buffer, then results
+ * are consolidated into contiguous arrays afterward. */
+typedef struct {
+    join_radix_part_t* l_parts; /* left-side partitions, indexed by partition number */
+    join_radix_part_t* r_parts; /* right-side partitions, indexed by partition number */
+    ray_t** l_key_vecs;
+    ray_t** r_key_vecs;
+    uint8_t n_keys;
+    uint8_t join_type; /* values >= 1 make the probe emit unmatched left rows (LEFT/FULL) */
+    /* Per-partition output: pp_l[p], pp_r[p] are local buffers */
+    int32_t** pp_l; /* per-partition left indices (int32_t) */
+    int32_t** pp_r; /* per-partition right indices (int32_t); -1 marks "no right match" */
+    ray_t** pp_l_hdr; /* allocation headers for freeing */
+    ray_t** pp_r_hdr;
+    int64_t* part_counts; /* actual output count per partition */
+    uint32_t* pp_cap; /* capacity per partition */
+    _Atomic(uint8_t)* matched_right; /* optional (may be NULL); entry rr is set to 1 when right row rr finds a match */
+    _Atomic(uint8_t) had_error; /* set by any partition on OOM */
+} join_radix_bp_ctx_t;
+
+/* Grow per-partition output buffers (matched pair arrays).
+ * Returns true on success, false on OOM (sets had_error).
*/
+static inline bool bp_grow_bufs(join_radix_bp_ctx_t* c, uint32_t p,
+                                int32_t** pl, int32_t** pr,
+                                uint32_t* cap, uint32_t cnt) {
+    /* Room for at least one more pair: nothing to do. */
+    if (cnt < *cap) return true;
+    /* Doubling would overflow uint32_t — treat as out of memory. */
+    if (*cap > UINT32_MAX / 2) {
+        atomic_store_explicit(&c->had_error, 1, memory_order_relaxed);
+        return false;
+    }
+    uint32_t new_cap = *cap * 2;
+    /* Start from NULL so the failure path below never tests an
+     * indeterminate pointer, whatever scratch_alloc leaves in the header
+     * slot when it fails (other call sites in this file do the same). */
+    ray_t* nl_hdr = NULL;
+    ray_t* nr_hdr = NULL;
+    int32_t* nl = (int32_t*)scratch_alloc(&nl_hdr, (size_t)new_cap * sizeof(int32_t));
+    int32_t* nr = (int32_t*)scratch_alloc(&nr_hdr, (size_t)new_cap * sizeof(int32_t));
+    if (!nl || !nr) {
+        /* Release whichever half did get allocated; the caller's buffers
+         * (*pl, *pr, *cap and the stored headers) are left untouched. */
+        if (nl_hdr) scratch_free(nl_hdr);
+        if (nr_hdr) scratch_free(nr_hdr);
+        atomic_store_explicit(&c->had_error, 1, memory_order_relaxed);
+        return false;
+    }
+    /* Carry over the cnt pairs written so far, then swap in the new
+     * buffers and their headers. */
+    memcpy(nl, *pl, (size_t)cnt * sizeof(int32_t));
+    memcpy(nr, *pr, (size_t)cnt * sizeof(int32_t));
+    scratch_free(c->pp_l_hdr[p]); scratch_free(c->pp_r_hdr[p]);
+    *pl = nl; *pr = nr;
+    c->pp_l_hdr[p] = nl_hdr; c->pp_r_hdr[p] = nr_hdr;
+    *cap = new_cap;
+    return true;
+}
+
+static void join_radix_build_probe_fn(void* raw, uint32_t wid, int64_t task_start, int64_t task_end) {
+    (void)wid; (void)task_end;
+    join_radix_bp_ctx_t* c = (join_radix_bp_ctx_t*)raw;
+    uint32_t p = (uint32_t)task_start;
+
+    join_radix_part_t* rp = &c->r_parts[p];
+    join_radix_part_t* lp = &c->l_parts[p];
+
+    if (rp->count == 0) {
+        /* No right rows — emit unmatched left rows for LEFT/FULL */
+        if (c->join_type >= 1 && lp->count > 0) {
+            uint32_t cap = lp->count;
+            int32_t* pl = (int32_t*)scratch_alloc(&c->pp_l_hdr[p], (size_t)cap * sizeof(int32_t));
+            int32_t* pr = (int32_t*)scratch_alloc(&c->pp_r_hdr[p], (size_t)cap * sizeof(int32_t));
+            if (pl && pr) {
+                for (uint32_t i = 0; i < lp->count; i++) {
+                    pl[i] = (int32_t)lp->entries[i].row_idx;
+                    pr[i] = -1;
+                }
+                c->pp_l[p] = pl; c->pp_r[p] = pr;
+                c->part_counts[p] = lp->count;
+                c->pp_cap[p] = cap;
+            } else {
+                if (c->pp_l_hdr[p]) scratch_free(c->pp_l_hdr[p]);
+                if (c->pp_r_hdr[p]) scratch_free(c->pp_r_hdr[p]);
+                c->pp_l_hdr[p] = NULL; c->pp_r_hdr[p] = NULL;
atomic_store_explicit(&c->had_error, 1, memory_order_relaxed);
+            }
+        }
+        return;
+    }
+
+    /* Allocate per-partition output buffer.
+     * Capacity = max(left, right) handles 1:1 and 1:N joins.
+     * For N:M (overflow), we grow by re-allocating.
+     * The capacity is never below 64 entries. */
+    uint32_t init_cap = lp->count > rp->count ? lp->count : rp->count;
+    if (init_cap < 64) init_cap = 64;
+    int32_t* pl = (int32_t*)scratch_alloc(&c->pp_l_hdr[p], (size_t)init_cap * sizeof(int32_t));
+    int32_t* pr = (int32_t*)scratch_alloc(&c->pp_r_hdr[p], (size_t)init_cap * sizeof(int32_t));
+    if (!pl || !pr) {
+        if (c->pp_l_hdr[p]) scratch_free(c->pp_l_hdr[p]);
+        if (c->pp_r_hdr[p]) scratch_free(c->pp_r_hdr[p]);
+        c->pp_l_hdr[p] = NULL; c->pp_r_hdr[p] = NULL;
+        c->part_counts[p] = 0;
+        atomic_store_explicit(&c->had_error, 1, memory_order_relaxed);
+        return;
+    }
+    uint32_t cap = init_cap; /* current capacity of pl/pr; doubled by bp_grow_bufs */
+    uint32_t cnt = 0; /* number of (left, right) pairs emitted so far */
+
+    /* Build open-addressing HT for right partition.
+     * Capacity is a power of two, at least 256 and at least twice the
+     * number of right rows, so the table is never more than half full. */
+    uint32_t ht_cap = 256;
+    uint64_t ht_target = (uint64_t)rp->count * 2;
+    while ((uint64_t)ht_cap < ht_target && ht_cap <= (UINT32_MAX >> 1)) ht_cap *= 2;
+    if ((uint64_t)ht_cap < ht_target) {
+        /* Partition too large for open-addressing HT — signal error */
+        atomic_store_explicit(&c->had_error, 1, memory_order_relaxed);
+        c->part_counts[p] = 0;
+        scratch_free(c->pp_l_hdr[p]); scratch_free(c->pp_r_hdr[p]);
+        c->pp_l_hdr[p] = NULL; c->pp_r_hdr[p] = NULL;
+        return;
+    }
+    uint32_t ht_mask = ht_cap - 1;
+
+    ray_t* ht_hdr;
+    uint32_t* ht = (uint32_t*)scratch_calloc(&ht_hdr, (size_t)ht_cap * 2 * sizeof(uint32_t)); /* two words per slot: [2s] = hash, [2s+1] = right row index */
+    if (!ht) {
+        atomic_store_explicit(&c->had_error, 1, memory_order_relaxed);
+        scratch_free(c->pp_l_hdr[p]); scratch_free(c->pp_r_hdr[p]);
+        c->pp_l_hdr[p] = NULL; c->pp_r_hdr[p] = NULL;
+        c->part_counts[p] = 0;
+        return;
+    }
+    for (uint32_t s = 0; s < ht_cap; s++)
+        ht[s * 2 + 1] = RADIX_HT_EMPTY; /* emptiness is tracked in the row-index word */
+
+    for (uint32_t i = 0; i < rp->count; i++) {
+        uint32_t h = rp->entries[i].hash;
+        uint32_t slot = h & ht_mask; /* low hash bits pick the slot; the partition itself was chosen from the high bits */
+        if (i + 4 < rp->count)
+            __builtin_prefetch(&ht[(rp->entries[i + 4].hash & ht_mask) * 2], 1, 1); /* warm the home slot of the entry four ahead (for writing) */
+        while (ht[slot * 2 + 1] != RADIX_HT_EMPTY)
+            slot = (slot + 1) & ht_mask; /* linear probing with wrap-around */
+        ht[slot * 2] = h;
+        ht[slot * 2 + 1] = rp->entries[i].row_idx;
+    }
+
+    /* Single-pass probe + fill.
+     * Every left entry walks its probe run until the first empty slot and
+     * emits one output pair per right row whose hash and keys both match. */
+    for (uint32_t i = 0; i < lp->count; i++) {
+        uint32_t h = lp->entries[i].hash;
+        uint32_t lr = lp->entries[i].row_idx;
+        uint32_t slot = h & ht_mask;
+        if (i + 4 < lp->count)
+            __builtin_prefetch(&ht[(lp->entries[i + 4].hash & ht_mask) * 2], 0, 1); /* same look-ahead, read-only this time */
+        bool matched = false;
+        while (ht[slot * 2 + 1] != RADIX_HT_EMPTY) {
+            if (ht[slot * 2] == h) { /* cheap 32-bit hash check before the full key comparison */
+                uint32_t rr = ht[slot * 2 + 1];
+                if (join_keys_eq(c->l_key_vecs, c->r_key_vecs, c->n_keys,
+                                 (int64_t)lr, (int64_t)rr)) {
+                    if (!bp_grow_bufs(c, p, &pl, &pr, &cap, cnt))
+                        goto done; /* OOM: had_error is set; pl/pr still point at the old, valid buffers */
+                    pl[cnt] = (int32_t)lr;
+                    pr[cnt] = (int32_t)rr;
+                    cnt++;
+                    matched = true;
+                    if (c->matched_right)
+                        atomic_store_explicit(&c->matched_right[rr], 1, memory_order_relaxed);
+                }
+            }
+            slot = (slot + 1) & ht_mask;
+        }
+        if (!matched && c->join_type >= 1) { /* LEFT/FULL: keep the left row, paired with -1 */
+            if (!bp_grow_bufs(c, p, &pl, &pr, &cap, cnt))
+                goto done;
+            pl[cnt] = (int32_t)lr;
+            pr[cnt] = -1;
+            cnt++;
+        }
+    }
+
+done:
+    scratch_free(ht_hdr); /* the HT is private to this call; only the pair buffers outlive it */
+    c->pp_l[p] = pl; c->pp_r[p] = pr;
+    c->part_counts[p] = cnt;
+    c->pp_cap[p] = cap;
+}
+
+/* ── Parallel join HT build ─────────────────────────────────────────────
+ * Workers hash right-side rows in parallel and insert into the shared
+ * chain-linked hash table using atomic CAS on ht_heads[slot].
+ * ht_next[r] is per-row (no contention). Load factor ~0.3 → negligible
+ * CAS contention.
+ * ──────────────────────────────────────────────────────────────────── */
+
+/* ht_heads is accessed atomically from multiple workers during join build.
+ * Using _Atomic(uint32_t)* for C11-compliant atomic access.
*/ +#define JHT_EMPTY UINT32_MAX /* sentinel for empty HT slot/chain end */ + +typedef struct { + _Atomic(uint32_t)* ht_heads; /* shared, protected by atomic CAS */ + uint32_t* ht_next; /* per-row, no contention */ + uint32_t ht_mask; /* ht_cap - 1 */ + ray_t** r_key_vecs; + uint8_t n_keys; + /* ASP-Join: semijoin filter from factorized left side (NULL if N/A) */ + uint64_t* asp_bits; + int64_t asp_key_max; +} join_build_ctx_t; + +static void join_build_fn(void* raw, uint32_t wid, int64_t start, int64_t end) { + (void)wid; + join_build_ctx_t* c = (join_build_ctx_t*)raw; + _Atomic(uint32_t)* heads = c->ht_heads; + uint32_t* restrict next = c->ht_next; + uint32_t mask = c->ht_mask; + + /* ASP-Join: precompute pointer for right-side build filtering */ + uint64_t* asp_bits = c->asp_bits; + int64_t asp_max = c->asp_key_max; + int64_t* rk0 = (asp_bits && c->n_keys == 1) ? (int64_t*)ray_data(c->r_key_vecs[0]) : NULL; + + for (int64_t r = start; r < end; r++) { + /* ASP-Join skip: if right key not in left-side bitmap, skip insert */ + if (rk0 && rk0[r] >= 0 && rk0[r] <= asp_max && + !RAY_SEL_BIT_TEST(asp_bits, rk0[r])) { + next[(uint32_t)r] = JHT_EMPTY; /* mark as unused */ + continue; + } + if (r + 8 < end) { + uint64_t pf_h = hash_row_keys(c->r_key_vecs, c->n_keys, r + 8); + __builtin_prefetch(&heads[(uint32_t)(pf_h & mask)], 1, 1); + } + uint64_t h = hash_row_keys(c->r_key_vecs, c->n_keys, r); + uint32_t slot = (uint32_t)(h & mask); + uint32_t row32 = (uint32_t)r; + uint32_t old = atomic_load_explicit(&heads[slot], memory_order_relaxed); + do { + next[row32] = old; + } while (!atomic_compare_exchange_weak_explicit(&heads[slot], &old, row32, + memory_order_release, memory_order_relaxed)); + } +} + +#define JOIN_MORSEL 8192 + +typedef struct { + _Atomic(uint32_t)* ht_heads; + uint32_t* ht_next; + uint32_t ht_cap; + ray_t** l_key_vecs; + ray_t** r_key_vecs; + uint8_t n_keys; + uint8_t join_type; + int64_t left_rows; + /* Per-morsel counts/offsets (allocated by main thread) 
*/ + int64_t* morsel_counts; + int64_t* morsel_offsets; + /* Shared output arrays (phase 2 fill) */ + int64_t* l_idx; + int64_t* r_idx; + /* FULL OUTER: track which right rows were matched (NULL if not full) */ + _Atomic(uint8_t)* matched_right; + /* S-Join: semijoin filter bitmap (NULL if not applicable) */ + uint64_t* sjoin_bits; + int64_t sjoin_key_max; +} join_probe_ctx_t; + +/* Phase 2a: count matches per morsel */ +static void join_count_fn(void* raw, uint32_t wid, int64_t task_start, int64_t task_end) { + (void)wid; (void)task_end; + join_probe_ctx_t* c = (join_probe_ctx_t*)raw; + uint32_t tid = (uint32_t)task_start; + int64_t row_start = (int64_t)tid * JOIN_MORSEL; + int64_t row_end = row_start + JOIN_MORSEL; + if (row_end > c->left_rows) row_end = c->left_rows; + + /* S-Join: precompute pointer for fast semijoin check */ + uint64_t* sjbits = c->sjoin_bits; + int64_t sjmax = c->sjoin_key_max; + int64_t* lk0 = (sjbits && c->n_keys == 1) ? (int64_t*)ray_data(c->l_key_vecs[0]) : NULL; + + int64_t count = 0; + uint32_t ht_mask = c->ht_cap - 1; + for (int64_t l = row_start; l < row_end; l++) { + /* S-Join skip: if left key not in right-side bitmap, skip probe */ + if (lk0 && lk0[l] >= 0 && lk0[l] <= sjmax && + !RAY_SEL_BIT_TEST(sjbits, lk0[l])) { + if (c->join_type >= 1) count++; /* LEFT/FULL: emit unmatched */ + continue; + } + + if (l + 8 < row_end) { + uint64_t pf_h = hash_row_keys(c->l_key_vecs, c->n_keys, l + 8); + __builtin_prefetch(&c->ht_heads[(uint32_t)(pf_h & ht_mask)], 0, 1); + } + uint64_t h = hash_row_keys(c->l_key_vecs, c->n_keys, l); + uint32_t slot = (uint32_t)(h & ht_mask); + bool matched = false; + for (uint32_t r = c->ht_heads[slot]; r != JHT_EMPTY; r = c->ht_next[r]) { + if (join_keys_eq(c->l_key_vecs, c->r_key_vecs, c->n_keys, l, (int64_t)r)) { + count++; + matched = true; + } + } + if (!matched && c->join_type >= 1) count++; + } + c->morsel_counts[tid] = count; +} + +/* Phase 2b: fill match pairs using pre-computed offsets */ +static void 
join_fill_fn(void* raw, uint32_t wid, int64_t task_start, int64_t task_end) { + (void)wid; (void)task_end; + join_probe_ctx_t* c = (join_probe_ctx_t*)raw; + uint32_t tid = (uint32_t)task_start; + int64_t row_start = (int64_t)tid * JOIN_MORSEL; + int64_t row_end = row_start + JOIN_MORSEL; + if (row_end > c->left_rows) row_end = c->left_rows; + + int64_t off = c->morsel_offsets[tid]; + int64_t* restrict li = c->l_idx; + int64_t* restrict ri = c->r_idx; + + /* S-Join: precompute pointer for fast semijoin check */ + uint64_t* sjbits = c->sjoin_bits; + int64_t sjmax = c->sjoin_key_max; + int64_t* lk0 = (sjbits && c->n_keys == 1) ? (int64_t*)ray_data(c->l_key_vecs[0]) : NULL; + + uint32_t ht_mask = c->ht_cap - 1; + for (int64_t l = row_start; l < row_end; l++) { + /* S-Join skip: if left key not in right-side bitmap, skip probe */ + if (lk0 && lk0[l] >= 0 && lk0[l] <= sjmax && + !RAY_SEL_BIT_TEST(sjbits, lk0[l])) { + if (c->join_type >= 1) { + li[off] = l; + ri[off] = -1; + off++; + } + continue; + } + + if (l + 8 < row_end) { + uint64_t pf_h = hash_row_keys(c->l_key_vecs, c->n_keys, l + 8); + __builtin_prefetch(&c->ht_heads[(uint32_t)(pf_h & ht_mask)], 0, 1); + } + uint64_t h = hash_row_keys(c->l_key_vecs, c->n_keys, l); + uint32_t slot = (uint32_t)(h & ht_mask); + bool matched = false; + for (uint32_t r = c->ht_heads[slot]; r != JHT_EMPTY; r = c->ht_next[r]) { + if (join_keys_eq(c->l_key_vecs, c->r_key_vecs, c->n_keys, l, (int64_t)r)) { + li[off] = l; + ri[off] = (int64_t)r; + off++; + matched = true; + /* Monotonic 0→1 store from multiple workers. 
*/ + if (c->matched_right) atomic_store_explicit(&c->matched_right[r], 1, memory_order_relaxed); + } + } + if (!matched && c->join_type >= 1) { + li[off] = l; + ri[off] = -1; + off++; + } + } +} + +ray_t* exec_join(ray_graph_t* g, ray_op_t* op, ray_t* left_table, ray_t* right_table) { + if (!left_table || RAY_IS_ERR(left_table)) return left_table; + if (!right_table || RAY_IS_ERR(right_table)) return right_table; + + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + + int64_t left_rows = ray_table_nrows(left_table); + int64_t right_rows = ray_table_nrows(right_table); + /* Guard: radix path stores row indices as int32_t (widened to int64_t on gather). + * Chained HT path uses uint32_t. Cap at INT32_MAX for correctness. */ + if (right_rows > (int64_t)INT32_MAX || left_rows > (int64_t)INT32_MAX) + return ray_error("nyi", NULL); + uint8_t n_keys = ext->join.n_join_keys; + uint8_t join_type = ext->join.join_type; + + /* VLA bound of zero is UB under -fsanitize=undefined. Guarantee >=1 + * slot; iterations below are bounded by n_keys so the extra slot is + * untouched when n_keys == 0. */ + size_t key_slots = n_keys ? 
n_keys : 1; + ray_t* l_key_vecs[key_slots]; + ray_t* r_key_vecs[key_slots]; + memset(l_key_vecs, 0, key_slots * sizeof(ray_t*)); + memset(r_key_vecs, 0, key_slots * sizeof(ray_t*)); + + for (uint8_t k = 0; k < n_keys; k++) { + ray_op_ext_t* lk = find_ext(g, ext->join.left_keys[k]->id); + ray_op_ext_t* rk = find_ext(g, ext->join.right_keys[k]->id); + if (lk && lk->base.opcode == OP_SCAN) + l_key_vecs[k] = ray_table_get_col(left_table, lk->sym); + if (rk && rk->base.opcode == OP_SCAN) + r_key_vecs[k] = ray_table_get_col(right_table, rk->sym); + if (rk && rk->base.opcode == OP_CONST && rk->literal) + r_key_vecs[k] = rk->literal; + } + + /* RAY_STR keys not yet supported (16-byte elements vs 8-byte hash/eq slots) */ + for (uint8_t k = 0; k < n_keys; k++) { + if ((l_key_vecs[k] && l_key_vecs[k]->type == RAY_STR) || + (r_key_vecs[k] && r_key_vecs[k]->type == RAY_STR)) + return ray_error("nyi", NULL); + } + + ray_pool_t* pool = ray_pool_get(); + + /* Shared output state — used by both radix and chained HT paths */ + ray_t* result = NULL; + ray_t* counts_hdr = NULL; + ray_t* l_idx_hdr = NULL; + ray_t* r_idx_hdr = NULL; + ray_t* matched_right_hdr = NULL; + ray_t* sjoin_sel = NULL; + ray_t* asp_sel = NULL; + ray_t* ht_next_hdr = NULL; + ray_t* ht_heads_hdr = NULL; + int64_t* l_idx = NULL; + int64_t* r_idx = NULL; + int64_t pair_count = 0; + _Atomic(uint8_t)* matched_right = NULL; + + /* ── Radix-partitioned path (large joins) ──────────────────────── */ + if (right_rows > RAY_PARALLEL_THRESHOLD) { + uint8_t radix_bits = radix_join_bits(right_rows); + uint32_t n_rparts = (uint32_t)1 << radix_bits; + + /* Pre-compute hashes for both sides (once, reused by histogram+scatter) */ + ray_t* r_hash_hdr = NULL; + uint32_t* r_hashes = (uint32_t*)scratch_alloc(&r_hash_hdr, + (size_t)right_rows * sizeof(uint32_t)); + ray_t* l_hash_hdr = NULL; + uint32_t* l_hashes = (uint32_t*)scratch_alloc(&l_hash_hdr, + (size_t)left_rows * sizeof(uint32_t)); + if (!r_hashes || !l_hashes) { + if 
(r_hash_hdr) scratch_free(r_hash_hdr); + if (l_hash_hdr) scratch_free(l_hash_hdr); + goto chained_ht_fallback; + } + join_radix_hash_ctx_t rhctx = { .key_vecs = r_key_vecs, .n_keys = n_keys, .hashes = r_hashes }; + join_radix_hash_ctx_t lhctx = { .key_vecs = l_key_vecs, .n_keys = n_keys, .hashes = l_hashes }; + if (pool) { + ray_pool_dispatch(pool, join_radix_hash_fn, &rhctx, right_rows); + ray_pool_dispatch(pool, join_radix_hash_fn, &lhctx, left_rows); + } else { + join_radix_hash_fn(&rhctx, 0, 0, right_rows); + join_radix_hash_fn(&lhctx, 0, 0, left_rows); + } + + if (pool_cancelled(pool)) { + scratch_free(r_hash_hdr); scratch_free(l_hash_hdr); + return ray_error("cancel", NULL); + } + + /* Partition both sides using cached hashes */ + ray_t* r_parts_hdr = NULL; + join_radix_part_t* r_parts = join_radix_partition(pool, right_rows, + radix_bits, r_hashes, &r_parts_hdr); + ray_t* l_parts_hdr = NULL; + join_radix_part_t* l_parts = join_radix_partition(pool, left_rows, + radix_bits, l_hashes, &l_parts_hdr); + scratch_free(r_hash_hdr); + scratch_free(l_hash_hdr); + if (!r_parts || !l_parts) { + /* OOM during partitioning — fall through to chained HT path */ + if (r_parts) { + for (uint32_t rp2 = 0; rp2 < n_rparts; rp2++) + if (r_parts[rp2].entries_hdr) scratch_free(r_parts[rp2].entries_hdr); + scratch_free(r_parts_hdr); + } + if (l_parts) { + for (uint32_t rp2 = 0; rp2 < n_rparts; rp2++) + if (l_parts[rp2].entries_hdr) scratch_free(l_parts[rp2].entries_hdr); + scratch_free(l_parts_hdr); + } + goto chained_ht_fallback; + } + + if (pool_cancelled(pool)) { + for (uint32_t rp2 = 0; rp2 < n_rparts; rp2++) { + if (r_parts[rp2].entries_hdr) scratch_free(r_parts[rp2].entries_hdr); + if (l_parts[rp2].entries_hdr) scratch_free(l_parts[rp2].entries_hdr); + } + scratch_free(r_parts_hdr); scratch_free(l_parts_hdr); + return ray_error("cancel", NULL); + } + + /* FULL OUTER: allocate matched_right tracker */ + if (join_type == 2 && right_rows > 0) { + matched_right = 
(_Atomic(uint8_t)*)scratch_calloc(&matched_right_hdr, + (size_t)right_rows); + if (!matched_right) { + for (uint32_t rp2 = 0; rp2 < n_rparts; rp2++) { + if (r_parts[rp2].entries_hdr) scratch_free(r_parts[rp2].entries_hdr); + if (l_parts[rp2].entries_hdr) scratch_free(l_parts[rp2].entries_hdr); + } + scratch_free(r_parts_hdr); scratch_free(l_parts_hdr); + matched_right_hdr = NULL; + goto chained_ht_fallback; + } + } + + /* Single-pass per-partition build+probe with local output buffers */ + ray_t* pcounts_hdr = NULL; + int64_t* part_counts = (int64_t*)scratch_calloc(&pcounts_hdr, + (size_t)n_rparts * sizeof(int64_t)); + ray_t* pp_meta_hdr = NULL; + /* Allocate per-partition pointer arrays */ + size_t pp_alloc_sz = (size_t)n_rparts * (2 * sizeof(int32_t*) + 2 * sizeof(ray_t*) + sizeof(uint32_t)); + char* pp_mem = (char*)scratch_calloc(&pp_meta_hdr, pp_alloc_sz); + if (!part_counts || !pp_mem) { + if (pcounts_hdr) scratch_free(pcounts_hdr); + if (pp_meta_hdr) scratch_free(pp_meta_hdr); + for (uint32_t rp2 = 0; rp2 < n_rparts; rp2++) { + if (r_parts[rp2].entries_hdr) scratch_free(r_parts[rp2].entries_hdr); + if (l_parts[rp2].entries_hdr) scratch_free(l_parts[rp2].entries_hdr); + } + scratch_free(r_parts_hdr); scratch_free(l_parts_hdr); + if (matched_right_hdr) { scratch_free(matched_right_hdr); matched_right_hdr = NULL; } + matched_right = NULL; + goto chained_ht_fallback; + } + int32_t** pp_l = (int32_t**)pp_mem; + int32_t** pp_r = (int32_t**)(pp_mem + (size_t)n_rparts * sizeof(int32_t*)); + ray_t** pp_l_hdr = (ray_t**)(pp_mem + (size_t)n_rparts * 2 * sizeof(int32_t*)); + ray_t** pp_r_hdr = (ray_t**)(pp_mem + (size_t)n_rparts * (2 * sizeof(int32_t*) + sizeof(ray_t*))); + uint32_t* pp_cap = (uint32_t*)(pp_mem + (size_t)n_rparts * (2 * sizeof(int32_t*) + 2 * sizeof(ray_t*))); + + join_radix_bp_ctx_t bp_ctx = { + .l_parts = l_parts, .r_parts = r_parts, + .l_key_vecs = l_key_vecs, .r_key_vecs = r_key_vecs, + .n_keys = n_keys, .join_type = join_type, + .pp_l = pp_l, .pp_r 
= pp_r, + .pp_l_hdr = pp_l_hdr, .pp_r_hdr = pp_r_hdr, + .part_counts = part_counts, .pp_cap = pp_cap, + .matched_right = matched_right, + .had_error = 0, + }; + if (pool && n_rparts > 1) + ray_pool_dispatch_n(pool, join_radix_build_probe_fn, &bp_ctx, n_rparts); + else + for (uint32_t rp2 = 0; rp2 < n_rparts; rp2++) + join_radix_build_probe_fn(&bp_ctx, 0, rp2, rp2 + 1); + + /* Check cancellation and errors during build+probe */ + bool bp_cancelled = pool_cancelled(pool); + bool bp_error = atomic_load_explicit(&bp_ctx.had_error, memory_order_relaxed); + if (bp_cancelled || bp_error) { + /* Free all per-partition buffers */ + for (uint32_t rp2 = 0; rp2 < n_rparts; rp2++) { + if (r_parts[rp2].entries_hdr) scratch_free(r_parts[rp2].entries_hdr); + if (l_parts[rp2].entries_hdr) scratch_free(l_parts[rp2].entries_hdr); + if (pp_l_hdr[rp2]) scratch_free(pp_l_hdr[rp2]); + if (pp_r_hdr[rp2]) scratch_free(pp_r_hdr[rp2]); + } + scratch_free(r_parts_hdr); scratch_free(l_parts_hdr); + scratch_free(pp_meta_hdr); scratch_free(pcounts_hdr); + if (matched_right_hdr) { scratch_free(matched_right_hdr); matched_right_hdr = NULL; } + matched_right = NULL; + if (bp_cancelled) return ray_error("cancel", NULL); + goto chained_ht_fallback; + } + + /* Free partition buffers — no longer needed */ + for (uint32_t rp2 = 0; rp2 < n_rparts; rp2++) { + if (r_parts[rp2].entries_hdr) scratch_free(r_parts[rp2].entries_hdr); + if (l_parts[rp2].entries_hdr) scratch_free(l_parts[rp2].entries_hdr); + } + scratch_free(r_parts_hdr); + scratch_free(l_parts_hdr); + + /* Compute total output size and consolidate per-partition buffers */ + for (uint32_t rp2 = 0; rp2 < n_rparts; rp2++) + pair_count += part_counts[rp2]; + + /* FULL OUTER: count unmatched right rows */ + int64_t unmatched_right = 0; + if (join_type == 2 && matched_right) { + for (int64_t r = 0; r < right_rows; r++) + if (!matched_right[r]) unmatched_right++; + } + int64_t total_out = pair_count + unmatched_right; + + if (total_out > 0) { + l_idx = 
(int64_t*)scratch_alloc(&l_idx_hdr, (size_t)total_out * sizeof(int64_t)); + r_idx = (int64_t*)scratch_alloc(&r_idx_hdr, (size_t)total_out * sizeof(int64_t)); + if (!l_idx || !r_idx) { + scratch_free(l_idx_hdr); scratch_free(r_idx_hdr); + l_idx_hdr = NULL; r_idx_hdr = NULL; + for (uint32_t rp2 = 0; rp2 < n_rparts; rp2++) { + if (pp_l_hdr[rp2]) scratch_free(pp_l_hdr[rp2]); + if (pp_r_hdr[rp2]) scratch_free(pp_r_hdr[rp2]); + } + scratch_free(pp_meta_hdr); + scratch_free(pcounts_hdr); + if (matched_right_hdr) scratch_free(matched_right_hdr); + matched_right_hdr = NULL; + return ray_error("oom", NULL); + } + + /* Copy per-partition results into contiguous arrays (int32→int64 widen) */ + int64_t off = 0; + for (uint32_t rp2 = 0; rp2 < n_rparts; rp2++) { + int64_t cnt = part_counts[rp2]; + if (cnt > 0 && pp_l[rp2] && pp_r[rp2]) { + for (int64_t j = 0; j < cnt; j++) { + l_idx[off + j] = (int64_t)pp_l[rp2][j]; + r_idx[off + j] = (int64_t)pp_r[rp2][j]; + } + off += cnt; + } + } + + /* FULL OUTER: append unmatched right rows */ + if (unmatched_right > 0) { + for (int64_t r = 0; r < right_rows; r++) { + if (!matched_right[r]) { + l_idx[off] = -1; + r_idx[off] = r; + off++; + } + } + } + pair_count = total_out; + } + + /* Free per-partition buffers allocated by worker threads. + * Safe: ray_pool_dispatch_n has completed (workers are back on semaphore), + * ray_parallel_flag is 0, and ray_free handles cross-heap deallocation + * via the foreign-block list flushed by ray_heap_gc at ray_parallel_end. 
*/ + for (uint32_t rp2 = 0; rp2 < n_rparts; rp2++) { + if (pp_l_hdr[rp2]) scratch_free(pp_l_hdr[rp2]); + if (pp_r_hdr[rp2]) scratch_free(pp_r_hdr[rp2]); + } + scratch_free(pp_meta_hdr); + scratch_free(pcounts_hdr); + goto join_gather; + } + +chained_ht_fallback:; + /* ── Chained HT path (small joins / radix OOM fallback) ────────── */ + uint64_t ht_cap64 = 256; + uint64_t target = (uint64_t)right_rows * 2; + while (ht_cap64 < target) ht_cap64 *= 2; + if (ht_cap64 > UINT32_MAX) ht_cap64 = (uint64_t)1 << 31; + uint32_t ht_cap = (uint32_t)ht_cap64; + + uint32_t* ht_next = (uint32_t*)scratch_alloc(&ht_next_hdr, (size_t)right_rows * sizeof(uint32_t)); + // cppcheck-suppress internalAstError + // Valid C11/C17 _Atomic(T)* declaration; cppcheck parser may mis-handle this syntax. + _Atomic(uint32_t)* ht_heads = (_Atomic(uint32_t)*)scratch_alloc(&ht_heads_hdr, ht_cap * sizeof(uint32_t)); + if (!ht_next || !ht_heads) { + scratch_free(ht_next_hdr); scratch_free(ht_heads_hdr); + return ray_error("oom", NULL); + } + memset(ht_heads, 0xFF, ht_cap * sizeof(uint32_t)); /* JHT_EMPTY = 0xFFFFFFFF */ + + /* Phase 0.5: ASP-Join — extract semijoin filter from factorized left side. + * When the left input comes from a factorized expand (_count column present), + * build a RAY_SEL bitmap of left-side key values to skip right-side rows + * during hash-build whose keys can't match any left-side row. 
*/ + uint64_t* asp_bits = NULL; + int64_t asp_key_max = 0; + if (n_keys == 1 && join_type == 0 && l_key_vecs[0] && + l_key_vecs[0]->type == RAY_I64 && right_rows > left_rows * 2) { + int64_t cnt_sym = ray_sym_intern("_count", 6); + ray_t* cnt_col = ray_table_get_col(left_table, cnt_sym); + if (cnt_col) { /* left is factorized */ + int64_t* lk = (int64_t*)ray_data(l_key_vecs[0]); + int64_t lk_max = 0; + for (int64_t i = 0; i < left_rows; i++) + if (lk[i] > lk_max) lk_max = lk[i]; + + if (lk_max < (int64_t)1 << 24) { + asp_sel = ray_sel_new(lk_max + 1); + if (asp_sel && !RAY_IS_ERR(asp_sel)) { + asp_bits = ray_sel_bits(asp_sel); + asp_key_max = lk_max; + for (int64_t i = 0; i < left_rows; i++) { + int64_t k = lk[i]; + if (k >= 0 && k <= lk_max) + RAY_SEL_BIT_SET(asp_bits, k); + } + } + } + } + } + + { + join_build_ctx_t bctx = { + .ht_heads = ht_heads, + .ht_next = ht_next, + .ht_mask = ht_cap - 1, + .r_key_vecs = r_key_vecs, + .n_keys = n_keys, + .asp_bits = asp_bits, + .asp_key_max = asp_key_max, + }; + if (pool && right_rows > RAY_PARALLEL_THRESHOLD) + ray_pool_dispatch(pool, join_build_fn, &bctx, right_rows); + else + join_build_fn(&bctx, 0, 0, right_rows); + } + CHECK_CANCEL_GOTO(pool, join_cleanup); + + /* Phase 1.5: S-Join semijoin filter extraction. + * Build a RAY_SEL bitmap of all distinct right-side key values that + * appear in the hash table. This can be used to skip left-side rows + * whose key cannot match any right-side row. + * + * Applied when: single I64 key, inner join, left side is large enough + * to benefit from filtering (> 2x right side). 
*/ + if (n_keys == 1 && join_type == 0 && l_key_vecs[0] && r_key_vecs[0] && + l_key_vecs[0]->type == RAY_I64 && r_key_vecs[0]->type == RAY_I64 && + left_rows > right_rows * 2) { + /* Determine key range to size the bitmap */ + int64_t* rk = (int64_t*)ray_data(r_key_vecs[0]); + int64_t key_max = 0; + for (int64_t i = 0; i < right_rows; i++) + if (rk[i] > key_max) key_max = rk[i]; + + if (key_max < (int64_t)1 << 24) { /* only for reasonably bounded keys */ + sjoin_sel = ray_sel_new(key_max + 1); + if (sjoin_sel && !RAY_IS_ERR(sjoin_sel)) { + uint64_t* bits = ray_sel_bits(sjoin_sel); + for (int64_t i = 0; i < right_rows; i++) { + int64_t k = rk[i]; + if (k >= 0 && k <= key_max) + RAY_SEL_BIT_SET(bits, k); + } + } + } + } + + /* Phase 2: Parallel probe (two-pass: count → prefix-sum → fill) */ + uint32_t n_tasks = (uint32_t)((left_rows + JOIN_MORSEL - 1) / JOIN_MORSEL); + if (n_tasks == 0) n_tasks = 1; + + int64_t* morsel_counts = (int64_t*)scratch_calloc(&counts_hdr, + (size_t)(n_tasks + 1) * sizeof(int64_t)); + if (!morsel_counts) { + scratch_free(ht_next_hdr); scratch_free(ht_heads_hdr); + return ray_error("oom", NULL); + } + + /* For FULL OUTER JOIN, allocate matched_right tracker */ + if (join_type == 2 && right_rows > 0) { + matched_right = (_Atomic(uint8_t)*)scratch_calloc(&matched_right_hdr, + (size_t)right_rows); + if (!matched_right) goto join_cleanup; + } + + /* Prepare S-Join fields for probe context */ + uint64_t* sjoin_bits = NULL; + int64_t sjoin_key_max = 0; + if (sjoin_sel && !RAY_IS_ERR(sjoin_sel)) { + sjoin_bits = ray_sel_bits(sjoin_sel); + sjoin_key_max = sjoin_sel->len - 1; + } + + join_probe_ctx_t probe_ctx = { + .ht_heads = ht_heads, + .ht_next = ht_next, + .ht_cap = ht_cap, + .l_key_vecs = l_key_vecs, + .r_key_vecs = r_key_vecs, + .n_keys = n_keys, + .join_type = join_type, + .left_rows = left_rows, + .morsel_counts = morsel_counts, + .matched_right = matched_right, + .sjoin_bits = sjoin_bits, + .sjoin_key_max = sjoin_key_max, + }; + + /* 2a: 
Count matches per morsel */ + if (pool && n_tasks > 1) + ray_pool_dispatch_n(pool, join_count_fn, &probe_ctx, n_tasks); + else + for (uint32_t t = 0; t < n_tasks; t++) + join_count_fn(&probe_ctx, 0, t, t + 1); + + /* Prefix sum → morsel_offsets (reuse counts array as offsets) */ + pair_count = 0; + for (uint32_t t = 0; t < n_tasks; t++) { + int64_t cnt = morsel_counts[t]; + morsel_counts[t] = pair_count; + pair_count += cnt; + } + + /* Allocate output pair arrays */ + if (pair_count > 0) { + l_idx = (int64_t*)scratch_alloc(&l_idx_hdr, (size_t)pair_count * sizeof(int64_t)); + r_idx = (int64_t*)scratch_alloc(&r_idx_hdr, (size_t)pair_count * sizeof(int64_t)); + if (!l_idx || !r_idx) goto join_cleanup; + } + + /* 2b: Fill match pairs */ + probe_ctx.morsel_offsets = morsel_counts; /* now holds prefix sums */ + probe_ctx.l_idx = l_idx; + probe_ctx.r_idx = r_idx; + + if (pair_count > 0) { + if (pool && n_tasks > 1) + ray_pool_dispatch_n(pool, join_fill_fn, &probe_ctx, n_tasks); + else + for (uint32_t t = 0; t < n_tasks; t++) + join_fill_fn(&probe_ctx, 0, t, t + 1); + } + + CHECK_CANCEL_GOTO(pool, join_cleanup); + + /* FULL OUTER: append unmatched right rows (l_idx=-1, r_idx=r) */ + if (join_type == 2 && matched_right) { + int64_t unmatched_right = 0; + for (int64_t r = 0; r < right_rows; r++) + if (!matched_right[r]) unmatched_right++; + + if (unmatched_right > 0) { + int64_t total = pair_count + unmatched_right; + ray_t* new_l_hdr; + ray_t* new_r_hdr; + int64_t* new_l = (int64_t*)scratch_alloc(&new_l_hdr, + (size_t)total * sizeof(int64_t)); + int64_t* new_r = (int64_t*)scratch_alloc(&new_r_hdr, + (size_t)total * sizeof(int64_t)); + if (!new_l || !new_r) { + scratch_free(new_l_hdr); scratch_free(new_r_hdr); + goto join_cleanup; + } + if (pair_count > 0) { + memcpy(new_l, l_idx, (size_t)pair_count * sizeof(int64_t)); + memcpy(new_r, r_idx, (size_t)pair_count * sizeof(int64_t)); + } + scratch_free(l_idx_hdr); + scratch_free(r_idx_hdr); + int64_t off = pair_count; + for 
(int64_t r = 0; r < right_rows; r++) { + if (!matched_right[r]) { + new_l[off] = -1; + new_r[off] = r; + off++; + } + } + l_idx = new_l; r_idx = new_r; + l_idx_hdr = new_l_hdr; r_idx_hdr = new_r_hdr; + pair_count = total; + } + } + +join_gather:; + /* Phase 3: Build result table with parallel column gather. + * Use multi_gather for batched column access when possible (non-nullable + * indices), falling back to per-column gather for nullable RIGHT columns. */ + int64_t left_ncols = ray_table_ncols(left_table); + int64_t right_ncols = ray_table_ncols(right_table); + result = ray_table_new(left_ncols + right_ncols); + if (!result || RAY_IS_ERR(result)) goto join_cleanup; + + /* Allocate all output columns upfront for batched gather */ + ray_t* l_out_cols[MGATHER_MAX_COLS]; + int64_t l_out_names[MGATHER_MAX_COLS]; + int64_t l_out_count = 0; + for (int64_t c = 0; c < left_ncols && l_out_count < MGATHER_MAX_COLS; c++) { + ray_t* col = ray_table_get_col_idx(left_table, c); + if (!col) continue; + ray_t* new_col = col_vec_new(col, pair_count); + if (!new_col || RAY_IS_ERR(new_col)) continue; + new_col->len = pair_count; + l_out_cols[l_out_count] = new_col; + l_out_names[l_out_count] = ray_table_col_name(left_table, c); + l_out_count++; + } + + ray_t* r_out_cols[MGATHER_MAX_COLS]; + ray_t* r_src_cols[MGATHER_MAX_COLS]; + int64_t r_out_names[MGATHER_MAX_COLS]; + int64_t r_out_count = 0; + for (int64_t c = 0; c < right_ncols; c++) { + ray_t* col = ray_table_get_col_idx(right_table, c); + int64_t name_id = ray_table_col_name(right_table, c); + if (!col) continue; + bool is_key = false; + for (uint8_t k = 0; k < n_keys; k++) { + ray_op_ext_t* rk = find_ext(g, ext->join.right_keys[k]->id); + if (rk && rk->base.opcode == OP_SCAN && rk->sym == name_id) { + is_key = true; break; + } + } + if (is_key) continue; + if (r_out_count >= MGATHER_MAX_COLS) continue; + ray_t* new_col = col_vec_new(col, pair_count); + if (!new_col || RAY_IS_ERR(new_col)) continue; + new_col->len = 
pair_count; + r_out_cols[r_out_count] = new_col; + r_src_cols[r_out_count] = col; + r_out_names[r_out_count] = name_id; + r_out_count++; + } + + if (pair_count > 0) { + /* Left columns: multi_gather (non-nullable for INNER/LEFT) */ + bool l_nullable = (join_type == 2); /* only FULL OUTER */ + if (!l_nullable && l_out_count > 1 && l_out_count <= MGATHER_MAX_COLS) { + multi_gather_ctx_t mgctx = { .idx = l_idx, .ncols = l_out_count }; + int64_t si = 0; + for (int64_t c = 0; c < left_ncols && si < l_out_count; c++) { + ray_t* col = ray_table_get_col_idx(left_table, c); + if (!col) continue; + mgctx.srcs[si] = (char*)ray_data(col); + mgctx.dsts[si] = (char*)ray_data(l_out_cols[si]); + mgctx.esz[si] = col_esz(col); + si++; + } + if (pool && pair_count > RAY_PARALLEL_THRESHOLD) + ray_pool_dispatch(pool, multi_gather_fn, &mgctx, pair_count); + else + multi_gather_fn(&mgctx, 0, 0, pair_count); + } else { + /* Fall back to per-column gather for nullable or single column */ + int64_t si = 0; + for (int64_t c = 0; c < left_ncols && si < l_out_count; c++) { + ray_t* col = ray_table_get_col_idx(left_table, c); + if (!col) continue; + gather_ctx_t gctx = { + .idx = l_idx, .src_col = col, .dst_col = l_out_cols[si], + .esz = col_esz(col), .nullable = l_nullable, + }; + if (pool && pair_count > RAY_PARALLEL_THRESHOLD) + ray_pool_dispatch(pool, gather_fn, &gctx, pair_count); + else + gather_fn(&gctx, 0, 0, pair_count); + si++; + } + } + + /* Right columns: per-column gather (nullable for LEFT/FULL OUTER) */ + bool r_nullable = (join_type >= 1); + if (!r_nullable && r_out_count > 1 && r_out_count <= MGATHER_MAX_COLS) { + multi_gather_ctx_t mgctx = { .idx = r_idx, .ncols = r_out_count }; + for (int64_t i = 0; i < r_out_count; i++) { + mgctx.srcs[i] = (char*)ray_data(r_src_cols[i]); + mgctx.dsts[i] = (char*)ray_data(r_out_cols[i]); + mgctx.esz[i] = col_esz(r_out_cols[i]); + } + if (pool && pair_count > RAY_PARALLEL_THRESHOLD) + ray_pool_dispatch(pool, multi_gather_fn, &mgctx, 
pair_count); + else + multi_gather_fn(&mgctx, 0, 0, pair_count); + } else { + for (int64_t i = 0; i < r_out_count; i++) { + gather_ctx_t gctx = { + .idx = r_idx, .src_col = r_src_cols[i], .dst_col = r_out_cols[i], + .esz = col_esz(r_src_cols[i]), .nullable = r_nullable, + }; + if (pool && pair_count > RAY_PARALLEL_THRESHOLD) + ray_pool_dispatch(pool, gather_fn, &gctx, pair_count); + else + gather_fn(&gctx, 0, 0, pair_count); + } + } + } + + /* Propagate RAY_STR string pools and null bitmaps from source columns */ + { + int64_t si = 0; + for (int64_t c = 0; c < left_ncols && si < l_out_count; c++) { + ray_t* col = ray_table_get_col_idx(left_table, c); + if (!col) continue; + col_propagate_str_pool(l_out_cols[si], col); + col_propagate_nulls_gather(l_out_cols[si], col, l_idx, pair_count); + si++; + } + } + for (int64_t i = 0; i < r_out_count; i++) { + col_propagate_str_pool(r_out_cols[i], r_src_cols[i]); + col_propagate_nulls_gather(r_out_cols[i], r_src_cols[i], r_idx, pair_count); + } + + /* Add columns to result */ + for (int64_t i = 0; i < l_out_count; i++) { + result = ray_table_add_col(result, l_out_names[i], l_out_cols[i]); + ray_release(l_out_cols[i]); + } + for (int64_t i = 0; i < r_out_count; i++) { + result = ray_table_add_col(result, r_out_names[i], r_out_cols[i]); + ray_release(r_out_cols[i]); + } + +join_cleanup: + if (ht_next_hdr) scratch_free(ht_next_hdr); + if (ht_heads_hdr) scratch_free(ht_heads_hdr); + scratch_free(l_idx_hdr); + scratch_free(r_idx_hdr); + if (counts_hdr) scratch_free(counts_hdr); + scratch_free(matched_right_hdr); + if (sjoin_sel) ray_release(sjoin_sel); + if (asp_sel) ray_release(asp_sel); + + return result; +} + +/* ============================================================================ + * OP_ANTIJOIN: anti-semi-join — keep left rows with NO matching right row + * Build hash set from right keys, probe left, emit non-matching left rows. 
+ * ============================================================================ */
+
+/* exec_antijoin: hash anti-join — keep the rows of left_table for which no
+ * right_table row has equal join keys.
+ *
+ *   g, op        graph and operator node; the join key list is read from the
+ *                node's extension record (ext->join).
+ *   left_table   probe side; only its columns appear in the result.
+ *   right_table  build side.
+ *
+ * Returns:
+ *   - left_table / right_table itself when that argument is NULL or an
+ *     error object (pass-through, no retain);
+ *   - left_table with one extra retain when either side has zero rows;
+ *   - otherwise a new table built from the unmatched left row indices,
+ *     which are collected in ascending left-row order;
+ *   - "nyi" for a missing extension record, more than INT32_MAX rows on
+ *     either side, more join keys than the local key arrays can hold, or
+ *     RAY_STR keys; "oom" when a scratch buffer cannot be allocated;
+ *     "cancel" when the pool reports cancellation after the build phase.
+ *
+ * NOTE(review): when every left row finds a match (out_count == 0) no
+ * columns are added, so the result is a table without columns rather than
+ * an empty table with the left schema — confirm callers expect that. */
+ray_t* exec_antijoin(ray_graph_t* g, ray_op_t* op,
+                     ray_t* left_table, ray_t* right_table) {
+    if (!left_table || RAY_IS_ERR(left_table)) return left_table;
+    if (!right_table || RAY_IS_ERR(right_table)) return right_table;
+
+    ray_op_ext_t* ext = find_ext(g, op->id);
+    if (!ext) return ray_error("nyi", NULL);
+
+    int64_t left_rows = ray_table_nrows(left_table);
+    int64_t right_rows = ray_table_nrows(right_table);
+
+    /* Row ids are stored as uint32_t in the hash table chains. */
+    if (right_rows > (int64_t)INT32_MAX || left_rows > (int64_t)INT32_MAX)
+        return ray_error("nyi", NULL);
+
+    uint8_t n_keys = ext->join.n_join_keys;
+
+    /* Trivial case: empty right → all left rows pass */
+    if (right_rows == 0) {
+        ray_retain(left_table);
+        return left_table;
+    }
+    /* Trivial case: empty left → empty result */
+    if (left_rows == 0) {
+        ray_retain(left_table);
+        return left_table;
+    }
+
+    ray_t* l_key_vecs[16];
+    ray_t* r_key_vecs[16];
+    /* n_keys is a uint8_t (up to 255) but the key arrays hold 16 entries:
+     * refuse wider joins instead of writing past the end of the stack
+     * arrays below. */
+    if (n_keys > sizeof(l_key_vecs) / sizeof(l_key_vecs[0]))
+        return ray_error("nyi", NULL);
+    memset(l_key_vecs, 0, n_keys * sizeof(ray_t*));
+    memset(r_key_vecs, 0, n_keys * sizeof(ray_t*));
+
+    /* Resolve each key to a column vector: OP_SCAN keys come from the
+     * respective table; a right-side OP_CONST key uses its literal. */
+    for (uint8_t k = 0; k < n_keys; k++) {
+        ray_op_ext_t* lk = find_ext(g, ext->join.left_keys[k]->id);
+        ray_op_ext_t* rk = find_ext(g, ext->join.right_keys[k]->id);
+        if (lk && lk->base.opcode == OP_SCAN)
+            l_key_vecs[k] = ray_table_get_col(left_table, lk->sym);
+        if (rk && rk->base.opcode == OP_SCAN)
+            r_key_vecs[k] = ray_table_get_col(right_table, rk->sym);
+        if (rk && rk->base.opcode == OP_CONST && rk->literal)
+            r_key_vecs[k] = rk->literal;
+    }
+
+    /* RAY_STR keys not yet supported */
+    for (uint8_t k = 0; k < n_keys; k++) {
+        if ((l_key_vecs[k] && l_key_vecs[k]->type == RAY_STR) ||
+            (r_key_vecs[k] && r_key_vecs[k]->type == RAY_STR))
+            return ray_error("nyi", NULL);
+    }
+
+    /* Build chained hash table from right side */
+    ray_t* ht_next_hdr = NULL;
+    ray_t* ht_heads_hdr = NULL;
+
+    /* Capacity: smallest power of two >= 2 * right_rows, at least 256,
+     * clamped to 2^31 so it fits a uint32_t. */
+    uint64_t ht_cap64 = 256;
+    uint64_t target = (uint64_t)right_rows * 2;
+    while (ht_cap64 < target) ht_cap64 *= 2;
+    if (ht_cap64 > UINT32_MAX) ht_cap64 = (uint64_t)1 << 31;
+    uint32_t ht_cap = (uint32_t)ht_cap64;
+
+    uint32_t* ht_next = (uint32_t*)scratch_alloc(&ht_next_hdr,
+                                    (size_t)right_rows * sizeof(uint32_t));
+    _Atomic(uint32_t)* ht_heads = (_Atomic(uint32_t)*)scratch_alloc(&ht_heads_hdr,
+                                    ht_cap * sizeof(uint32_t));
+    if (!ht_next || !ht_heads) {
+        if (ht_next_hdr) scratch_free(ht_next_hdr);
+        if (ht_heads_hdr) scratch_free(ht_heads_hdr);
+        return ray_error("oom", NULL);
+    }
+    memset(ht_heads, 0xFF, ht_cap * sizeof(uint32_t)); /* JHT_EMPTY */
+
+    /* Build: insert right rows into HT */
+    ray_pool_t* pool = ray_pool_get();
+    {
+        join_build_ctx_t bctx = {
+            .ht_heads = ht_heads,
+            .ht_next = ht_next,
+            .ht_mask = ht_cap - 1,
+            .r_key_vecs = r_key_vecs,
+            .n_keys = n_keys,
+            .asp_bits = NULL,
+            .asp_key_max = 0,
+        };
+        if (pool && right_rows > RAY_PARALLEL_THRESHOLD)
+            ray_pool_dispatch(pool, join_build_fn, &bctx, right_rows);
+        else
+            join_build_fn(&bctx, 0, 0, right_rows);
+    }
+
+    if (pool_cancelled(pool)) {
+        scratch_free(ht_next_hdr);
+        scratch_free(ht_heads_hdr);
+        return ray_error("cancel", NULL);
+    }
+
+    /* Probe: scan left rows, collect indices of those with NO match */
+    ray_t* out_idx_hdr = NULL;
+    int64_t* out_idx = (int64_t*)scratch_alloc(&out_idx_hdr,
+                                    (size_t)left_rows * sizeof(int64_t));
+    if (!out_idx) {
+        scratch_free(ht_next_hdr);
+        scratch_free(ht_heads_hdr);
+        return ray_error("oom", NULL);
+    }
+
+    uint32_t ht_mask = ht_cap - 1;
+    int64_t out_count = 0;
+    for (int64_t l = 0; l < left_rows; l++) {
+        uint64_t h = hash_row_keys(l_key_vecs, n_keys, l);
+        uint32_t slot = (uint32_t)(h & ht_mask);
+        bool matched = false;
+        /* Walk the bucket chain until a key-equal right row is found. */
+        for (uint32_t r = ht_heads[slot]; r != JHT_EMPTY; r = ht_next[r]) {
+            if (join_keys_eq(l_key_vecs, r_key_vecs, n_keys, l, (int64_t)r)) {
+                matched = true;
+                break; /* anti-join: one match is enough to exclude */
+            }
+        }
+        if (!matched) {
+            out_idx[out_count++] = l;
+        }
+    }
+
+    /* The hash table is only needed while probing. */
+    scratch_free(ht_next_hdr);
+    scratch_free(ht_heads_hdr);
+
+    /* Gather: build result table with only left columns */
+    int64_t left_ncols = ray_table_ncols(left_table);
+    ray_t* result = ray_table_new(left_ncols);
+    if (!result || RAY_IS_ERR(result)) {
+        scratch_free(out_idx_hdr);
+        return result;
+    }
+
+    if (out_count > 0) {
+        for (int64_t c = 0; c < left_ncols; c++) {
+            ray_t* col = ray_table_get_col_idx(left_table, c);
+            if (!col) continue;
+            ray_t* new_col = col_vec_new(col, out_count);
+            /* NOTE(review): a failed column allocation is skipped, so the
+             * result silently lacks that column — confirm this is intended. */
+            if (!new_col || RAY_IS_ERR(new_col)) continue;
+            new_col->len = out_count;
+
+            gather_ctx_t gctx = {
+                .idx = out_idx, .src_col = col, .dst_col = new_col,
+                .esz = col_esz(col), .nullable = false,
+            };
+            if (pool && out_count > RAY_PARALLEL_THRESHOLD)
+                ray_pool_dispatch(pool, gather_fn, &gctx, out_count);
+            else
+                gather_fn(&gctx, 0, 0, out_count);
+
+            col_propagate_str_pool(new_col, col);
+
+            int64_t name_id = ray_table_col_name(left_table, c);
+            result = ray_table_add_col(result, name_id, new_col);
+            ray_release(new_col);
+        }
+    }
+
+    scratch_free(out_idx_hdr);
+    return result;
+}
+
+/* ============================================================================
+ * OP_WINDOW_JOIN: ASOF join (sort-merge)
+ * For each left row, find the most recent right row where right.time <= left.time,
+ * optionally partitioned by equality keys. O(N+M) after sorting.
+ * ============================================================================ */ + +ray_t* exec_window_join(ray_graph_t* g, ray_op_t* op, + ray_t* left_table, ray_t* right_table) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + + uint8_t n_eq = ext->asof.n_eq_keys; + uint8_t join_type = ext->asof.join_type; + + int64_t left_n = ray_table_nrows(left_table); + int64_t right_n = ray_table_nrows(right_table); + + /* Resolve time key */ + ray_op_ext_t* time_ext = find_ext(g, ext->asof.time_key->id); + if (!time_ext || time_ext->base.opcode != OP_SCAN) + return ray_error("nyi", NULL); + int64_t time_sym = time_ext->sym; + + /* Resolve equality keys */ + int64_t eq_syms[256]; + for (uint8_t k = 0; k < n_eq; k++) { + ray_op_ext_t* ek = find_ext(g, ext->asof.eq_keys[k]->id); + if (!ek || ek->base.opcode != OP_SCAN) + return ray_error("nyi", NULL); + eq_syms[k] = ek->sym; + } + + /* Get time vectors — use int64 representation for comparison. + * TIME uses 4-byte i32 (ms), TIMESTAMP uses 8-byte i64 (ns). + * We expand to a temporary i64 array for uniform comparison. */ + ray_t* lt_time_vec = ray_table_get_col(left_table, time_sym); + ray_t* rt_time_vec = ray_table_get_col(right_table, time_sym); + if (!lt_time_vec || !rt_time_vec) return ray_error("schema", NULL); + int8_t time_type = lt_time_vec->type; + + /* Helper macro to read time value as int64_t regardless of storage type */ + #define READ_TIME(vec, idx) \ + ((time_type == RAY_TIME || time_type == RAY_DATE) \ + ? 
(int64_t)((int32_t*)ray_data(vec))[(idx)] \ + : ((int64_t*)ray_data(vec))[(idx)]) + + /* Build i64 time arrays for efficient comparison */ + ray_t* lt_time_hdr = NULL, *rt_time_hdr = NULL; + int64_t* lt_time = (int64_t*)scratch_alloc(<_time_hdr, (size_t)left_n * sizeof(int64_t)); + int64_t* rt_time = (int64_t*)scratch_alloc(&rt_time_hdr, (size_t)right_n * sizeof(int64_t)); + if ((!lt_time && left_n > 0) || (!rt_time && right_n > 0)) { + if (lt_time_hdr) scratch_free(lt_time_hdr); + if (rt_time_hdr) scratch_free(rt_time_hdr); + return ray_error("oom", NULL); + } + for (int64_t i = 0; i < left_n; i++) lt_time[i] = READ_TIME(lt_time_vec, i); + for (int64_t i = 0; i < right_n; i++) rt_time[i] = READ_TIME(rt_time_vec, i); + #undef READ_TIME + + /* Get eq key vectors — stored as ray_t* for type-safe access */ + ray_t* lt_eq[256], *rt_eq[256]; + for (uint8_t k = 0; k < n_eq; k++) { + ray_t* lv = ray_table_get_col(left_table, eq_syms[k]); + ray_t* rv = ray_table_get_col(right_table, eq_syms[k]); + if (!lv || !rv) { + if (lt_time_hdr) scratch_free(lt_time_hdr); + if (rt_time_hdr) scratch_free(rt_time_hdr); + return ray_error("schema", NULL); + } + lt_eq[k] = lv; + rt_eq[k] = rv; + } + + /* Precompute per-row "any key is null" bitsets. Null-keyed rows must + * not match — left rows fall through to the left-outer null fill, + * right rows are skipped entirely during the merge walk. SQL-style + * NULLs-never-match semantics. */ + ray_t* lt_null_hdr = NULL, *rt_null_hdr = NULL; + uint8_t* lt_null = left_n > 0 + ? (uint8_t*)scratch_alloc(<_null_hdr, (size_t)left_n) + : NULL; + uint8_t* rt_null = right_n > 0 + ? 
(uint8_t*)scratch_alloc(&rt_null_hdr, (size_t)right_n) + : NULL; + if ((!lt_null && left_n > 0) || (!rt_null && right_n > 0)) { + if (lt_null_hdr) scratch_free(lt_null_hdr); + if (rt_null_hdr) scratch_free(rt_null_hdr); + if (lt_time_hdr) scratch_free(lt_time_hdr); + if (rt_time_hdr) scratch_free(rt_time_hdr); + return ray_error("oom", NULL); + } + if (left_n > 0) memset(lt_null, 0, (size_t)left_n); + if (right_n > 0) memset(rt_null, 0, (size_t)right_n); + if (lt_time_vec->attrs & RAY_ATTR_HAS_NULLS) + for (int64_t i = 0; i < left_n; i++) + if (ray_vec_is_null(lt_time_vec, i)) lt_null[i] = 1; + if (rt_time_vec->attrs & RAY_ATTR_HAS_NULLS) + for (int64_t i = 0; i < right_n; i++) + if (ray_vec_is_null(rt_time_vec, i)) rt_null[i] = 1; + for (uint8_t k = 0; k < n_eq; k++) { + if (lt_eq[k]->attrs & RAY_ATTR_HAS_NULLS) + for (int64_t i = 0; i < left_n; i++) + if (ray_vec_is_null(lt_eq[k], i)) lt_null[i] = 1; + if (rt_eq[k]->attrs & RAY_ATTR_HAS_NULLS) + for (int64_t i = 0; i < right_n; i++) + if (ray_vec_is_null(rt_eq[k], i)) rt_null[i] = 1; + } + + /* Sort both tables by (eq_keys, time_key) using index arrays. Rows + * with any null key sort LAST (NULLS LAST) so the merge walk reaches + * them once all real candidates are consumed and can skip them + * cheaply. 
*/ + ray_t* li_hdr = NULL, *ri_hdr = NULL; + int64_t* li_idx = (int64_t*)scratch_alloc(&li_hdr, (size_t)left_n * sizeof(int64_t)); + int64_t* ri_idx = (int64_t*)scratch_alloc(&ri_hdr, (size_t)right_n * sizeof(int64_t)); + if ((!li_idx && left_n > 0) || (!ri_idx && right_n > 0)) { + if (li_hdr) scratch_free(li_hdr); + if (ri_hdr) scratch_free(ri_hdr); + if (lt_null_hdr) scratch_free(lt_null_hdr); + if (rt_null_hdr) scratch_free(rt_null_hdr); + if (lt_time_hdr) scratch_free(lt_time_hdr); + if (rt_time_hdr) scratch_free(rt_time_hdr); + return ray_error("oom", NULL); + } + for (int64_t i = 0; i < left_n; i++) li_idx[i] = i; + for (int64_t i = 0; i < right_n; i++) ri_idx[i] = i; + + /* Bottom-up mergesort on index arrays — O(N log N) */ + { + int64_t max_n = left_n > right_n ? left_n : right_n; + ray_t* tmp_hdr = NULL; + int64_t* tmp = max_n > 0 + ? (int64_t*)scratch_alloc(&tmp_hdr, (size_t)max_n * sizeof(int64_t)) + : NULL; + if (!tmp && max_n > 0) { + scratch_free(li_hdr); scratch_free(ri_hdr); + if (lt_null_hdr) scratch_free(lt_null_hdr); + if (rt_null_hdr) scratch_free(rt_null_hdr); + if (lt_time_hdr) scratch_free(lt_time_hdr); + if (rt_time_hdr) scratch_free(rt_time_hdr); + return ray_error("oom", NULL); + } + + /* Sort left indices by (nulls-last, eq_keys, time) */ + for (int64_t width = 1; width < left_n; width *= 2) { + for (int64_t lo = 0; lo < left_n; lo += 2 * width) { + int64_t mid = lo + width; + int64_t hi = lo + 2 * width; + if (mid > left_n) mid = left_n; + if (hi > left_n) hi = left_n; + int64_t a = lo, b = mid, t = lo; + while (a < mid && b < hi) { + int64_t ai = li_idx[a], bi = li_idx[b]; + int cmp = 0; + if (lt_null[ai] != lt_null[bi]) + cmp = lt_null[ai] - lt_null[bi]; /* 1 > 0 → nulls last */ + for (uint8_t k2 = 0; k2 < n_eq && cmp == 0; k2++) { + int64_t va = read_col_i64(ray_data(lt_eq[k2]), ai, lt_eq[k2]->type, lt_eq[k2]->attrs); + int64_t vb = read_col_i64(ray_data(lt_eq[k2]), bi, lt_eq[k2]->type, lt_eq[k2]->attrs); + if (va < vb) cmp = -1; + 
else if (va > vb) cmp = 1; + } + if (cmp == 0) { + if (lt_time[ai] < lt_time[bi]) cmp = -1; + else if (lt_time[ai] > lt_time[bi]) cmp = 1; + } + tmp[t++] = (cmp <= 0) ? li_idx[a++] : li_idx[b++]; + } + while (a < mid) tmp[t++] = li_idx[a++]; + while (b < hi) tmp[t++] = li_idx[b++]; + for (int64_t c = lo; c < hi; c++) li_idx[c] = tmp[c]; + } + } + + /* Sort right indices by (nulls-last, eq_keys, time) */ + for (int64_t width = 1; width < right_n; width *= 2) { + for (int64_t lo = 0; lo < right_n; lo += 2 * width) { + int64_t mid = lo + width; + int64_t hi = lo + 2 * width; + if (mid > right_n) mid = right_n; + if (hi > right_n) hi = right_n; + int64_t a = lo, b = mid, t = lo; + while (a < mid && b < hi) { + int64_t ai = ri_idx[a], bi = ri_idx[b]; + int cmp = 0; + if (rt_null[ai] != rt_null[bi]) + cmp = rt_null[ai] - rt_null[bi]; + for (uint8_t k2 = 0; k2 < n_eq && cmp == 0; k2++) { + int64_t va = read_col_i64(ray_data(rt_eq[k2]), ai, rt_eq[k2]->type, rt_eq[k2]->attrs); + int64_t vb = read_col_i64(ray_data(rt_eq[k2]), bi, rt_eq[k2]->type, rt_eq[k2]->attrs); + if (va < vb) cmp = -1; + else if (va > vb) cmp = 1; + } + if (cmp == 0) { + if (rt_time[ai] < rt_time[bi]) cmp = -1; + else if (rt_time[ai] > rt_time[bi]) cmp = 1; + } + tmp[t++] = (cmp <= 0) ? 
ri_idx[a++] : ri_idx[b++]; + } + while (a < mid) tmp[t++] = ri_idx[a++]; + while (b < hi) tmp[t++] = ri_idx[b++]; + for (int64_t c = lo; c < hi; c++) ri_idx[c] = tmp[c]; + } + } + + if (tmp_hdr) scratch_free(tmp_hdr); + } + + /* Build match array: for each left row (sorted), find best right match */ + ray_t* match_hdr = NULL; + int64_t* match = (int64_t*)scratch_alloc(&match_hdr, (size_t)left_n * sizeof(int64_t)); + if (!match && left_n > 0) { + scratch_free(li_hdr); scratch_free(ri_hdr); + if (lt_null_hdr) scratch_free(lt_null_hdr); + if (rt_null_hdr) scratch_free(rt_null_hdr); + if (lt_time_hdr) scratch_free(lt_time_hdr); + if (rt_time_hdr) scratch_free(rt_time_hdr); + return ray_error("oom", NULL); + } + + /* Two-pointer merge with best-match carry-forward. Because the sort + * pins null-keyed rows to the end, skipping them is just an early + * "no match" for left and a plain `rp++` for right. */ + int64_t rp = 0; /* right pointer (only advances) */ + int64_t best_ri = -1; /* best right match in current partition */ + /* Track the previous *non-null* left row for partition-change detection + * so a null-keyed left row doesn't force an incorrect partition reset + * (and so its own null keys aren't read through read_col_i64). */ + int64_t prev_non_null_li = -1; + for (int64_t lp = 0; lp < left_n; lp++) { + int64_t li = li_idx[lp]; + + if (lt_null[li]) { + /* Null-keyed left row cannot match; in left-outer mode it + * still appears in the result with all right cols null. 
*/ + match[lp] = -1; + continue; + } + + /* Detect partition change — reset best match and rewind rp */ + if (prev_non_null_li >= 0) { + int changed = 0; + for (uint8_t k = 0; k < n_eq; k++) { + int64_t cv = read_col_i64(ray_data(lt_eq[k]), li, lt_eq[k]->type, lt_eq[k]->attrs); + int64_t pv = read_col_i64(ray_data(lt_eq[k]), prev_non_null_li, lt_eq[k]->type, lt_eq[k]->attrs); + if (cv != pv) { changed = 1; break; } + } + if (changed) { + best_ri = -1; + /* Rewind rp to find start of new partition in right table */ + while (rp > 0) { + int64_t ri_prev = ri_idx[rp - 1]; + if (rt_null[ri_prev]) break; + int eq_match = 1; + for (uint8_t k = 0; k < n_eq; k++) { + int64_t rv = read_col_i64(ray_data(rt_eq[k]), ri_prev, rt_eq[k]->type, rt_eq[k]->attrs); + int64_t lv = read_col_i64(ray_data(lt_eq[k]), li, lt_eq[k]->type, lt_eq[k]->attrs); + if (rv < lv) { eq_match = 0; break; } + } + if (!eq_match) break; + rp--; + } + } + } + + /* Advance right pointer, accumulating best match */ + while (rp < right_n) { + int64_t ri = ri_idx[rp]; + if (rt_null[ri]) { rp++; continue; } /* null keys never match */ + int eq_cmp = 0; + for (uint8_t k = 0; k < n_eq && eq_cmp == 0; k++) { + int64_t rv = read_col_i64(ray_data(rt_eq[k]), ri, rt_eq[k]->type, rt_eq[k]->attrs); + int64_t lv = read_col_i64(ray_data(lt_eq[k]), li, lt_eq[k]->type, lt_eq[k]->attrs); + if (rv < lv) eq_cmp = -1; + else if (rv > lv) eq_cmp = 1; + } + if (eq_cmp > 0) break; /* right partition past left */ + if (eq_cmp == 0) { + if (rt_time[ri] <= lt_time[li]) + best_ri = ri; /* valid candidate */ + else + break; /* right time past left time */ + } + rp++; + } + match[lp] = best_ri; + prev_non_null_li = li; + } + + /* Remap match[] from sorted order to original left-row order. + * match[lp] gives the best right row for sorted left position lp. + * We need match_orig[li] = best right row for original left row li. 
*/ + ray_t* mo_hdr = NULL; + int64_t* match_orig = (int64_t*)scratch_alloc(&mo_hdr, (size_t)left_n * sizeof(int64_t)); + if (!match_orig && left_n > 0) { + scratch_free(match_hdr); scratch_free(li_hdr); scratch_free(ri_hdr); + return ray_error("oom", NULL); + } + for (int64_t lp = 0; lp < left_n; lp++) + match_orig[li_idx[lp]] = match[lp]; + + /* Count output rows */ + int64_t out_n = 0; + if (join_type == 1) { + out_n = left_n; /* left outer: all left rows */ + } else { + for (int64_t i = 0; i < left_n; i++) + if (match_orig[i] >= 0) out_n++; + } + + /* Build output table */ + int64_t left_ncols = ray_table_ncols(left_table); + int64_t right_ncols = ray_table_ncols(right_table); + + /* Collect right column indices, excluding duplicate key columns */ + int64_t right_out_idx[256]; + int64_t right_out_count = 0; + for (int64_t c = 0; c < right_ncols; c++) { + int64_t rname = ray_table_col_name(right_table, c); + int skip = 0; + if (rname == time_sym) skip = 1; + for (uint8_t k = 0; k < n_eq && !skip; k++) + if (rname == eq_syms[k]) skip = 1; + if (!skip) right_out_idx[right_out_count++] = c; + } + + ray_t* out = ray_table_new(left_ncols + right_out_count); + + /* Build index arrays for gather so col_propagate_nulls_gather can + * copy the null bitmap correctly (null bit in source → null bit in + * output, plus explicit null for match_orig == -1 on the right side). */ + ray_t* lidx_hdr = NULL, *ridx_hdr = NULL; + int64_t* lidx = out_n > 0 + ? (int64_t*)scratch_alloc(&lidx_hdr, (size_t)out_n * sizeof(int64_t)) + : NULL; + int64_t* ridx = out_n > 0 + ? 
(int64_t*)scratch_alloc(&ridx_hdr, (size_t)out_n * sizeof(int64_t)) + : NULL; + if (out_n > 0 && (!lidx || !ridx)) { + if (lidx_hdr) scratch_free(lidx_hdr); + if (ridx_hdr) scratch_free(ridx_hdr); + scratch_free(mo_hdr); + scratch_free(match_hdr); + scratch_free(li_hdr); + scratch_free(ri_hdr); + if (lt_null_hdr) scratch_free(lt_null_hdr); + if (rt_null_hdr) scratch_free(rt_null_hdr); + if (lt_time_hdr) scratch_free(lt_time_hdr); + if (rt_time_hdr) scratch_free(rt_time_hdr); + return ray_error("oom", NULL); + } + { + int64_t wi = 0; + for (int64_t li = 0; li < left_n; li++) { + if (join_type == 0 && match_orig[li] < 0) continue; + lidx[wi] = li; + ridx[wi] = match_orig[li]; + wi++; + } + } + + /* Gather left columns — iterate in original row order, preserve nulls */ + for (int64_t c = 0; c < left_ncols; c++) { + int64_t col_name = ray_table_col_name(left_table, c); + ray_t* src_col = ray_table_get_col_idx(left_table, c); + int8_t ctype = src_col->type; + ray_t* dst_col = ray_vec_new(ctype, out_n); + + uint8_t esz = ray_type_sizes[ctype]; + char* src = (char*)ray_data(src_col); + char* dst = (char*)ray_data(dst_col); + for (int64_t wi = 0; wi < out_n; wi++) + memcpy(dst + wi * esz, src + lidx[wi] * esz, esz); + dst_col->len = out_n; + col_propagate_str_pool(dst_col, src_col); + col_propagate_nulls_gather(dst_col, src_col, lidx, out_n); + out = ray_table_add_col(out, col_name, dst_col); + ray_release(dst_col); + } + + /* Gather right columns (excluding key duplicates) — original left-row order. + * For unmatched rows (ridx[wi] == -1) we memset 0 for the value and + * rely on col_propagate_nulls_gather to set the null bit; the zero + * bytes keep the vector well-formed when consumers ignore the null + * bit. 
*/ + for (int64_t rc = 0; rc < right_out_count; rc++) { + int64_t cidx = right_out_idx[rc]; + int64_t col_name = ray_table_col_name(right_table, cidx); + ray_t* src_col = ray_table_get_col_idx(right_table, cidx); + int8_t ctype = src_col->type; + ray_t* dst_col = ray_vec_new(ctype, out_n); + + uint8_t esz = ray_type_sizes[ctype]; + char* src = (char*)ray_data(src_col); + char* dst = (char*)ray_data(dst_col); + for (int64_t wi = 0; wi < out_n; wi++) { + int64_t ri = ridx[wi]; + if (ri >= 0) memcpy(dst + wi * esz, src + ri * esz, esz); + else memset(dst + wi * esz, 0, esz); + } + dst_col->len = out_n; + col_propagate_str_pool(dst_col, src_col); + col_propagate_nulls_gather(dst_col, src_col, ridx, out_n); + out = ray_table_add_col(out, col_name, dst_col); + ray_release(dst_col); + } + + if (lidx_hdr) scratch_free(lidx_hdr); + if (ridx_hdr) scratch_free(ridx_hdr); + scratch_free(mo_hdr); + scratch_free(match_hdr); + scratch_free(li_hdr); + scratch_free(ri_hdr); + if (lt_null_hdr) scratch_free(lt_null_hdr); + if (rt_null_hdr) scratch_free(rt_null_hdr); + if (lt_time_hdr) scratch_free(lt_time_hdr); + if (rt_time_hdr) scratch_free(rt_time_hdr); + return out; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/journal.c b/crates/rayforce-sys/vendor/rayforce/src/ops/journal.c new file mode 100644 index 0000000..fa18294 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/journal.c @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "journal.h" +#include "store/journal.h" +#include "store/serde.h" +#include "core/ipc.h" +#include "mem/sys.h" + +#include + +/* Copy a Rayfall string atom into a NUL-terminated C buffer. Returns + * NULL if the atom isn't a string or doesn't fit. */ +static const char* str_to_cpath(ray_t* s, char* buf, size_t bufsz) { + if (!s || s->type != -RAY_STR) return NULL; + const char* p = ray_str_ptr(s); + size_t n = ray_str_len(s); + if (n + 1 > bufsz) return NULL; + memcpy(buf, p, n); + buf[n] = '\0'; + return buf; +} + +/* Map a ray_err_t into a Rayfall error object so callers can `try` them. */ +static ray_t* err_to_ray(ray_err_t e, const char* fallback) { + if (e == RAY_OK) return RAY_NULL_OBJ; + const char* code = ray_err_code_str(e); + return ray_error(code ? code : (fallback ? 
fallback : "io"), NULL); +} + +/* (.log.open args) — args is a 2-tuple (`async; "base") or (`sync; "base"). + * Accepting the mode as a sym keyword keeps the call self-documenting + * without needing a second function or a magic int. */ +ray_t* ray_log_open_fn(ray_t** args, int64_t n) { + if (n != 2) + return ray_error("rank", ".log.open expects (`async|`sync; \"base\")"); + if (!args[0] || args[0]->type != -RAY_SYM) + return ray_error("type", ".log.open mode must be `async or `sync"); + if (!args[1] || args[1]->type != -RAY_STR) + return ray_error("type", ".log.open base must be a string"); + + int64_t sym_async = ray_sym_intern("async", 5); + int64_t sym_sync = ray_sym_intern("sync", 4); + int64_t mode_id = args[0]->i64; + ray_journal_mode_t mode; + if (mode_id == sym_async) mode = RAY_JOURNAL_ASYNC; + else if (mode_id == sym_sync) mode = RAY_JOURNAL_SYNC; + else return ray_error("domain", ".log.open mode must be `async or `sync"); + + char base[1024]; + if (!str_to_cpath(args[1], base, sizeof(base))) + return ray_error("type", ".log.open base path too long or not a string"); + + ray_err_t e = ray_journal_open(base, mode); + return err_to_ray(e, "io"); +} + +/* (.log.write expr) — append a synthetic entry containing the + * serialized form of `expr`. Useful for users who want REPL-driven + * mutations captured in the log alongside the IPC stream. + * + * If the journal isn't open, ERROR rather than silently no-op — a + * silent no-op would lie to the user about durability ("I logged + * your change") when in fact nothing was persisted. 
*/ +ray_t* ray_log_write_fn(ray_t* expr) { + if (!ray_journal_is_open()) + return ray_error("noopen", ".log.write: no journal open (start with -l/-L)"); + if (!expr) return ray_error("type", ".log.write expects an argument"); + + int64_t pay_size = ray_serde_size(expr); + if (pay_size <= 0) return ray_error("domain", ".log.write: serde size 0"); + + uint8_t* payload = (uint8_t*)ray_sys_alloc((size_t)pay_size); + if (!payload) return ray_error("oom", NULL); + + int64_t written = ray_ser_raw(payload, expr); + if (written != pay_size) { ray_sys_free(payload); return ray_error("io", NULL); } + + ray_ipc_header_t hdr = { + .prefix = RAY_SERDE_PREFIX, + .version = RAY_SERDE_WIRE_VERSION, + .flags = 0, + .endian = 0, + .msgtype = RAY_IPC_MSG_ASYNC, + .size = pay_size, + }; + ray_err_t e = ray_journal_write_bytes(&hdr, payload, pay_size); + ray_sys_free(payload); + return err_to_ray(e, "io"); +} + +ray_t* ray_log_replay_fn(ray_t* path) { + char p[1024]; + if (!str_to_cpath(path, p, sizeof(p))) + return ray_error("type", ".log.replay expects a string path"); + + int64_t chunks = 0, errs = 0; + ray_jreplay_status_t status = RAY_JREPLAY_OK; + ray_journal_replay(p, &chunks, &errs, &status); + switch (status) { + case RAY_JREPLAY_OK: + return ray_i64(chunks); + case RAY_JREPLAY_BADTAIL: { + int64_t valid_chunks = 0, valid_bytes = 0; + ray_journal_validate(p, &valid_chunks, &valid_bytes); + return ray_error("badtail", + "%s: framing broken after %lld entries (valid bytes = %lld)", + p, (long long)chunks, (long long)valid_bytes); + } + case RAY_JREPLAY_DESER: + return ray_error("deser", + "%s: deserialization failed at chunk %lld — framing intact, content/version skew", + p, (long long)chunks); + case RAY_JREPLAY_DECOMP: + return ray_error("decompress", + "%s: decompression failed at chunk %lld — framing intact, do not truncate", + p, (long long)chunks); + case RAY_JREPLAY_OOM: + return ray_error("oom", + "%s: out of memory mid-replay after %lld entries", + p, (long long)chunks); 
+ case RAY_JREPLAY_IO: + return ray_error("io", + "%s: I/O failure after %lld entries", p, (long long)chunks); + } + return ray_error("internal", "unknown replay status"); +} + +/* (.log.validate "path") -> (chunks; valid_bytes) — a 2-list. */ +ray_t* ray_log_validate_fn(ray_t* path) { + char p[1024]; + if (!str_to_cpath(path, p, sizeof(p))) + return ray_error("type", ".log.validate expects a string path"); + + int64_t chunks = 0, valid_bytes = 0; + ray_err_t e = ray_journal_validate(p, &chunks, &valid_bytes); + if (e != RAY_OK) return err_to_ray(e, "io"); + + ray_t* list = ray_list_new(2); + if (!list || RAY_IS_ERR(list)) return ray_error("oom", NULL); + ray_t* a = ray_i64(chunks); + ray_t* b = ray_i64(valid_bytes); + list = ray_list_append(list, a); ray_release(a); + if (RAY_IS_ERR(list)) { ray_release(b); return list; } + list = ray_list_append(list, b); ray_release(b); + return list; +} + +ray_t* ray_log_roll_fn(ray_t** args, int64_t n) { + (void)args; (void)n; + if (!ray_journal_is_open()) + return ray_error("domain", ".log.roll: no journal open"); + return err_to_ray(ray_journal_roll(), "io"); +} + +ray_t* ray_log_snapshot_fn(ray_t** args, int64_t n) { + (void)args; (void)n; + if (!ray_journal_is_open()) + return ray_error("domain", ".log.snapshot: no journal open"); + return err_to_ray(ray_journal_snapshot(), "io"); +} + +ray_t* ray_log_sync_fn(ray_t** args, int64_t n) { + (void)args; (void)n; + return err_to_ray(ray_journal_sync(), "io"); +} + +ray_t* ray_log_close_fn(ray_t** args, int64_t n) { + (void)args; (void)n; + return err_to_ray(ray_journal_close(), "io"); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/journal.h b/crates/rayforce-sys/vendor/rayforce/src/ops/journal.h new file mode 100644 index 0000000..daea3df --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/journal.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Rayfall-facing thin wrappers over store/journal.{c,h}. These are the + * functions registered under the `.log.*` namespace in eval.c. */ +#ifndef RAY_OPS_JOURNAL_H +#define RAY_OPS_JOURNAL_H + +#include + +/* (.log.open args) — args = (`async; "base") or (`sync; "base"). + * Loads .qdb if present, replays .log if present, then + * opens .log for append. */ +ray_t* ray_log_open_fn(ray_t** args, int64_t n); + +/* (.log.write expr) — append a synthetic entry containing the + * serialized form of `expr`. No-op (returns null) if no journal is + * open or if a replay is currently in progress. */ +ray_t* ray_log_write_fn(ray_t* expr); + +/* (.log.replay "path") -> i64 chunks replayed. Errors with "badtail" + * if the file ends mid-frame; the error message includes the byte + * offset of the last good entry. 
*/ +ray_t* ray_log_replay_fn(ray_t* path); + +/* (.log.validate "path") -> (chunks; valid_bytes) pair. Maps to + * q's `-11!(-2; file)` — count valid frames without evaluating. */ +ray_t* ray_log_validate_fn(ray_t* path); + +/* (.log.roll) — close and rename current log to ..log, + * open a fresh empty .log. */ +ray_t* ray_log_roll_fn(ray_t** args, int64_t n); + +/* (.log.snapshot) — write the current global env to .qdb, + * then roll the log. */ +ray_t* ray_log_snapshot_fn(ray_t** args, int64_t n); + +/* (.log.sync) — fflush + fsync the open log (no-op in -L mode). */ +ray_t* ray_log_sync_fn(ray_t** args, int64_t n); + +/* (.log.close) — close the active log. */ +ray_t* ray_log_close_fn(ray_t** args, int64_t n); + +#endif /* RAY_OPS_JOURNAL_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/lftj.c b/crates/rayforce-sys/vendor/rayforce/src/ops/lftj.c new file mode 100644 index 0000000..c991ec9 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/lftj.c @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "lftj.h" +#include + +/* Grow output buffers when full. Returns false on OOM. */ +static bool lftj_grow_output(lftj_enum_ctx_t* ctx) { + if (ctx->out_cap > INT64_MAX / 2) return false; + int64_t new_cap = ctx->out_cap < 64 ? 64 : ctx->out_cap * 2; + /* Allocate all new blocks first (atomic: no state change on failure) */ + ray_t* new_hdrs[LFTJ_MAX_VARS]; + for (uint8_t v = 0; v < ctx->n_vars; v++) { + new_hdrs[v] = ray_alloc((size_t)new_cap * sizeof(int64_t)); + if (!new_hdrs[v]) { + for (uint8_t j = 0; j < v; j++) ray_free(new_hdrs[j]); + return false; + } + memcpy(ray_data(new_hdrs[v]), ctx->col_data[v], + (size_t)ctx->out_count * sizeof(int64_t)); + } + /* Commit: swap pointers (no allocation can fail past here) */ + for (uint8_t v = 0; v < ctx->n_vars; v++) { + ray_free(ctx->buf_hdrs[v]); + ctx->buf_hdrs[v] = new_hdrs[v]; + ctx->col_data[v] = (int64_t*)ray_data(new_hdrs[v]); + } + ctx->out_cap = new_cap; + return true; +} + +/* -------------------------------------------------------------------------- + * Leapfrog search: intersect k sorted iterators + * Returns true + sets *out if intersection found. 
+ * -------------------------------------------------------------------------- */ + +bool leapfrog_search(ray_lftj_iter_t** iters, int k, int64_t* out) { + if (k <= 0) return false; + + /* Check for any exhausted iterator */ + for (int i = 0; i < k; i++) + if (lftj_at_end(iters[i])) return false; + + /* Find initial max */ + int max_idx = 0; + for (int i = 1; i < k; i++) + if (lftj_key(iters[i]) > lftj_key(iters[max_idx])) max_idx = i; + + for (;;) { + int64_t max_val = lftj_key(iters[max_idx]); + int next = (max_idx + 1) % k; + + lftj_seek(iters[next], max_val); + if (lftj_at_end(iters[next])) return false; + + if (lftj_key(iters[next]) == max_val) { + /* Check all iterators agree */ + bool all_equal = true; + for (int i = 0; i < k; i++) { + if (lftj_key(iters[i]) != max_val) { + all_equal = false; + break; + } + } + if (all_equal) { + *out = max_val; + return true; + } + } + max_idx = next; + } +} + +/* -------------------------------------------------------------------------- + * Binding plan construction + * -------------------------------------------------------------------------- */ + +bool lftj_build_plan(lftj_enum_ctx_t* ctx, + ray_rel_t** rels, uint8_t n_rels, uint8_t n_vars, + const uint8_t* rel_src_var, const uint8_t* rel_dst_var) { + if (n_vars > LFTJ_MAX_VARS) return false; + ctx->n_vars = n_vars; + + for (uint8_t v = 0; v < n_vars; v++) + ctx->var_plans[v].n_bindings = 0; + + /* For each relationship, add bindings to the appropriate variables. + * A relationship rel[i] connecting src_var→dst_var adds: + * - If dst_var has higher index: binding on dst_var using fwd CSR, bound_var=src_var + * - If src_var has higher index: binding on src_var using rev CSR, bound_var=dst_var + * + * For the first variable (depth 0), we need a special "root" iterator + * that enumerates all nodes. We handle this differently: depth-0 variable + * gets bindings from all rels where it's the src, using a full range iterator. 
+ */ + for (uint8_t r = 0; r < n_rels; r++) { + uint8_t sv = rel_src_var[r]; + uint8_t dv = rel_dst_var[r]; + + /* Self-loop (sv == dv) is invalid — skip it */ + if (sv == dv) continue; + if (sv >= n_vars || dv >= n_vars) return false; + + /* Add binding to the later-bound variable */ + if (sv < dv) { + /* sv is bound first; add fwd binding to dv */ + lftj_var_plan_t* vp = &ctx->var_plans[dv]; + if (vp->n_bindings >= LFTJ_MAX_ITERS_PER_VAR) return false; + vp->bindings[vp->n_bindings].csr = &rels[r]->fwd; + vp->bindings[vp->n_bindings].bound_var = sv; + vp->n_bindings++; + } else { + /* dv is bound first; add rev binding to sv */ + lftj_var_plan_t* vp = &ctx->var_plans[sv]; + if (vp->n_bindings >= LFTJ_MAX_ITERS_PER_VAR) return false; + vp->bindings[vp->n_bindings].csr = &rels[r]->rev; + vp->bindings[vp->n_bindings].bound_var = dv; + vp->n_bindings++; + } + } + + return true; +} + +bool lftj_build_default_plan(lftj_enum_ctx_t* ctx, + ray_rel_t** rels, uint8_t n_rels, uint8_t n_vars) { + if (n_vars == 3 && n_rels == 3) { + /* Triangle: rels[0]=a→b, rels[1]=b→c, rels[2]=a→c */ + uint8_t src_vars[3] = {0, 1, 0}; + uint8_t dst_vars[3] = {1, 2, 2}; + return lftj_build_plan(ctx, rels, n_rels, n_vars, src_vars, dst_vars); + } else if (n_vars == 2) { + /* All rels connect var 0→var 1 */ + uint8_t src_vars[16], dst_vars[16]; + if (n_rels > 16) return false; + for (uint8_t r = 0; r < n_rels; r++) { + src_vars[r] = 0; + dst_vars[r] = 1; + } + return lftj_build_plan(ctx, rels, n_rels, n_vars, src_vars, dst_vars); + } else if (n_vars == 4 && n_rels == 6) { + /* 4-clique: rels[0]=a→b, rels[1]=a→c, rels[2]=a→d, + * rels[3]=b→c, rels[4]=b→d, rels[5]=c→d */ + uint8_t src_vars[6] = {0, 0, 0, 1, 1, 2}; + uint8_t dst_vars[6] = {1, 2, 3, 2, 3, 3}; + return lftj_build_plan(ctx, rels, n_rels, n_vars, src_vars, dst_vars); + } + + /* Fallback: chain pattern — rel[i] connects var i→var i+1 */ + if (n_rels == n_vars - 1) { + uint8_t src_vars[16], dst_vars[16]; + if (n_rels > 16) return 
false; + for (uint8_t r = 0; r < n_rels; r++) { + src_vars[r] = r; + dst_vars[r] = r + 1; + } + return lftj_build_plan(ctx, rels, n_rels, n_vars, src_vars, dst_vars); + } + + return false; +} + +/* -------------------------------------------------------------------------- + * Recursive backtracking enumeration + * + * At each depth d, open iterators for variable d's bindings using the + * currently bound values, then leapfrog-intersect to find valid bindings. + * At the last depth, emit tuples to output. + * -------------------------------------------------------------------------- */ + +void lftj_enumerate(lftj_enum_ctx_t* ctx, uint8_t depth) { + if (ctx->oom) return; + + if (depth == ctx->n_vars) { + /* All variables bound — emit tuple */ + if (ctx->out_count >= ctx->out_cap) { + if (!lftj_grow_output(ctx)) { + ctx->oom = true; + return; + } + } + for (uint8_t v = 0; v < ctx->n_vars; v++) + ctx->col_data[v][ctx->out_count] = ctx->bound[v]; + ctx->out_count++; + return; + } + + lftj_var_plan_t* vp = &ctx->var_plans[depth]; + + if (vp->n_bindings == 0) { + /* Root variable (depth 0 with no bindings): iterate all nodes. + * Use the first rel's fwd CSR to determine node range. 
*/ + if (depth != 0) return; /* non-root var must have bindings */ + + /* Find max n_nodes across all CSRs in the query */ + int64_t n_nodes = 0; + for (uint8_t v = 0; v < ctx->n_vars; v++) { + for (uint8_t b = 0; b < ctx->var_plans[v].n_bindings; b++) { + if (ctx->var_plans[v].bindings[b].csr) { + int64_t nn = ctx->var_plans[v].bindings[b].csr->n_nodes; + if (nn > n_nodes) n_nodes = nn; + } + } + } + if (n_nodes == 0) return; + + for (int64_t a = 0; a < n_nodes; a++) { + ctx->bound[0] = a; + lftj_enumerate(ctx, 1); + if (ctx->oom) return; + } + return; + } + + /* Open iterators for this variable's bindings */ + ray_lftj_iter_t iter_buf[LFTJ_MAX_ITERS_PER_VAR]; + ray_lftj_iter_t* iter_ptrs[LFTJ_MAX_ITERS_PER_VAR]; + + for (uint8_t b = 0; b < vp->n_bindings; b++) { + lftj_binding_t* bind = &vp->bindings[b]; + if (!bind->csr) return; + int64_t parent = ctx->bound[bind->bound_var]; + if (parent < 0 || parent >= bind->csr->n_nodes) return; + lftj_open(&iter_buf[b], bind->csr, parent); + iter_ptrs[b] = &iter_buf[b]; + } + + /* Leapfrog intersect */ + int64_t val; + while (leapfrog_search(iter_ptrs, vp->n_bindings, &val)) { + ctx->bound[depth] = val; + lftj_enumerate(ctx, depth + 1); + if (ctx->oom) return; + /* Advance all iterators past current match */ + for (uint8_t b = 0; b < vp->n_bindings; b++) + lftj_next(iter_ptrs[b]); + } +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/lftj.h b/crates/rayforce-sys/vendor/rayforce/src/ops/lftj.h new file mode 100644 index 0000000..1ce4380 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/lftj.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RAY_LFTJ_H
+#define RAY_LFTJ_H
+
+#include "ops.h"
+#include "store/csr.h"
+
+/* Trie iterator over sorted CSR adjacency list.
+ * An iterator whose targets is NULL is "unopened" and behaves as exhausted. */
+typedef struct ray_lftj_iter {
+    int64_t* targets; /* pointer into CSR targets data */
+    int64_t start;    /* current range start */
+    int64_t end;      /* current range end */
+    int64_t pos;      /* current position in [start, end); pos == end means exhausted */
+} ray_lftj_iter_t;
+
+/* O(1) cursor primitives.
+ *   lftj_key     current key, or INT64_MAX when unopened or exhausted
+ *   lftj_at_end  true when unopened (targets == NULL) or pos has reached end
+ *   lftj_next    step forward one entry; no-op once pos == end */
+static inline int64_t lftj_key(ray_lftj_iter_t* it) {
+    if (!it->targets || it->pos >= it->end) return INT64_MAX;
+    return it->targets[it->pos];
+}
+
+static inline bool lftj_at_end(ray_lftj_iter_t* it) {
+    return !it->targets || it->pos >= it->end;
+}
+
+static inline void lftj_next(ray_lftj_iter_t* it) {
+    if (it->pos < it->end) it->pos++;
+}
+
+/* O(log degree) - binary search within [pos, end).
+ * Moves pos to the first entry >= v, or to end when there is none; pos never
+ * moves backwards. An unopened iterator is simply marked exhausted. */
+static inline void lftj_seek(ray_lftj_iter_t* it, int64_t v) {
+    if (!it->targets) { it->pos = it->end; return; }
+    int64_t lo = it->pos, hi = it->end;
+    while (lo < hi) {
+        int64_t mid = lo + (hi - lo) / 2;
+        if (it->targets[mid] < v) lo = mid + 1;
+        else hi = mid;
+    }
+    it->pos = lo;
+}
+
+/* Open trie level: set iterator to a node's adjacency list, i.e. the slice
+ * targets[offsets[parent] .. offsets[parent + 1]) of `csr`. A NULL csr,
+ * missing offsets/targets, or a parent outside [0, n_nodes) leaves the
+ * iterator unopened (targets == NULL, empty range). */
+static inline void lftj_open(ray_lftj_iter_t* it, ray_csr_t* csr, int64_t parent) {
+    if (!csr || !csr->offsets || !csr->targets
+        || parent < 0 || parent >= csr->n_nodes) {
+        it->targets = NULL; it->start = 0; it->end = 0; it->pos = 0;
+        return;
+    }
+    int64_t* o = (int64_t*)ray_data(csr->offsets);
+    it->targets = (int64_t*)ray_data(csr->targets);
+    it->start = o[parent];
+    it->end = o[parent + 1];
+    it->pos = it->start;
+}
+
+/* Leapfrog search: intersect k sorted iterators.
+ * Returns true and stores the common key in *out when all k iterators agree
+ * on a key; returns false when k <= 0 or any iterator runs out. On success
+ * the iterators are left on the match and must be advanced by the caller. */
+bool leapfrog_search(ray_lftj_iter_t** iters, int k, int64_t* out);
+
+/* --------------------------------------------------------------------------
+ * General LFTJ enumeration
+ * -------------------------------------------------------------------------- */
+
+#define LFTJ_MAX_VARS 16
+#define LFTJ_MAX_ITERS_PER_VAR 8
+
+/* Binding entry: one iterator constraint on a variable.
+ * "Open CSR `csr` at the node bound to `bound_var`" */
+typedef struct lftj_binding {
+    ray_csr_t* csr;    /* CSR to open (fwd or rev of some rel) */
+    uint8_t bound_var; /* index of already-bound variable providing the parent node */
+} lftj_binding_t;
+
+/* Per-variable binding plan: the iterators intersected to bind one variable */
+typedef struct lftj_var_plan {
+    lftj_binding_t bindings[LFTJ_MAX_ITERS_PER_VAR];
+    uint8_t n_bindings;
+} lftj_var_plan_t;
+
+/* Enumeration context */
+typedef struct lftj_enum_ctx {
+    lftj_var_plan_t var_plans[LFTJ_MAX_VARS];
+    uint8_t n_vars;
+    int64_t bound[LFTJ_MAX_VARS]; /* currently bound values */
+
+    /* Output buffers (caller-owned, dynamically grown) */
+    int64_t** col_data; /* [n_vars] arrays of output values */
+    int64_t out_count;
+    int64_t out_cap;
+    ray_t* buf_hdrs[LFTJ_MAX_VARS]; /* scratch headers for realloc */
+    bool oom; /* set on allocation failure */
+} lftj_enum_ctx_t;
+
+/* Build binding plan from relationship array.
+ * Assumes variable ordering 0..n_vars-1.
+ * For each rel: rel[i] connects src_var→dst_var.
+ * The caller encodes this mapping as (src_var, dst_var) pairs.
+ * The binding goes on whichever of the two variables has the higher index
+ * (fwd CSR when that is dst_var, rev CSR when it is src_var); pairs with
+ * src_var == dst_var are skipped.
+ * Returns true on success; false when n_vars > LFTJ_MAX_VARS, a variable
+ * index is >= n_vars, or a variable would exceed LFTJ_MAX_ITERS_PER_VAR. */
+bool lftj_build_plan(lftj_enum_ctx_t* ctx,
+                     ray_rel_t** rels, uint8_t n_rels, uint8_t n_vars,
+                     const uint8_t* rel_src_var, const uint8_t* rel_dst_var);
+
+/* Build default binding plan for simple patterns.
+ * Triangle (n_vars=3, n_rels=3): rels[0]=a→b, rels[1]=b→c, rels[2]=a→c
+ * 2-var (n_vars=2): all rels connect var 0→var 1
+ * 4-clique (n_vars=4, n_rels=6): a→b, a→c, a→d, b→c, b→d, c→d
+ * Chain (n_rels == n_vars-1): rel[i] connects var i→var i+1
+ * Returns true on success, false if pattern not recognized. */
+bool lftj_build_default_plan(lftj_enum_ctx_t* ctx,
+                             ray_rel_t** rels, uint8_t n_rels, uint8_t n_vars);
+
+/* Recursive backtracking enumeration.
+ * Caller must initialize ctx->col_data, out_cap, out_count=0, buf_hdrs.
+ * Variable 0 is the root: when it has no bindings every node id is tried,
+ * so the top-level call passes depth 0. ctx->oom is set, and enumeration
+ * stops, if the output buffers cannot be grown.
+ * Populates ctx->col_data with matching tuples.
*/ +void lftj_enumerate(lftj_enum_ctx_t* ctx, uint8_t depth); + +#endif /* RAY_LFTJ_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/linkop.c b/crates/rayforce-sys/vendor/rayforce/src/ops/linkop.c new file mode 100644 index 0000000..0d0aa11 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/linkop.c @@ -0,0 +1,328 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "linkop.h" +#include "idxop.h" +#include "ops/internal.h" /* col_propagate_str_pool */ +#include "ops/ops.h" /* RAY_IS_PARTED */ +#include "mem/cow.h" +#include "vec/vec.h" +#include "table/table.h" +#include "table/sym.h" +#include "lang/eval.h" +#include "lang/env.h" +#include + +/* -------------------------------------------------------------------------- + * Promote inline nullmap to ext-nullmap before attaching a link. 
+ * + * A linked column places its int64 target sym at nullmap-union bytes 8-15. + * If the column has inline nulls and >64 elements, those bytes hold real + * bitmap bits that would be clobbered. Promote up front to keep nulls + * intact. Mirrors the promotion logic in ray_vec_set_null_checked. */ +static ray_err_t promote_inline_to_ext(ray_t* vec) { + if (!(vec->attrs & RAY_ATTR_HAS_NULLS)) return RAY_OK; + if (vec->attrs & RAY_ATTR_NULLMAP_EXT) return RAY_OK; + + int64_t bitmap_len = (vec->len + 7) / 8; + if (bitmap_len < 1) bitmap_len = 1; + ray_t* ext = ray_vec_new(RAY_U8, bitmap_len); + if (!ext || RAY_IS_ERR(ext)) return RAY_ERR_OOM; + ext->len = bitmap_len; + + /* Copy existing inline bits (16 bytes max) into ext. */ + int64_t copy = bitmap_len < 16 ? bitmap_len : 16; + memcpy(ray_data(ext), vec->nullmap, (size_t)copy); + if (bitmap_len > 16) { + memset((char*)ray_data(ext) + 16, 0, (size_t)(bitmap_len - 16)); + } + /* Now overwrite bytes 0-7 with the ext_nullmap pointer. Bytes 8-15 + * become don't-care — caller is about to write link_target there. */ + vec->ext_nullmap = ext; + vec->attrs |= RAY_ATTR_NULLMAP_EXT; + return RAY_OK; +} + +/* -------------------------------------------------------------------------- + * ray_link_attach + * -------------------------------------------------------------------------- */ + +ray_t* ray_link_attach(ray_t** vp, int64_t target_sym_id) { + if (!vp || !*vp || RAY_IS_ERR(*vp)) + return ray_error("type", "link: null/error vector"); + ray_t* v = *vp; + + if (!ray_is_vec(v) || (v->type != RAY_I32 && v->type != RAY_I64)) + return ray_error("type", "link: column must be RAY_I32 or RAY_I64 (got %d)", + (int)v->type); + if (v->attrs & RAY_ATTR_SLICE) + return ray_error("type", "link: cannot attach to a slice; materialize first"); + if (target_sym_id < 0) + return ray_error("type", "link: invalid target sym ID"); + + /* Validate that target_sym_id resolves to a RAY_TABLE in the env. 
*/ + ray_t* target = ray_env_get(target_sym_id); + if (!target || target->type != RAY_TABLE) + return ray_error("name", "link: target sym does not name a table"); + + /* Reject parted dim tables — deref math (target_col[linkcol[i]]) is + * straight indexing, with no notion of which segment a global rowid + * lives in. Pointing a link at a parted target would silently + * misbehave at deref time. Better an explicit nyi here than a + * three-layers-deep wrong-answer bug. See guide-indexes.html and + * queries-links.html for the supported shape (parted fact -> regular + * non-parted dim). */ + int64_t tcols = ray_table_ncols(target); + for (int64_t c = 0; c < tcols; c++) { + ray_t* tcol = ray_table_get_col_idx(target, c); + if (tcol && RAY_IS_PARTED(tcol->type)) + return ray_error("nyi", + "link: target table has a parted column (%d); " + "link targets must be non-parted (in-memory or splayed) tables", + (int)c); + } + + /* COW so we own the bytes we're about to mutate. */ + v = ray_cow(v); + if (!v || RAY_IS_ERR(v)) return v; + *vp = v; + + /* Promote nulls to ext if necessary so bytes 8-15 are free. */ + ray_err_t err = promote_inline_to_ext(v); + if (err != RAY_OK) return ray_error(ray_err_code_str(err), "link: oom"); + + /* Replace any existing link (idempotent re-attach with new target). */ + v->link_target = target_sym_id; + v->attrs |= RAY_ATTR_HAS_LINK; + + /* If an accelerator index is also attached, the index's saved snapshot + * captured the pre-link bytes 8-15 (which were _idx_pad / NULL). Update + * the snapshot so a future index-drop restores the link too. 
*/ + if (v->attrs & RAY_ATTR_HAS_INDEX) { + ray_index_t* ix = ray_index_payload(v->index); + memcpy(&ix->saved_nullmap[8], &target_sym_id, 8); + } + return v; +} + +/* -------------------------------------------------------------------------- + * ray_link_detach + * -------------------------------------------------------------------------- */ + +ray_t* ray_link_detach(ray_t** vp) { + if (!vp || !*vp || RAY_IS_ERR(*vp)) return *vp; + ray_t* v = *vp; + if (!(v->attrs & RAY_ATTR_HAS_LINK)) return v; + + v = ray_cow(v); + if (!v || RAY_IS_ERR(v)) { *vp = v; return v; } + *vp = v; + + v->link_target = 0; + v->attrs &= (uint8_t)~RAY_ATTR_HAS_LINK; + + if (v->attrs & RAY_ATTR_HAS_INDEX) { + ray_index_t* ix = ray_index_payload(v->index); + memset(&ix->saved_nullmap[8], 0, 8); + } + return v; +} + +/* -------------------------------------------------------------------------- + * ray_link_deref — produce target_col[link_col[i]] for each row i + * + * Result type matches the target column type. Length matches the link + * column. Null rows in the link become null in the result; null rows in + * the target also propagate. Returns NULL when target table or field + * column don't exist (caller treats as a probe miss). + * -------------------------------------------------------------------------- */ + +ray_t* ray_link_deref(ray_t* v, int64_t sym_id) { + if (!ray_link_has(v)) return NULL; + if (v->type != RAY_I32 && v->type != RAY_I64) return NULL; + + /* Slice-through: a slice over a linked parent inherits the link. + * link_target lives on the parent; the slice's own bytes 8-15 are + * slice_offset, which would be garbage if we read it as a sym ID. */ + int64_t target_sym = (v->attrs & RAY_ATTR_SLICE) + ? v->slice_parent->link_target + : v->link_target; + ray_t* target_tab = ray_env_get(target_sym); + if (!target_tab || target_tab->type != RAY_TABLE) return NULL; + + /* Reject parted targets at deref time, mirroring the attach-time guard + * in ray_link_attach. 
The attach-time check catches the obvious case + * (user calls (.col.link 'parted_dim ...)), but two paths bypass it: + * 1. Lazy rebind — attach saw a non-parted table; the sym was later + * rebound to a parted one (env lookup is at deref time). + * 2. .link sidecar reload — try_load_link_sidecar (col.c) writes + * link_target straight from the on-disk sym name without any + * env-state check. + * Without a deref-time guard, both produce a silent wrong-answer bug + * (target_col[linkcol[i]] indexes into RAY_PARTED data, which is a + * list of segment pointers — straight-byte indexing is meaningless). */ + int64_t tcols = ray_table_ncols(target_tab); + for (int64_t c = 0; c < tcols; c++) { + ray_t* tcol = ray_table_get_col_idx(target_tab, c); + if (tcol && RAY_IS_PARTED(tcol->type)) + return ray_error("nyi", + "link deref: target table has a parted column (%d); " + "links to parted dim tables are not supported in v1", + (int)c); + } + + ray_t* target_col = ray_table_get_col(target_tab, sym_id); + if (!target_col) return NULL; + + int64_t n = v->len; + int64_t target_n = target_col->len; + int8_t out_type = target_col->type; + + /* Resolve through slices: SYM-width and (later) sym_dict / str_pool + * all live on the slice_parent's attrs/union, never on the slice + * itself. The slice contributes only its [slice_offset, len) view. + * Compute the canonical width and base-pointer once here so the + * gather loop stays correct for narrow-width sliced sym columns. */ + ray_t* col_owner = (target_col->attrs & RAY_ATTR_SLICE) + ? target_col->slice_parent : target_col; + int64_t col_off = (target_col->attrs & RAY_ATTR_SLICE) + ? target_col->slice_offset : 0; + uint8_t target_width = col_owner->attrs & RAY_SYM_W_MASK; + uint8_t target_esz = (out_type == RAY_SYM) + ? (uint8_t)(1u << target_width) + : ray_sym_elem_size(out_type, col_owner->attrs); + + /* Allocate result. 
For RAY_SYM mirror the parent's width so the + * subsequent memcpy is byte-correct; otherwise the canonical size + * for the type. */ + ray_t* result; + if (out_type == RAY_SYM) { + result = ray_sym_vec_new(target_width, n); + } else { + result = ray_vec_new(out_type, n); + } + if (!result || RAY_IS_ERR(result)) return result; + result->len = n; + + uint8_t out_esz = ray_sym_elem_size(out_type, result->attrs); + if (out_esz > 0) memset(ray_data(result), 0, (size_t)n * out_esz); + /* By construction, out_esz == target_esz: SYM widths match, + * STR is always 16, numeric types match because out_type == target. */ + + const uint8_t* link_base = (const uint8_t*)ray_data(v); + uint8_t link_esz = ray_sym_elem_size(v->type, v->attrs); + char* out_base = (char*)ray_data(result); + /* Compute the source-data base by hand (not via ray_data on the + * slice) because ray_data_fn assumes ray_type_sizes[RAY_SYM] = 8 + * (W64), which mis-offsets narrow-width sliced sym columns. */ + const char* col_data_base = (const char*)ray_data(col_owner); + const char* tgt_base = col_data_base + (size_t)col_off * target_esz; + + for (int64_t i = 0; i < n; i++) { + if (ray_vec_is_null(v, i)) { + ray_vec_set_null(result, i, true); + continue; + } + int64_t rid; + if (link_esz == 4) { + int32_t r; + memcpy(&r, link_base + i * 4, 4); + rid = (int64_t)r; + } else { + memcpy(&rid, link_base + i * 8, 8); + } + if (rid < 0 || rid >= target_n) { + ray_vec_set_null(result, i, true); + continue; + } + if (ray_vec_is_null(target_col, rid)) { + ray_vec_set_null(result, i, true); + continue; + } + if (target_esz > 0 && out_esz == target_esz) { + memcpy(out_base + i * out_esz, + tgt_base + rid * target_esz, + target_esz); + } + } + + /* Type-specific metadata propagation. + * RAY_STR: share the source pool so ray_str_t pool_offs are valid. + * RAY_SYM: if the source column carries a local sym_dict, share it. + * + * sym_dict aliases bytes 8-15 of the nullmap union. 
It is only a + * real pointer when the column doesn't have inline nulls clobbering + * those bytes, i.e. either no nulls or NULLMAP_EXT. Mirrors the + * guard pattern in src/ops/sort.c:3307 and src/ops/rerank.c:182. */ + if (out_type == RAY_STR) { + col_propagate_str_pool(result, target_col); + } else if (out_type == RAY_SYM) { + if (col_owner && !(col_owner->attrs & RAY_ATTR_SLICE) && + (!(col_owner->attrs & RAY_ATTR_HAS_NULLS) || + (col_owner->attrs & RAY_ATTR_NULLMAP_EXT)) && + col_owner->sym_dict) { + ray_retain(col_owner->sym_dict); + result->sym_dict = col_owner->sym_dict; + } + } + return result; +} + +/* -------------------------------------------------------------------------- + * Rayfall builtin entry points + * -------------------------------------------------------------------------- */ + +ray_t* ray_col_link_fn(ray_t* target_sym, ray_t* int_vec) { + if (!target_sym || target_sym->type != -RAY_SYM) + return ray_error("type", "(.col.link target v): target must be a sym"); + if (!int_vec || RAY_IS_ERR(int_vec)) + return int_vec ? int_vec : ray_error("type", "(.col.link target v): null v"); + int64_t target_id = target_sym->i64; + + ray_t* w = int_vec; + ray_retain(w); + ray_t* r = ray_link_attach(&w, target_id); + if (RAY_IS_ERR(r)) { ray_release(w); return r; } + return w; +} + +ray_t* ray_col_unlink_fn(ray_t* v) { + if (!v || RAY_IS_ERR(v)) return v; + ray_t* w = v; + ray_retain(w); + ray_t* r = ray_link_detach(&w); + if (RAY_IS_ERR(r)) { ray_release(w); return r; } + return w; +} + +ray_t* ray_col_link_p_fn(ray_t* v) { + return ray_bool(ray_link_has(v) ? 1 : 0); +} + +ray_t* ray_col_target_fn(ray_t* v) { + if (!ray_link_has(v)) return RAY_NULL_OBJ; + /* Slice-aware: ray_link_target_id reads from slice_parent for slices, + * because v->link_target on a slice aliases slice_offset and would + * surface as a garbage sym ID. 
*/ + return ray_sym(ray_link_target_id(v)); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/linkop.h b/crates/rayforce-sys/vendor/rayforce/src/ops/linkop.h new file mode 100644 index 0000000..4ef7477 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/linkop.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_LINKOP_H +#define RAY_LINKOP_H + +/* + * linkop.h -- Linked columns. + * + * A linked column is an integer vector (RAY_I32 / RAY_I64) where every + * value is a row index into a target table. Querying linkcol.field + * dereferences as target_table[linkcol[i]][field] for each row i — a + * single array access, no hash probe. + * + * Storage: RAY_ATTR_HAS_LINK = 0x04 set on the column; the int64 sym ID + * naming the target lives at bytes 8-15 of the nullmap union (the + * `link_target` field). 
See include/rayforce.h for the union layout + * and src/mem/heap.h for the attr-bit semantics. + * + * Resolution is lazy: link_target is just a sym, looked up against the + * global env at deref time. If the target table has been rebound, the + * link follows automatically. + * + * HAS_LINK is a property of the column, not a transient accelerator — + * unlike HAS_INDEX it is preserved across in-place mutation and + * persisted to disk via a `.link` sidecar file. + */ + +#include +#include "mem/heap.h" + +/* ===== Attach / Detach ===== */ + +/* Attach a link to *vp pointing at the target named by target_sym_id. + * Returns the (possibly COW'd) parent vector with HAS_LINK set, or a + * RAY_ERROR. Validates: target sym must resolve to a RAY_TABLE in the + * current env; *vp must be a RAY_I32 or RAY_I64 vector and not a slice. */ +ray_t* ray_link_attach(ray_t** vp, int64_t target_sym_id); + +/* Clear HAS_LINK from *vp. No-op if not linked. link_target byte slot + * is zeroed. Returns *vp. */ +ray_t* ray_link_detach(ray_t** vp); + +/* ===== Introspection ===== */ + +/* True iff `v` is a linked column or a slice of one. Slices over a + * linked parent inherit the link transparently — the slice's own attrs + * carry RAY_ATTR_SLICE without HAS_LINK, but `link_target` lives on the + * parent and reading it through the slice is safe via slice_parent. */ +static inline bool ray_link_has(const ray_t* v) { + if (!v || RAY_IS_ERR((ray_t*)v)) return false; + if (v->attrs & RAY_ATTR_HAS_LINK) return true; + if (v->attrs & RAY_ATTR_SLICE) { + const ray_t* p = v->slice_parent; + return p && (p->attrs & RAY_ATTR_HAS_LINK); + } + return false; +} + +/* Returns the target sym ID (int64) or -1 if no link is attached. + * Slice-aware: looks through to slice_parent->link_target. 
*/ +static inline int64_t ray_link_target_id(const ray_t* v) { + if (!ray_link_has(v)) return (int64_t)-1; + if (v->attrs & RAY_ATTR_SLICE) return v->slice_parent->link_target; + return v->link_target; +} + +/* ===== Resolution ===== */ + +/* Dereference linked column v at field sym_id of the target table. + * Returns a fresh owning ref to a column the same length as v, with + * the same type as the target's field column. Null rows in v + * propagate as nulls in the result; null rows in the target also + * propagate. Returns NULL if the target table is missing or doesn't + * have a column named `sym_id` (caller may treat as a probe miss). */ +ray_t* ray_link_deref(ray_t* v, int64_t sym_id); + +/* ===== Rayfall builtin entry points ===== */ + +ray_t* ray_col_link_fn (ray_t* target_sym, ray_t* int_vec); /* (.col.link 'target v) */ +ray_t* ray_col_unlink_fn (ray_t* v); /* (.col.unlink v) */ +ray_t* ray_col_link_p_fn (ray_t* v); /* (.col.link? v) */ +ray_t* ray_col_target_fn (ray_t* v); /* (.col.target v) */ + +#endif /* RAY_LINKOP_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/ops.h b/crates/rayforce-sys/vendor/rayforce/src/ops/ops.h new file mode 100644 index 0000000..033a9aa --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/ops.h @@ -0,0 +1,726 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_OPS_H +#define RAY_OPS_H + +#include +#include "store/hnsw.h" /* ray_hnsw_metric_t, ray_hnsw_t */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* ===== Internal Type Constants ===== */ + +#define RAY_SEL 14 /* selection bitmap (lazy filter) */ + +/* Lazy DAG handle (atom-only; stored inline in nullmap region) */ +#define RAY_LAZY 104 + +/* ===== Forward Declarations (internal types) ===== */ + +typedef struct ray_pool ray_pool_t; +typedef struct ray_csr ray_csr_t; +typedef struct ray_rel ray_rel_t; +typedef struct ray_hnsw ray_hnsw_t; + +/* ===== Lazy DAG Handle Accessors ===== */ + +typedef struct ray_graph ray_graph_t; +typedef struct ray_op ray_op_t; + +static inline bool ray_is_lazy(ray_t* x) { + return x && !RAY_IS_ERR(x) && x->type == RAY_LAZY; +} + +ray_t* ray_lazy_materialize(ray_t* val); + +/* ===== Cancel API ===== */ + +void ray_cancel(void); + +/* ===== Parted Types ===== */ + +#define RAY_PARTED_BASE 32 +#define RAY_MAPCOMMON 64 /* virtual partition column */ + +/* MAPCOMMON inferred sub-types (stored in attrs field) */ +#define RAY_MC_SYM 0 /* opaque partition key strings */ +#define RAY_MC_DATE 1 /* YYYY.MM.DD partition directories */ +#define RAY_MC_I64 2 /* pure integer partition keys */ + +#define RAY_IS_PARTED(t) ((t) >= RAY_PARTED_BASE && (t) < RAY_MAPCOMMON) +#define RAY_PARTED_BASETYPE(t) ((t) - RAY_PARTED_BASE) + +/* ===== Morsel Constants ===== */ + +#define RAY_MORSEL_ELEMS 1024 + +/* ===== Slab Cache Constants ===== */ + +#define 
RAY_SLAB_CACHE_SIZE 64 +#define RAY_SLAB_ORDERS 5 + +/* ===== Heap Allocator Constants ===== */ + +#define RAY_ORDER_MIN 6 +#define RAY_ORDER_MAX 30 + +/* ===== Parallel Threshold ===== */ + +#define RAY_PARALLEL_THRESHOLD (64 * RAY_MORSEL_ELEMS) +#define RAY_DISPATCH_MORSELS 8 + +/* Radix-partitioned hash join tuning. + * L2_TARGET: per-partition HT working set limit (tuned for L1d/L2). */ +#define RAY_JOIN_L2_TARGET (256 * 1024) /* target partition HT size in bytes */ +#define RAY_JOIN_MIN_RADIX 2 /* min radix bits (4 partitions) */ +#define RAY_JOIN_MAX_RADIX 14 /* max radix bits (16K partitions) */ + +/* ===== Operation Graph ===== */ + +/* Opcodes — Sources */ +#define OP_SCAN 1 +#define OP_CONST 2 +#define OP_TIL 3 /* generate 0..n-1 sequence (lazy source) */ + +/* Opcodes — Unary element-wise (fuseable) */ +#define OP_NEG 10 +#define OP_ABS 11 +#define OP_NOT 12 +#define OP_SQRT 13 +#define OP_LOG 14 +#define OP_EXP 15 +#define OP_CEIL 16 +#define OP_FLOOR 17 +#define OP_ISNULL 18 +#define OP_CAST 19 +#define OP_ROUND 9 /* unary element-wise round */ + +/* Opcodes — Binary element-wise (fuseable) */ +#define OP_ADD 20 +#define OP_SUB 21 +#define OP_MUL 22 +#define OP_DIV 23 +#define OP_MOD 24 +#define OP_EQ 25 +#define OP_NE 26 +#define OP_LT 27 +#define OP_LE 28 +#define OP_GT 29 +#define OP_GE 30 +#define OP_AND 31 +#define OP_OR 32 +#define OP_MIN2 33 +#define OP_MAX2 34 +#define OP_IF 35 +#define OP_LIKE 36 +#define OP_UPPER 37 +#define OP_LOWER 38 +#define OP_STRLEN 39 +#define OP_SUBSTR 40 +#define OP_REPLACE 41 +#define OP_TRIM 42 +#define OP_CONCAT 43 +#define OP_EXTRACT 45 +#define OP_DATE_TRUNC 46 +#define OP_IN 47 /* binary: col in set_vec -> BOOL */ +#define OP_NOT_IN 48 /* binary: col not in set_vec -> BOOL */ + +/* EXTRACT / DATE_TRUNC field identifiers */ +#define RAY_EXTRACT_YEAR 0 +#define RAY_EXTRACT_MONTH 1 +#define RAY_EXTRACT_DAY 2 +#define RAY_EXTRACT_HOUR 3 +#define RAY_EXTRACT_MINUTE 4 +#define RAY_EXTRACT_SECOND 5 +#define 
RAY_EXTRACT_DOW 6 +#define RAY_EXTRACT_DOY 7 +#define RAY_EXTRACT_EPOCH 8 + +/* Opcodes — Reductions (pipeline breakers) */ +#define OP_SUM 50 +#define OP_PROD 51 +#define OP_MIN 52 +#define OP_MAX 53 +#define OP_COUNT 54 +#define OP_AVG 55 +#define OP_FIRST 56 +#define OP_LAST 57 +#define OP_COUNT_DISTINCT 58 +#define OP_STDDEV 59 + +/* Opcodes — Structural (pipeline breakers) */ +#define OP_FILTER 60 +#define OP_SORT 61 +#define OP_GROUP 62 +#define OP_JOIN 63 +#define OP_WINDOW_JOIN 64 +#define OP_SELECT 66 +#define OP_HEAD 67 +#define OP_TAIL 68 + +/* Opcodes — Window */ +#define OP_WINDOW 72 + +/* Opcodes — Statistical aggregates */ +#define OP_STDDEV_POP 73 +#define OP_VAR 74 +#define OP_VAR_POP 75 +#define OP_ILIKE 76 +#define OP_PIVOT 77 /* single-pass pivot table */ +#define OP_ANTIJOIN 78 /* anti-semi-join (left rows with no right match) */ + +/* Opcodes — Graph */ +#define OP_EXPAND 80 /* 1-hop CSR neighbor expansion */ +#define OP_VAR_EXPAND 81 /* variable-length BFS/DFS */ +#define OP_SHORTEST_PATH 82 /* BFS shortest path */ +#define OP_WCO_JOIN 83 /* worst-case optimal join (LFTJ) */ +#define OP_PAGERANK 84 /* iterative PageRank */ +#define OP_CONNECTED_COMP 85 /* connected components (label prop) */ +#define OP_DIJKSTRA 86 /* weighted shortest path (Dijkstra) */ +#define OP_LOUVAIN 87 /* community detection (Louvain) */ + +/* Opcodes — Graph algorithms (batch 1) */ +#define OP_DEGREE_CENT 92 /* degree centrality */ +#define OP_TOPSORT 93 /* topological sort (Kahn's) */ +#define OP_DFS 94 /* depth-first search traversal */ + +/* Opcodes — Graph algorithms (batch 2) */ +#define OP_ASTAR 95 /* A* shortest path (coordinate heuristic) */ +#define OP_K_SHORTEST 96 /* Yen's k-shortest paths */ +#define OP_CLUSTER_COEFF 97 /* clustering coefficients */ +#define OP_RANDOM_WALK 98 /* random walk traversal */ +#define OP_BETWEENNESS 99 /* betweenness centrality (Brandes) */ +#define OP_CLOSENESS 100 /* closeness centrality */ +#define OP_MST 101 /* minimum 
spanning forest (Kruskal) */ + +/* Opcodes — Vector similarity */ +#define OP_COSINE_SIM 88 /* cosine similarity between embeddings */ +#define OP_EUCLIDEAN_DIST 89 /* euclidean distance between embeddings */ +#define OP_KNN 90 /* brute-force K nearest neighbors */ +#define OP_HNSW_KNN 91 /* HNSW approximate K nearest neighbors */ +#define OP_ANN_RERANK 102 /* index-backed ANN over filtered source */ +#define OP_KNN_RERANK 103 /* brute-force KNN over filtered source */ + +/* Opcodes — Misc */ +#define OP_ALIAS 70 +#define OP_MATERIALIZE 71 + +/* Window function kinds (stored in func_kinds[]) */ +#define RAY_WIN_ROW_NUMBER 0 +#define RAY_WIN_RANK 1 +#define RAY_WIN_DENSE_RANK 2 +#define RAY_WIN_NTILE 3 +#define RAY_WIN_SUM 4 +#define RAY_WIN_AVG 5 +#define RAY_WIN_MIN 6 +#define RAY_WIN_MAX 7 +#define RAY_WIN_COUNT 8 +#define RAY_WIN_LAG 9 +#define RAY_WIN_LEAD 10 +#define RAY_WIN_FIRST_VALUE 11 +#define RAY_WIN_LAST_VALUE 12 +#define RAY_WIN_NTH_VALUE 13 + +/* Frame types */ +#define RAY_FRAME_ROWS 0 +#define RAY_FRAME_RANGE 1 + +/* Frame bounds */ +#define RAY_BOUND_UNBOUNDED_PRECEDING 0 +#define RAY_BOUND_N_PRECEDING 1 +#define RAY_BOUND_CURRENT_ROW 2 +#define RAY_BOUND_N_FOLLOWING 3 +#define RAY_BOUND_UNBOUNDED_FOLLOWING 4 + +/* Op flags */ +#define OP_FLAG_FUSED 0x01 +#define OP_FLAG_DEAD 0x02 + +/* Operation node (32 bytes, fits one cache line) */ +typedef struct ray_op { + uint16_t opcode; /* OP_ADD, OP_SCAN, OP_FILTER, etc. 
*/ + uint8_t arity; /* 0, 1, or 2 */ + uint8_t flags; /* FUSED, DEAD */ + int8_t out_type; /* inferred output type */ + uint8_t pad[3]; + uint32_t id; /* unique node ID */ + uint32_t est_rows; /* estimated row count */ + struct ray_op* inputs[2]; /* NULL if unused */ +} ray_op_t; + +/* Extended operation node for N-ary ops (heap-allocated, variable size) */ +typedef struct ray_op_ext { + ray_op_t base; /* 32 bytes standard node */ + union { + ray_t* literal; /* OP_CONST: inline literal value */ + int64_t sym; /* OP_SCAN: column name symbol ID */ + struct { /* OP_GROUP: group-by specification */ + ray_op_t** keys; + uint8_t n_keys; + uint8_t n_aggs; + uint16_t* agg_ops; + ray_op_t** agg_ins; + }; + struct { /* OP_SORT: multi-column sort */ + ray_op_t** columns; + uint8_t* desc; + uint8_t* nulls_first; /* 1=nulls first, 0=nulls last */ + uint8_t n_cols; + } sort; + struct { /* OP_JOIN: join specification */ + ray_op_t** left_keys; + ray_op_t** right_keys; + uint8_t n_join_keys; + uint8_t join_type; /* 0=inner, 1=left, 2=full, 3=anti */ + } join; + struct { /* OP_WINDOW_JOIN: ASOF join */ + ray_op_t* time_key; /* time/ordered key column */ + ray_op_t** eq_keys; /* equality partition keys */ + uint8_t n_eq_keys; /* number of equality keys */ + uint8_t join_type; /* 0=inner, 1=left outer */ + } asof; + struct { /* OP_WINDOW: window functions */ + ray_op_t** part_keys; + ray_op_t** order_keys; + uint8_t* order_descs; + ray_op_t** func_inputs; + uint8_t* func_kinds; /* RAY_WIN_ROW_NUMBER etc. */ + int64_t* func_params; /* NTILE(n), LAG offset, etc. 
*/ + uint8_t n_part_keys; + uint8_t n_order_keys; + uint8_t n_funcs; + uint8_t frame_type; /* RAY_FRAME_ROWS / RAY_FRAME_RANGE */ + uint8_t frame_start; /* RAY_BOUND_* */ + uint8_t frame_end; /* RAY_BOUND_* */ + int64_t frame_start_n; + int64_t frame_end_n; + } window; + struct { /* OP_EXPAND / OP_VAR_EXPAND / OP_SHORTEST_PATH / graph algos */ + void* rel; /* ray_rel_t* (opaque to public header) */ + void* sip_sel; /* ray_t* RAY_SEL bitmap for SIP source-side skip */ + uint8_t direction; /* 0=fwd, 1=rev, 2=both */ + uint8_t min_depth; + uint8_t max_depth; + uint8_t path_tracking; + uint8_t factorized; /* 1 = emit factorized output (fvec) */ + uint16_t max_iter; /* PageRank/Louvain iterations */ + double damping; /* PageRank damping factor */ + int64_t weight_col_sym; /* Dijkstra/Astar/Yen weight column */ + int64_t coord_col_syms[2]; /* A*: lat/lon property column names */ + void* node_props; /* ray_t* node property table (A*: coords) */ + } graph; + struct { /* OP_WCO_JOIN */ + void** rels; /* ray_rel_t** array */ + uint8_t n_rels; + uint8_t n_vars; + } wco; + struct { /* OP_COSINE_SIM / OP_EUCLIDEAN_DIST / OP_INNER_PRODUCT / OP_KNN */ + float* query_vec; /* query embedding (caller-owned, must outlive graph) */ + int32_t dim; /* embedding dimension */ + int64_t k; /* top-K for KNN */ + int32_t metric; /* ray_hnsw_metric_t — used by OP_KNN only */ + } vector; + struct { /* OP_HNSW_KNN */ + void* hnsw_idx; /* ray_hnsw_t* (opaque, must outlive graph) */ + float* query_vec; + int32_t dim; + int64_t k; + int32_t ef_search; + } hnsw; + struct { /* OP_ANN_RERANK / OP_KNN_RERANK */ + void* hnsw_idx; /* ray_hnsw_t* for ANN; NULL for KNN */ + int64_t col_sym; /* sym id of column for KNN; 0 for ANN */ + float* query_vec; /* caller-owned */ + int32_t dim; + int32_t metric; /* ray_hnsw_metric_t — KNN variant only */ + int64_t k; /* target result count from `take` */ + int32_t ef_search; /* ANN only */ + } rerank; + struct { /* OP_PIVOT */ + ray_op_t** index_cols; /* OP_SCAN 
nodes for index columns */ + ray_op_t* pivot_col; /* OP_SCAN node for pivot column */ + ray_op_t* value_col; /* OP_SCAN node for value column */ + uint16_t agg_op; /* OP_SUM, OP_AVG, etc. */ + uint8_t n_index; /* number of index columns */ + } pivot; + }; + uint64_t* seg_mask; /* partition pruning bitmap (NULL = all active) */ + int64_t seg_mask_count; /* number of partitions the mask covers */ +} ray_op_ext_t; + +/* Operation graph */ +typedef struct ray_graph { + ray_op_t* nodes; /* array of op nodes (malloc'd) */ + uint32_t node_count; /* number of nodes */ + uint32_t node_cap; /* allocated capacity */ + ray_t* table; /* bound table (provides columns for OP_SCAN) */ + ray_t** tables; /* table registry (indexed by table_id) */ + uint16_t n_tables; /* number of registered tables */ + ray_op_ext_t** ext_nodes; /* tracked extended nodes for cleanup */ + uint32_t ext_count; /* number of extended nodes */ + uint32_t ext_cap; /* capacity of ext_nodes array */ + ray_t* selection; /* RAY_SEL bitmap — lazy filter (NULL = all pass) */ + + /* Compile-time local env for lambda / let inlining in + * compile_expr_dag (src/ops/query.c). Stack of + * {formal_sym_id → node_id}. Pushed on lambda call / let + * entry, popped on exit. Looked up BEFORE ray_scan so + * formals shadow column names naturally. + * + * Stores node IDs (uint32_t), not raw ray_op_t* — the + * g->nodes array is dynamically resized, so any realloc + * between push and lookup would dangle stored pointers. + * Lookup re-resolves &g->nodes[id] on every call. 
*/ + struct { + int64_t sym; + uint32_t node_id; + } cexpr_env[32]; + int cexpr_env_top; +} ray_graph_t; + +/* ===== Morsel Iterator ===== */ + +typedef struct { + ray_t* vec; /* source vector */ + int64_t offset; /* current position (element index) */ + int64_t len; /* total length of vector */ + uint32_t elem_size; /* bytes per element */ + int64_t morsel_len; /* elements in current morsel (<=RAY_MORSEL_ELEMS) */ + void* morsel_ptr; /* pointer to current morsel data */ + uint8_t* null_bits; /* current morsel null bitmap (or NULL) */ +} ray_morsel_t; + +/* ===== Selection Bitmap (RAY_SEL) ===== */ + +/* Segment flags — one per morsel (RAY_MORSEL_ELEMS rows) */ +#define RAY_SEL_NONE 0 /* all bits 0 — skip entire morsel */ +#define RAY_SEL_ALL 1 /* all bits 1 — process without bitmap check */ +#define RAY_SEL_MIX 2 /* mixed bits — must check per-row */ + +/* Words per morsel segment: 1024 rows / 64 bits = 16 uint64_t */ +#define RAY_SEL_WORDS_PER_SEG (RAY_MORSEL_ELEMS / 64) + +/* Inline metadata at ray_data(sel) */ +typedef struct { + int64_t total_pass; /* total passing rows */ + uint32_t n_segs; /* ceil(nrows / RAY_MORSEL_ELEMS) */ + uint32_t _pad; +} ray_sel_meta_t; + +/* + * RAY_SEL block layout (ray_data offset 0): + * + * ray_sel_meta_t meta (16 bytes) + * uint8_t seg_flags[] (n_segs, padded to 8-byte alignment) + * uint16_t seg_popcnt[](n_segs, padded to 8-byte alignment) + * uint64_t bits[] (ceil(nrows/64) words) + */ + +static inline ray_sel_meta_t* ray_sel_meta(ray_t* s) { + return (ray_sel_meta_t*)ray_data(s); /* metadata header sits at the start of the data area */ +} +static inline uint8_t* ray_sel_flags(ray_t* s) { + return (uint8_t*)ray_data(s) + sizeof(ray_sel_meta_t); /* per-segment flags follow the metadata header */ +} +static inline uint16_t* ray_sel_popcnt(ray_t* s) { + uint32_t n = ray_sel_meta(s)->n_segs; + return (uint16_t*)(ray_sel_flags(s) + ((n + 7u) & ~7u)); /* skip n flag bytes, rounded up to a multiple of 8 */ +} +static inline uint64_t* ray_sel_bits(ray_t* s) { + uint32_t n = ray_sel_meta(s)->n_segs; + uint16_t* pc = ray_sel_popcnt(s); + return (uint64_t*)(pc + ((n + 3u) & ~3u)); /* skip n uint16 counts, rounded up to a multiple of 4 entries (8 bytes) */ +} + +/* Bit 
ops */ +#define RAY_SEL_BIT_TEST(bits, r) ((bits)[(r) >> 6] & (1ULL << ((r) & 63))) +#define RAY_SEL_BIT_SET(bits, r) ((bits)[(r) >> 6] |= (1ULL << ((r) & 63))) +#define RAY_SEL_BIT_CLR(bits, r) ((bits)[(r) >> 6] &= ~(1ULL << ((r) & 63))) + +/* ===== Executor Pipeline ===== */ + +typedef struct ray_pipe { + ray_op_t* op; /* operation node */ + struct ray_pipe* inputs[2]; /* upstream pipes */ + ray_morsel_t state; /* current morsel state */ + ray_t* materialized; /* materialized intermediate (or NULL) */ + int spill_fd; /* file descriptor for spill (-1 if none) */ +} ray_pipe_t; + +/* ===== Selection API ===== */ + +ray_t* ray_sel_new(int64_t nrows); /* all-zero (no rows pass) */ +ray_t* ray_sel_from_pred(ray_t* bool_vec); /* convert RAY_BOOL vec -> RAY_SEL */ +ray_t* ray_sel_and(ray_t* a, ray_t* b); /* AND two selections */ +void ray_sel_recompute(ray_t* sel); /* rebuild seg_flags + popcounts */ + +/* ===== Morsel Iterator API ===== */ + +void ray_morsel_init(ray_morsel_t* m, ray_t* vec); +void ray_morsel_init_range(ray_morsel_t* m, ray_t* vec, int64_t start, int64_t end); +bool ray_morsel_next(ray_morsel_t* m); + +/* ===== Operation Graph API ===== */ + +ray_graph_t* ray_graph_new(ray_t* tbl); +void ray_graph_free(ray_graph_t* g); + +/* Source ops */ +ray_op_t* ray_scan(ray_graph_t* g, const char* col_name); +ray_op_t* ray_const_f64(ray_graph_t* g, double val); +ray_op_t* ray_const_i64(ray_graph_t* g, int64_t val); +ray_op_t* ray_const_bool(ray_graph_t* g, bool val); +ray_op_t* ray_const_str(ray_graph_t* g, const char* s, size_t len); +ray_op_t* ray_const_vec(ray_graph_t* g, ray_t* vec); +ray_op_t* ray_const_atom(ray_graph_t* g, ray_t* atom); +ray_op_t* ray_const_table(ray_graph_t* g, ray_t* table); + +/* Unary element-wise ops */ +ray_op_t* ray_neg(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_abs(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_not(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_sqrt_op(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_log_op(ray_graph_t* g, 
ray_op_t* a); +ray_op_t* ray_exp_op(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_ceil_op(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_floor_op(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_round_op(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_isnull(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_cast(ray_graph_t* g, ray_op_t* a, int8_t target_type); + +/* Generic binary op — opcode-driven dispatch, no switch/case */ +ray_op_t* ray_binop(ray_graph_t* g, uint16_t opcode, ray_op_t* a, ray_op_t* b); + +/* Binary element-wise ops */ +ray_op_t* ray_add(ray_graph_t* g, ray_op_t* a, ray_op_t* b); +ray_op_t* ray_sub(ray_graph_t* g, ray_op_t* a, ray_op_t* b); +ray_op_t* ray_mul(ray_graph_t* g, ray_op_t* a, ray_op_t* b); +ray_op_t* ray_div(ray_graph_t* g, ray_op_t* a, ray_op_t* b); +ray_op_t* ray_mod(ray_graph_t* g, ray_op_t* a, ray_op_t* b); +ray_op_t* ray_eq(ray_graph_t* g, ray_op_t* a, ray_op_t* b); +ray_op_t* ray_ne(ray_graph_t* g, ray_op_t* a, ray_op_t* b); +ray_op_t* ray_lt(ray_graph_t* g, ray_op_t* a, ray_op_t* b); +ray_op_t* ray_le(ray_graph_t* g, ray_op_t* a, ray_op_t* b); +ray_op_t* ray_gt(ray_graph_t* g, ray_op_t* a, ray_op_t* b); +ray_op_t* ray_ge(ray_graph_t* g, ray_op_t* a, ray_op_t* b); +ray_op_t* ray_and(ray_graph_t* g, ray_op_t* a, ray_op_t* b); +ray_op_t* ray_or(ray_graph_t* g, ray_op_t* a, ray_op_t* b); +ray_op_t* ray_min2(ray_graph_t* g, ray_op_t* a, ray_op_t* b); +ray_op_t* ray_max2(ray_graph_t* g, ray_op_t* a, ray_op_t* b); +ray_op_t* ray_in(ray_graph_t* g, ray_op_t* col, ray_op_t* set); +ray_op_t* ray_not_in(ray_graph_t* g, ray_op_t* col, ray_op_t* set); +ray_op_t* ray_if(ray_graph_t* g, ray_op_t* cond, ray_op_t* then_val, ray_op_t* else_val); +ray_op_t* ray_like(ray_graph_t* g, ray_op_t* input, ray_op_t* pattern); +ray_op_t* ray_ilike(ray_graph_t* g, ray_op_t* input, ray_op_t* pattern); +ray_op_t* ray_upper(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_lower(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_strlen(ray_graph_t* g, ray_op_t* a); +ray_op_t* 
ray_substr(ray_graph_t* g, ray_op_t* str, ray_op_t* start, ray_op_t* len); +ray_op_t* ray_replace(ray_graph_t* g, ray_op_t* str, ray_op_t* from, ray_op_t* to); +ray_op_t* ray_trim_op(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_concat(ray_graph_t* g, ray_op_t** args, int n); + +/* Date/time extraction and truncation */ +ray_op_t* ray_extract(ray_graph_t* g, ray_op_t* col, int64_t field); +ray_op_t* ray_date_trunc(ray_graph_t* g, ray_op_t* col, int64_t field); + +/* Source ops */ +ray_op_t* ray_til(ray_graph_t* g, int64_t n); + +/* Reduction ops */ +ray_op_t* ray_sum(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_prod(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_min_op(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_max_op(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_count(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_avg(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_first(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_last(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_count_distinct(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_stddev(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_stddev_pop(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_var(ray_graph_t* g, ray_op_t* a); +ray_op_t* ray_var_pop(ray_graph_t* g, ray_op_t* a); + +/* Structural ops */ +ray_op_t* ray_filter(ray_graph_t* g, ray_op_t* input, ray_op_t* predicate); +ray_op_t* ray_sort_op(ray_graph_t* g, ray_op_t* table_node, + ray_op_t** keys, uint8_t* descs, uint8_t* nulls_first, + uint8_t n_cols); +ray_op_t* ray_group(ray_graph_t* g, ray_op_t** keys, uint8_t n_keys, + uint16_t* agg_ops, ray_op_t** agg_ins, uint8_t n_aggs); +ray_op_t* ray_distinct(ray_graph_t* g, ray_op_t** keys, uint8_t n_keys); +ray_op_t* ray_pivot_op(ray_graph_t* g, + ray_op_t** index_cols, uint8_t n_index, + ray_op_t* pivot_col, + ray_op_t* value_col, + uint16_t agg_op); +ray_op_t* ray_join(ray_graph_t* g, + ray_op_t* left_table, ray_op_t** left_keys, + ray_op_t* right_table, ray_op_t** right_keys, + uint8_t n_keys, uint8_t join_type); +ray_op_t* 
ray_antijoin(ray_graph_t* g, + ray_op_t* left_table, ray_op_t** left_keys, + ray_op_t* right_table, ray_op_t** right_keys, + uint8_t n_keys); +ray_op_t* ray_asof_join(ray_graph_t* g, + ray_op_t* left_table, ray_op_t* right_table, + ray_op_t* time_key, + ray_op_t** eq_keys, uint8_t n_eq_keys, + uint8_t join_type); +ray_op_t* ray_window_op(ray_graph_t* g, ray_op_t* table_node, + ray_op_t** part_keys, uint8_t n_part, + ray_op_t** order_keys, uint8_t* order_descs, uint8_t n_order, + uint8_t* func_kinds, ray_op_t** func_inputs, + int64_t* func_params, uint8_t n_funcs, + uint8_t frame_type, uint8_t frame_start, uint8_t frame_end, + int64_t frame_start_n, int64_t frame_end_n); +ray_op_t* ray_select(ray_graph_t* g, ray_op_t* input, + ray_op_t** cols, uint8_t n_cols); +ray_op_t* ray_head(ray_graph_t* g, ray_op_t* input, int64_t n); +ray_op_t* ray_tail(ray_graph_t* g, ray_op_t* input, int64_t n); +ray_op_t* ray_alias(ray_graph_t* g, ray_op_t* input, const char* name); +ray_op_t* ray_materialize(ray_graph_t* g, ray_op_t* input); + +/* ===== Graph Ops ===== */ + +/* Multi-table support */ +uint16_t ray_graph_add_table(ray_graph_t* g, ray_t* table); +ray_op_t* ray_scan_table(ray_graph_t* g, uint16_t table_id, const char* col_name); + +/* Graph traversal */ +ray_op_t* ray_expand(ray_graph_t* g, ray_op_t* src_nodes, + ray_rel_t* rel, uint8_t direction); +ray_op_t* ray_var_expand(ray_graph_t* g, ray_op_t* start_nodes, + ray_rel_t* rel, uint8_t direction, + uint8_t min_depth, uint8_t max_depth, + bool track_path); +ray_op_t* ray_shortest_path(ray_graph_t* g, ray_op_t* src, ray_op_t* dst, + ray_rel_t* rel, uint8_t max_depth); +ray_op_t* ray_wco_join(ray_graph_t* g, + ray_rel_t** rels, uint8_t n_rels, + uint8_t n_vars); + +/* Graph algorithms */ +ray_op_t* ray_pagerank(ray_graph_t* g, ray_rel_t* rel, + uint16_t max_iter, double damping); +ray_op_t* ray_connected_comp(ray_graph_t* g, ray_rel_t* rel); +ray_op_t* ray_dijkstra(ray_graph_t* g, ray_op_t* src, ray_op_t* dst, + ray_rel_t* 
rel, const char* weight_col, + uint8_t max_depth); +ray_op_t* ray_louvain(ray_graph_t* g, ray_rel_t* rel, + uint16_t max_iter); +ray_op_t* ray_degree_cent(ray_graph_t* g, ray_rel_t* rel); +ray_op_t* ray_topsort(ray_graph_t* g, ray_rel_t* rel); +ray_op_t* ray_dfs(ray_graph_t* g, ray_op_t* src, ray_rel_t* rel, uint8_t max_depth); +ray_op_t* ray_astar(ray_graph_t* g, ray_op_t* src, ray_op_t* dst, + ray_rel_t* rel, const char* weight_col, + const char* lat_col, const char* lon_col, + ray_t* node_props, uint8_t max_depth); +ray_op_t* ray_k_shortest(ray_graph_t* g, ray_op_t* src, ray_op_t* dst, + ray_rel_t* rel, const char* weight_col, uint16_t k); +ray_op_t* ray_cluster_coeff(ray_graph_t* g, ray_rel_t* rel); +ray_op_t* ray_random_walk(ray_graph_t* g, ray_op_t* src, ray_rel_t* rel, + uint16_t walk_length); +ray_op_t* ray_betweenness(ray_graph_t* g, ray_rel_t* rel, uint16_t sample_size); +ray_op_t* ray_closeness(ray_graph_t* g, ray_rel_t* rel, uint16_t sample_size); +ray_op_t* ray_mst(ray_graph_t* g, ray_rel_t* rel, const char* weight_col); + +/* Vector similarity ops */ +ray_op_t* ray_cosine_sim(ray_graph_t* g, ray_op_t* emb_col, + const float* query_vec, int32_t dim); +ray_op_t* ray_euclidean_dist(ray_graph_t* g, ray_op_t* emb_col, + const float* query_vec, int32_t dim); +ray_op_t* ray_knn(ray_graph_t* g, ray_op_t* emb_col, + const float* query_vec, int32_t dim, int64_t k, + ray_hnsw_metric_t metric); + +/* HNSW-accelerated KNN (uses pre-built index instead of brute-force) */ +ray_op_t* ray_hnsw_knn(ray_graph_t* g, ray_hnsw_t* idx, + const float* query_vec, int32_t dim, + int64_t k, int32_t ef_search); + +/* Rerank ops: consume a filtered source table and return top-K nearest rows + * (source columns + _dist appended). Used by `select ... nearest ... take`. 
*/ +ray_op_t* ray_ann_rerank(ray_graph_t* g, ray_op_t* src, + ray_hnsw_t* idx, const float* query_vec, + int32_t dim, int64_t k, int32_t ef_search); +ray_op_t* ray_knn_rerank(ray_graph_t* g, ray_op_t* src, + int64_t col_sym, const float* query_vec, + int32_t dim, int64_t k, ray_hnsw_metric_t metric); + +/* CSR / Relationship API */ +ray_rel_t* ray_rel_build(ray_t* from_table, const char* fk_col, + int64_t n_target_nodes, bool sort_targets); +ray_rel_t* ray_rel_from_edges(ray_t* edge_table, + const char* src_col, const char* dst_col, + int64_t n_src_nodes, int64_t n_dst_nodes, + bool sort_targets); +ray_err_t ray_rel_save(ray_rel_t* rel, const char* dir); +ray_rel_t* ray_rel_load(const char* dir); +ray_rel_t* ray_rel_mmap(const char* dir); +void ray_rel_set_props(ray_rel_t* rel, ray_t* props); +void ray_rel_free(ray_rel_t* rel); +const int64_t* ray_rel_neighbors(ray_rel_t* rel, int64_t node, + uint8_t direction, int64_t* out_count); +int64_t ray_rel_n_nodes(ray_rel_t* rel, uint8_t direction); + +/* ===== Optimizer API ===== */ + +ray_op_t* ray_optimize(ray_graph_t* g, ray_op_t* root); +void ray_fuse_pass(ray_graph_t* g, ray_op_t* root); + +/* ===== Plan Printer ===== */ + +const char* ray_opcode_name(uint16_t op); +void ray_graph_dump(ray_graph_t* g, ray_op_t* root, void* out); + +/* ===== Sort API ===== */ + +/* Sort columns and return index array (I64 vector of sorted indices). + * Uses parallel radix sort for numerics, merge sort for strings/symbols. + * descs/nulls_first may be NULL (all-asc / nulls-last default). 
*/ +ray_t* ray_sort_indices(ray_t** cols, uint8_t* descs, uint8_t* nulls_first, + uint8_t n_cols, int64_t nrows); + +/* ===== Executor API ===== */ + +ray_t* ray_execute(ray_graph_t* g, ray_op_t* root); + +/* ===== Lazy DAG Handle (Internal) ===== */ + +#define RAY_LAZY_GRAPH(p) (*(ray_graph_t**)((p)->nullmap)) +#define RAY_LAZY_OP(p) (*(ray_op_t**)(((p)->nullmap) + 8)) + +ray_op_t* ray_graph_input_vec(ray_graph_t* g, ray_t* vec); +ray_t* ray_lazy_wrap(ray_graph_t* g, ray_op_t* op); +ray_t* ray_lazy_append(ray_t* lazy, uint16_t opcode); + +#ifdef __cplusplus +} +#endif + +#endif /* RAY_OPS_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/opt.c b/crates/rayforce-sys/vendor/rayforce/src/ops/opt.c new file mode 100644 index 0000000..65c5b38 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/opt.c @@ -0,0 +1,2031 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if !defined(RAY_OS_WINDOWS) && !defined(_GNU_SOURCE) +#define _GNU_SOURCE +#endif + +#include "opt.h" +#include "core/profile.h" +#include "mem/sys.h" +#include "mem/heap.h" +#include <math.h> +#include <string.h> + +/* Forward declaration — defined below, used by type inference and DCE passes. */ +static ray_op_ext_t* find_ext(ray_graph_t* g, uint32_t node_id); + +/* -------------------------------------------------------------------------- + * Optimizer passes (v1): Type Inference + Constant Folding + Fusion + DCE + * + * Per the spec's staged rollout: + * v1: Type Inference + Constant Folding + Fusion + DCE + * v2: Predicate/Projection Pushdown + CSE (future) + * v3: Op Reordering + Join Optimization (future) + * -------------------------------------------------------------------------- */ + +/* -------------------------------------------------------------------------- + * Pass 1: Type inference (bottom-up) + * + * Most type inference is done during graph construction (graph.c). + * This pass validates and propagates any missing types. 
+ * -------------------------------------------------------------------------- */ + +static int8_t promote_type(int8_t a, int8_t b) { + if (a == RAY_STR || b == RAY_STR) return RAY_STR; + if (a == RAY_F64 || b == RAY_F64) return RAY_F64; + /* Treat SYM/TIMESTAMP/DATE/TIME as integer-class types */ + if (a == RAY_I64 || b == RAY_I64 || a == RAY_SYM || b == RAY_SYM || + a == RAY_TIMESTAMP || b == RAY_TIMESTAMP) return RAY_I64; + if (a == RAY_I32 || b == RAY_I32 || + a == RAY_DATE || b == RAY_DATE || a == RAY_TIME || b == RAY_TIME) return RAY_I32; + if (a == RAY_I16 || b == RAY_I16) return RAY_I16; + if (a == RAY_U8 || b == RAY_U8) return RAY_U8; + return RAY_BOOL; +} + +static void infer_type_for_node(ray_op_t* node) { + if (node->out_type == 0 && node->opcode != OP_SCAN && node->opcode != OP_CONST) { + /* Comparison and boolean ops always produce BOOL */ + if (node->opcode >= OP_EQ && node->opcode <= OP_GE) { + node->out_type = RAY_BOOL; + return; + } + if (node->opcode == OP_AND || node->opcode == OP_OR) { + node->out_type = RAY_BOOL; + return; + } + if (node->arity >= 2 && node->inputs[0] && node->inputs[1]) { + node->out_type = promote_type(node->inputs[0]->out_type, + node->inputs[1]->out_type); + } else if (node->arity >= 1 && node->inputs[0]) { + node->out_type = node->inputs[0]->out_type; + } + } +} + +static void pass_type_inference(ray_graph_t* g, ray_op_t* root) { /* fills in missing out_type for nodes reachable from root */ + if (!root || root->flags & OP_FLAG_DEAD) return; + + /* Iterative post-order: collect nodes into an order array, then + process in reverse (children before parents). */ + uint32_t nc = g->node_count; + uint32_t stack_cap = nc * 2 + 64; /* extra space for high fan-out nodes */ + uint32_t stack_local[256], order_local[256]; + bool visited_stack[256]; + uint32_t *stack = stack_cap <= 256 ? stack_local : (uint32_t*)ray_sys_alloc(stack_cap * sizeof(uint32_t)); + uint32_t *order = nc <= 256 ? 
order_local : (uint32_t*)ray_sys_alloc(nc * sizeof(uint32_t)); + bool* visited; + if (nc <= 256) { + visited = visited_stack; + } else { + visited = (bool*)ray_sys_alloc(nc * sizeof(bool)); + } + if (!stack || !order || !visited) { + { if (stack_cap > 256) ray_sys_free(stack); if (nc > 256) { ray_sys_free(order); ray_sys_free(visited); } } + return; /* allocation failed: leave types as they are */ + } + memset(visited, 0, nc * sizeof(bool)); + + int sp = 0, oc = 0; + stack[sp++] = root->id; + while (sp > 0 && oc < (int)nc) { + uint32_t nid = stack[--sp]; + ray_op_t* n = nid < nc ? &g->nodes[nid] : NULL; /* reject out-of-range IDs before indexing nodes[] / visited[] */ + if (!n || n->flags & OP_FLAG_DEAD) continue; + if (visited[nid]) continue; + visited[nid] = true; + order[oc++] = nid; + for (int i = 0; i < 2 && i < n->arity; i++) { + if (n->inputs[i] && sp < (int)stack_cap) + stack[sp++] = n->inputs[i]->id; + } + /* M3: Traverse ext node children so type inference reaches all + referenced nodes (GROUP keys/aggs, SORT/PROJECT/SELECT columns, + JOIN keys, WINDOW partition/order/func_inputs). 
*/ + ray_op_ext_t* ext = find_ext(g, nid); + if (ext) { + switch (n->opcode) { + case OP_GROUP: + for (uint8_t k = 0; k < ext->n_keys; k++) + if (ext->keys[k] && !visited[ext->keys[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->keys[k]->id; + for (uint8_t a = 0; a < ext->n_aggs; a++) + if (ext->agg_ins[a] && !visited[ext->agg_ins[a]->id] && sp < (int)stack_cap) + stack[sp++] = ext->agg_ins[a]->id; + break; + case OP_SORT: + case OP_SELECT: + for (uint8_t k = 0; k < ext->sort.n_cols; k++) + if (ext->sort.columns[k] && !visited[ext->sort.columns[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->sort.columns[k]->id; + break; + case OP_JOIN: + for (uint8_t k = 0; k < ext->join.n_join_keys; k++) { + if (ext->join.left_keys[k] && !visited[ext->join.left_keys[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->join.left_keys[k]->id; + if (ext->join.right_keys && ext->join.right_keys[k] && + !visited[ext->join.right_keys[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->join.right_keys[k]->id; + } + 
break; + case OP_WINDOW_JOIN: { + ray_op_ext_t* wj_ext = ext; /* already resolved for this node above; avoids a second O(ext_count) scan */ + if (wj_ext) { + if (wj_ext->asof.time_key && !visited[wj_ext->asof.time_key->id] && sp < (int)stack_cap) + stack[sp++] = wj_ext->asof.time_key->id; + for (uint8_t k = 0; k < wj_ext->asof.n_eq_keys; k++) { + if (wj_ext->asof.eq_keys[k] && !visited[wj_ext->asof.eq_keys[k]->id] && sp < (int)stack_cap) + stack[sp++] = wj_ext->asof.eq_keys[k]->id; + } + } + break; + } + case OP_WINDOW: + for (uint8_t k = 0; k < ext->window.n_part_keys; k++) + if (ext->window.part_keys[k] && !visited[ext->window.part_keys[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->window.part_keys[k]->id; + for (uint8_t k = 0; k < ext->window.n_order_keys; k++) + if (ext->window.order_keys[k] && !visited[ext->window.order_keys[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->window.order_keys[k]->id; + for (uint8_t f = 0; f < ext->window.n_funcs; f++) + if (ext->window.func_inputs[f] && !visited[ext->window.func_inputs[f]->id] && sp < (int)stack_cap) + stack[sp++] = ext->window.func_inputs[f]->id; + break; + /* M3b: 3-input ops store third operand node ID in ext->literal */ + case OP_IF: + case OP_SUBSTR: + case OP_REPLACE: { + uint32_t third_id = (uint32_t)(uintptr_t)ext->literal; + if (third_id < nc && !visited[third_id] && sp < (int)stack_cap) + stack[sp++] = third_id; + break; + } + /* M3c: OP_CONCAT trailing arg node IDs beyond inputs[0..1] */ + case OP_CONCAT: + if (ext->sym >= 2) { + int n_args = (int)ext->sym; + uint32_t* trail = (uint32_t*)((char*)(ext + 1)); + for (int j = 2; j < n_args; j++) { + uint32_t arg_id = trail[j - 2]; + if (arg_id < nc && !visited[arg_id] && sp < (int)stack_cap) + stack[sp++] = arg_id; + } + } + break; + default: + break; + } + } + } + /* Process in reverse discovery order (children before parents). NOTE(review): in a DAG a child shared by two parents can be recorded before its second parent, so that parent is inferred first — confirm shared nodes always get their type at graph construction. */ + for (int i = oc - 1; i >= 0; i--) + infer_type_for_node(&g->nodes[order[i]]); + + { if (stack_cap > 256) ray_sys_free(stack); if (nc > 256) { ray_sys_free(order); ray_sys_free(visited); 
} } +} + +/* -------------------------------------------------------------------------- + * Pass 2: Constant folding + * + * If all inputs to an element-wise op are OP_CONST, evaluate immediately + * and replace the node with a new OP_CONST. + * -------------------------------------------------------------------------- */ + +static bool is_const(ray_op_t* n) { + return n && n->opcode == OP_CONST; +} + +/* O(ext_count) per call; acceptable for typical graph sizes (tens to + hundreds of nodes). L2: intentional duplication to keep files + self-contained — also present in fuse.c. */ +static ray_op_ext_t* find_ext(ray_graph_t* g, uint32_t node_id) { + for (uint32_t i = 0; i < g->ext_count; i++) { + if (g->ext_nodes[i] && g->ext_nodes[i]->base.id == node_id) + return g->ext_nodes[i]; + } + return NULL; +} + +static bool track_ext_node(ray_graph_t* g, ray_op_ext_t* ext) { + if (g->ext_count >= g->ext_cap) { + if (g->ext_cap > UINT32_MAX / 2) return false; + uint32_t new_cap = g->ext_cap == 0 ? 16 : g->ext_cap * 2; + ray_op_ext_t** new_exts = + (ray_op_ext_t**)ray_sys_realloc(g->ext_nodes, new_cap * sizeof(ray_op_ext_t*)); + if (!new_exts) return false; + g->ext_nodes = new_exts; + g->ext_cap = new_cap; + } + g->ext_nodes[g->ext_count++] = ext; + return true; +} + +static ray_op_ext_t* ensure_ext_node(ray_graph_t* g, uint32_t node_id) { + ray_op_ext_t* ext = find_ext(g, node_id); + if (ext) return ext; + + ext = (ray_op_ext_t*)ray_sys_alloc(sizeof(ray_op_ext_t)); + if (!ext) return NULL; + /* M1: Zero-init to prevent use of uninitialized fields (literal, + keys, agg_ins, etc.) before the caller populates them. 
*/ + memset(ext, 0, sizeof(*ext)); + ext->base.id = node_id; + if (!track_ext_node(g, ext)) { + ray_sys_free(ext); + return NULL; + } + return ext; +} + +static bool atom_to_numeric(ray_t* v, double* out_f, int64_t* out_i, bool* is_f64) { + if (!v || !ray_is_atom(v)) return false; + if (RAY_ATOM_IS_NULL(v)) return false; + switch (v->type) { + case -RAY_F64: + *out_f = v->f64; + *out_i = (int64_t)v->f64; + *is_f64 = true; + return true; + case -RAY_I64: + case -RAY_SYM: + case -RAY_DATE: + case -RAY_TIME: + case -RAY_TIMESTAMP: + *out_i = v->i64; + *out_f = (double)v->i64; + *is_f64 = false; + return true; + case -RAY_I32: + *out_i = (int64_t)v->i32; + *out_f = (double)v->i32; + *is_f64 = false; + return true; + case -RAY_I16: + *out_i = (int64_t)v->i16; + *out_f = (double)v->i16; + *is_f64 = false; + return true; + case -RAY_U8: + case -RAY_BOOL: + *out_i = (int64_t)v->u8; + *out_f = (double)v->u8; + *is_f64 = false; + return true; + default: + return false; + } +} + +static bool replace_with_const(ray_graph_t* g, ray_op_t* node, ray_t* literal) { + /* H3: If the node already has an ext node (GROUP, SORT, JOIN, etc.), + skip constant replacement — overwriting the ext union would clobber + structural data. Structural ops should never be constant-folded. */ + if (find_ext(g, node->id)) return false; + + ray_op_ext_t* ext = ensure_ext_node(g, node->id); + if (!ext) return false; + + ext->base = *node; + ext->base.opcode = OP_CONST; + ext->base.arity = 0; + ext->base.inputs[0] = NULL; + ext->base.inputs[1] = NULL; + ext->base.flags &= (uint8_t)~OP_FLAG_FUSED; + ext->base.out_type = literal->type < 0 ? 
(int8_t)(-(int)literal->type) : literal->type; + ext->literal = literal; + + *node = ext->base; + g->nodes[node->id] = ext->base; + return true; +} + +static bool fold_unary_const(ray_graph_t* g, ray_op_t* node) { + ray_op_t* operand = node->inputs[0]; + if (!is_const(operand)) return false; + + ray_op_ext_t* oe = find_ext(g, operand->id); + if (!oe || !oe->literal || !ray_is_atom(oe->literal)) return false; + + double vf = 0.0; + int64_t vi = 0; + bool is_f64 = false; + if (!atom_to_numeric(oe->literal, &vf, &vi, &is_f64)) return false; + + ray_t* folded = NULL; + switch (node->opcode) { + case OP_NEG: + if (is_f64) folded = ray_f64(-vf); + else if (vi == INT64_MIN) return false; /* -INT64_MIN overflows */ + else folded = ray_i64(-vi); + break; + case OP_ABS: + if (is_f64) + folded = ray_f64(fabs(vf)); + else if (vi == INT64_MIN) return false; /* -INT64_MIN overflows */ + else folded = ray_i64(vi < 0 ? -vi : vi); + break; + case OP_NOT: + folded = ray_bool(is_f64 ? vf == 0.0 : vi == 0); + break; + case OP_SQRT: + folded = ray_f64(sqrt(is_f64 ? vf : (double)vi)); + break; + case OP_LOG: + folded = ray_f64(log(is_f64 ? vf : (double)vi)); + break; + case OP_EXP: + folded = ray_f64(exp(is_f64 ? vf : (double)vi)); + break; + case OP_CEIL: + folded = is_f64 ? ray_f64(ceil(vf)) : ray_i64(vi); + break; + case OP_FLOOR: + folded = is_f64 ? 
ray_f64(floor(vf)) : ray_i64(vi); + break; + default: + return false; + } + + if (!folded || RAY_IS_ERR(folded)) return false; + if (!replace_with_const(g, node, folded)) { + ray_release(folded); + return false; + } + return true; +} + +static bool fold_binary_const(ray_graph_t* g, ray_op_t* node) { + ray_op_t* lhs = node->inputs[0]; + ray_op_t* rhs = node->inputs[1]; + if (!is_const(lhs) || !is_const(rhs)) return false; + + ray_op_ext_t* le = find_ext(g, lhs->id); + ray_op_ext_t* re = find_ext(g, rhs->id); + if (!le || !re || !le->literal || !re->literal) return false; + if (!ray_is_atom(le->literal) || !ray_is_atom(re->literal)) return false; + + double lf = 0.0, rf = 0.0; + int64_t li = 0, ri = 0; + bool l_is_f64 = false, r_is_f64 = false; + if (!atom_to_numeric(le->literal, &lf, &li, &l_is_f64)) return false; + if (!atom_to_numeric(re->literal, &rf, &ri, &r_is_f64)) return false; + + ray_t* folded = NULL; + switch (node->out_type) { + case RAY_F64: { + double lv = l_is_f64 ? lf : (double)li; + double rv = r_is_f64 ? rf : (double)ri; + double r = 0.0; + switch (node->opcode) { + case OP_ADD: r = lv + rv; break; + case OP_SUB: r = lv - rv; break; + case OP_MUL: r = lv * rv; break; + case OP_DIV: r = lv / rv; break; /* IEEE 754: ±Inf or NaN */ + case OP_MOD: r = fmod(lv, rv); break; /* IEEE 754: NaN for rv==0 */ + case OP_MIN2: r = fmin(lv, rv); break; /* NaN-propagating */ + case OP_MAX2: r = fmax(lv, rv); break; /* NaN-propagating */ + default: return false; + } + folded = ray_f64(r); + break; + } + case RAY_I64: { + int64_t lv = l_is_f64 ? (int64_t)lf : li; + int64_t rv = r_is_f64 ? (int64_t)rf : ri; + int64_t r = 0; + switch (node->opcode) { + case OP_ADD: r = (int64_t)((uint64_t)lv + (uint64_t)rv); break; + case OP_SUB: r = (int64_t)((uint64_t)lv - (uint64_t)rv); break; + case OP_MUL: r = (int64_t)((uint64_t)lv * (uint64_t)rv); break; + case OP_DIV: + r = (rv != 0 && !(lv == INT64_MIN && rv == -1)) ? 
lv / rv : 0; + break; + case OP_MOD: + r = (rv != 0 && !(lv == INT64_MIN && rv == -1)) ? lv % rv : 0; + break; + case OP_MIN2: r = lv < rv ? lv : rv; break; + case OP_MAX2: r = lv > rv ? lv : rv; break; + default: return false; + } + folded = ray_i64(r); + break; + } + case RAY_BOOL: { + /* NaN comparison follows IEEE 754; SQL NULL handled separately + in executor. */ + double lv = l_is_f64 ? lf : (double)li; + double rv = r_is_f64 ? rf : (double)ri; + bool r = false; + switch (node->opcode) { + case OP_EQ: r = lv == rv; break; + case OP_NE: r = lv != rv; break; + case OP_LT: r = lv < rv; break; + case OP_LE: r = lv <= rv; break; + case OP_GT: r = lv > rv; break; + case OP_GE: r = lv >= rv; break; + case OP_AND: r = (lv != 0.0) && (rv != 0.0); break; + case OP_OR: r = (lv != 0.0) || (rv != 0.0); break; + default: return false; + } + folded = ray_bool(r); + break; + } + case RAY_I32: case RAY_DATE: case RAY_TIME: { + int32_t lv = (int32_t)(l_is_f64 ? (int64_t)lf : li); + int32_t rv = (int32_t)(r_is_f64 ? (int64_t)rf : ri); + int32_t r = 0; + switch (node->opcode) { + case OP_ADD: r = (int32_t)((uint32_t)lv + (uint32_t)rv); break; + case OP_SUB: r = (int32_t)((uint32_t)lv - (uint32_t)rv); break; + case OP_MUL: r = (int32_t)((uint32_t)lv * (uint32_t)rv); break; + case OP_DIV: + r = (rv != 0 && !(lv == INT32_MIN && rv == -1)) ? lv / rv : 0; + break; + case OP_MOD: + r = (rv != 0 && !(lv == INT32_MIN && rv == -1)) ? lv % rv : 0; + break; + case OP_MIN2: r = lv < rv ? lv : rv; break; + case OP_MAX2: r = lv > rv ? 
lv : rv; break; + default: return false; + } + folded = ray_i32(r); + break; + } + default: + return false; + } + + if (!folded || RAY_IS_ERR(folded)) return false; + if (!replace_with_const(g, node, folded)) { + ray_release(folded); + return false; + } + return true; +} + +static bool atom_to_bool(ray_t* v, bool* out) { + double vf = 0.0; + int64_t vi = 0; + bool is_f64 = false; + if (!atom_to_numeric(v, &vf, &vi, &is_f64)) return false; + if (is_f64) { + *out = vf != 0.0; + } else { + *out = vi != 0; + } + return true; +} + +static bool fold_filter_const_predicate(ray_graph_t* g, ray_op_t* node) { + if (node->opcode != OP_FILTER || node->arity != 2) return false; + ray_op_t* pred = node->inputs[1]; + if (!is_const(pred)) return false; + + ray_op_ext_t* pred_ext = find_ext(g, pred->id); + if (!pred_ext || !pred_ext->literal || !ray_is_atom(pred_ext->literal)) return false; + + bool keep_rows = false; + if (!atom_to_bool(pred_ext->literal, &keep_rows)) return false; + + if (keep_rows) { + node->opcode = OP_MATERIALIZE; + node->arity = 1; + node->inputs[1] = NULL; + node->flags &= (uint8_t)~OP_FLAG_FUSED; + g->nodes[node->id] = *node; + return true; + } + + ray_op_ext_t* ext = ensure_ext_node(g, node->id); + if (!ext) return false; + ext->base = *node; + ext->base.opcode = OP_HEAD; + ext->base.arity = 1; + ext->base.inputs[1] = NULL; + ext->base.est_rows = 0; + ext->base.flags &= (uint8_t)~OP_FLAG_FUSED; + ext->sym = 0; + + *node = ext->base; + g->nodes[node->id] = ext->base; + return true; +} + +/* Fold reduction(OP_TIL(n)) → closed-form result. 
+ * sum(0..n-1) = n*(n-1)/2, min(0..n-1) = 0, max(0..n-1) = n-1, + * count(0..n-1) = n, avg(0..n-1) = (n-1)/2.0 */ +static bool fold_reduction_til(ray_graph_t* g, ray_op_t* node) { + if (node->arity != 1) return false; + ray_op_t* input = node->inputs[0]; + if (!input || input->opcode != OP_TIL) return false; + ray_op_ext_t* til_ext = find_ext(g, input->id); + if (!til_ext || !til_ext->literal) return false; + int64_t n = til_ext->literal->i64; + if (n <= 0) return false; + + ray_t* folded = NULL; + switch (node->opcode) { + case OP_SUM: folded = ray_i64((n * (n - 1)) / 2); break; + case OP_MIN: folded = ray_i64(0); break; + case OP_MAX: folded = ray_i64(n - 1); break; + case OP_COUNT: folded = ray_i64(n); break; + case OP_AVG: folded = ray_f64((double)(n - 1) / 2.0); break; + case OP_FIRST: folded = ray_i64(0); break; + case OP_LAST: folded = ray_i64(n - 1); break; + default: return false; + } + if (!folded || RAY_IS_ERR(folded)) return false; + if (!replace_with_const(g, node, folded)) { ray_release(folded); return false; } + return true; +} + +static void fold_node(ray_graph_t* g, ray_op_t* node) { + /* Fold unary element-wise ops with constant input */ + if (node->arity == 1 && node->opcode >= OP_NEG && node->opcode <= OP_FLOOR) { + (void)fold_unary_const(g, node); + } + /* Fold binary element-wise ops with two const inputs */ + if (node->arity == 2 && node->opcode >= OP_ADD && node->opcode <= OP_MAX2) { + (void)fold_binary_const(g, node); + } + /* Fold reduction(til(n)) to closed-form */ + if (node->arity == 1 && node->opcode >= OP_SUM && node->opcode <= OP_LAST) { + (void)fold_reduction_til(g, node); + } + /* FILTER with constant predicate can be reduced to pass-through/empty. */ + (void)fold_filter_const_predicate(g, node); +} + +static void pass_constant_fold(ray_graph_t* g, ray_op_t* root) { + if (!root || root->flags & OP_FLAG_DEAD) return; + + /* Iterative post-order: collect nodes, then process in reverse + (children before parents). 
*/ + uint32_t nc = g->node_count; + uint32_t stack_cap = nc * 2 + 64; /* extra space for high fan-out nodes */ + uint32_t stack_local[256], order_local[256]; + bool visited_stack[256]; + uint32_t *stack = stack_cap <= 256 ? stack_local : (uint32_t*)ray_sys_alloc(stack_cap * sizeof(uint32_t)); + uint32_t *order = nc <= 256 ? order_local : (uint32_t*)ray_sys_alloc(nc * sizeof(uint32_t)); + bool* visited; + if (nc <= 256) { + visited = visited_stack; + } else { + visited = (bool*)ray_sys_alloc(nc * sizeof(bool)); + } + if (!stack || !order || !visited) { + { if (stack_cap > 256) ray_sys_free(stack); if (nc > 256) { ray_sys_free(order); ray_sys_free(visited); } } + return; + } + memset(visited, 0, nc * sizeof(bool)); + + int sp = 0, oc = 0; + stack[sp++] = root->id; + while (sp > 0 && oc < (int)nc) { + uint32_t nid = stack[--sp]; + ray_op_t* n = &g->nodes[nid]; + if (!n || n->flags & OP_FLAG_DEAD) continue; + if (visited[nid]) continue; + visited[nid] = true; + order[oc++] = nid; + for (int i = 0; i < 2 && i < n->arity; i++) { + if (n->inputs[i] && sp < (int)stack_cap) + stack[sp++] = n->inputs[i]->id; + } + /* H1: Traverse ext-node children so constant folding reaches all + referenced nodes (GROUP keys/aggs, SORT/PROJECT/SELECT columns, + JOIN keys, WINDOW partition/order/func_inputs). 
*/ + ray_op_ext_t* ext = find_ext(g, nid); + if (ext) { + switch (n->opcode) { + case OP_GROUP: + for (uint8_t k = 0; k < ext->n_keys; k++) + if (ext->keys[k] && !visited[ext->keys[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->keys[k]->id; + for (uint8_t a = 0; a < ext->n_aggs; a++) + if (ext->agg_ins[a] && !visited[ext->agg_ins[a]->id] && sp < (int)stack_cap) + stack[sp++] = ext->agg_ins[a]->id; + break; + case OP_SORT: + case OP_SELECT: + for (uint8_t k = 0; k < ext->sort.n_cols; k++) + if (ext->sort.columns[k] && !visited[ext->sort.columns[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->sort.columns[k]->id; + break; + case OP_JOIN: + for (uint8_t k = 0; k < ext->join.n_join_keys; k++) { + if (ext->join.left_keys[k] && !visited[ext->join.left_keys[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->join.left_keys[k]->id; + if (ext->join.right_keys && ext->join.right_keys[k] && + !visited[ext->join.right_keys[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->join.right_keys[k]->id; + } + break; + case OP_WINDOW_JOIN: { + ray_op_ext_t* wj_ext = find_ext(g, n->id); + if (wj_ext) { + if (wj_ext->asof.time_key && !visited[wj_ext->asof.time_key->id] && sp < (int)stack_cap) + stack[sp++] = wj_ext->asof.time_key->id; + for (uint8_t k = 0; k < wj_ext->asof.n_eq_keys; k++) { + if (wj_ext->asof.eq_keys[k] && !visited[wj_ext->asof.eq_keys[k]->id] && sp < (int)stack_cap) + stack[sp++] = wj_ext->asof.eq_keys[k]->id; + } + } + break; + } + case OP_WINDOW: + for (uint8_t k = 0; k < ext->window.n_part_keys; k++) + if (ext->window.part_keys[k] && !visited[ext->window.part_keys[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->window.part_keys[k]->id; + for (uint8_t k = 0; k < ext->window.n_order_keys; k++) + if (ext->window.order_keys[k] && !visited[ext->window.order_keys[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->window.order_keys[k]->id; + for (uint8_t f = 0; f < ext->window.n_funcs; f++) + if (ext->window.func_inputs[f] && 
!visited[ext->window.func_inputs[f]->id] && sp < (int)stack_cap) + stack[sp++] = ext->window.func_inputs[f]->id; + break; + /* H1b: 3-input ops store third operand node ID in ext->literal */ + case OP_IF: + case OP_SUBSTR: + case OP_REPLACE: { + uint32_t third_id = (uint32_t)(uintptr_t)ext->literal; + if (third_id < nc && !visited[third_id] && sp < (int)stack_cap) + stack[sp++] = third_id; + break; + } + /* H1c: OP_CONCAT trailing arg node IDs beyond inputs[0..1] */ + case OP_CONCAT: + if (ext->sym >= 2) { + int n_args = (int)ext->sym; + uint32_t* trail = (uint32_t*)((char*)(ext + 1)); + for (int j = 2; j < n_args; j++) { + uint32_t arg_id = trail[j - 2]; + if (arg_id < nc && !visited[arg_id] && sp < (int)stack_cap) + stack[sp++] = arg_id; + } + } + break; + default: + break; + } + } + } + /* Process in reverse order (children before parents) */ + for (int i = oc - 1; i >= 0; i--) + fold_node(g, &g->nodes[order[i]]); + + { if (stack_cap > 256) ray_sys_free(stack); if (nc > 256) { ray_sys_free(order); ray_sys_free(visited); } } +} + +/* -------------------------------------------------------------------------- + * Pass 3: Dead code elimination + * + * Mark nodes unreachable from root as DEAD. + * -------------------------------------------------------------------------- */ + +static void mark_live(ray_graph_t* g, ray_op_t* root, bool* live) { + if (!root) return; + + uint32_t nc = g->node_count; + if (nc > UINT32_MAX / 2) return; + /* Worst case: each node can contribute up to ~N children (CONCAT trailing), + but nc*2 is a safe upper bound for the stack. */ + uint32_t stack_cap = nc * 2; + uint32_t stack_local[256]; + uint32_t *stack = stack_cap <= 256 ? 
stack_local : (uint32_t*)ray_sys_alloc(stack_cap * sizeof(uint32_t)); + if (!stack) return; + int sp = 0; + stack[sp++] = root->id; + while (sp > 0) { + uint32_t nid = stack[--sp]; + if (live[nid]) continue; + live[nid] = true; + ray_op_t* n = &g->nodes[nid]; + for (int i = 0; i < 2; i++) { + if (n->inputs[i] && sp < (int)stack_cap) + stack[sp++] = n->inputs[i]->id; + } + /* H4: 3-input ops (OP_IF, OP_SUBSTR, OP_REPLACE) store the third + operand node ID as (uintptr_t)ext->literal. */ + if (n->opcode == OP_IF || n->opcode == OP_SUBSTR || n->opcode == OP_REPLACE) { + ray_op_ext_t* ext = find_ext(g, nid); + if (ext) { + uint32_t third_id = (uint32_t)(uintptr_t)ext->literal; + if (third_id < nc && sp < (int)stack_cap) + stack[sp++] = third_id; + } + } + /* H5: OP_CONCAT stores extra arg IDs (beyond inputs[0..1]) as + uint32_t values in trailing bytes after the ext node. + ext->sym holds the total arg count. */ + if (n->opcode == OP_CONCAT) { + ray_op_ext_t* ext = find_ext(g, nid); + /* M4: Guard against ext->sym < 2 — trailing uint32_t values + only exist when there are more than 2 arguments. */ + if (ext && ext->sym >= 2) { + int n_args = (int)ext->sym; + uint32_t* trail = (uint32_t*)((char*)(ext + 1)); + for (int i = 2; i < n_args; i++) { + uint32_t arg_id = trail[i - 2]; + if (arg_id < nc && sp < (int)stack_cap) + stack[sp++] = arg_id; + } + } + } + /* H1: Traverse ext node children for structural ops so DCE does + not incorrectly mark referenced nodes as dead. 
*/ + if (n->opcode == OP_GROUP || n->opcode == OP_SORT || + n->opcode == OP_JOIN || n->opcode == OP_ANTIJOIN || + n->opcode == OP_WINDOW_JOIN || + n->opcode == OP_WINDOW || n->opcode == OP_PIVOT || + n->opcode == OP_SELECT) { + ray_op_ext_t* ext = find_ext(g, nid); + if (ext) { + switch (n->opcode) { + case OP_GROUP: + for (uint8_t k = 0; k < ext->n_keys; k++) { + if (ext->keys[k] && !live[ext->keys[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->keys[k]->id; + } + for (uint8_t a = 0; a < ext->n_aggs; a++) { + if (ext->agg_ins[a] && !live[ext->agg_ins[a]->id] && sp < (int)stack_cap) + stack[sp++] = ext->agg_ins[a]->id; + } + break; + case OP_SORT: + case OP_SELECT: + for (uint8_t k = 0; k < ext->sort.n_cols; k++) { + if (ext->sort.columns[k] && !live[ext->sort.columns[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->sort.columns[k]->id; + } + break; + case OP_JOIN: + case OP_ANTIJOIN: + for (uint8_t k = 0; k < ext->join.n_join_keys; k++) { + if (ext->join.left_keys[k] && !live[ext->join.left_keys[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->join.left_keys[k]->id; + if (ext->join.right_keys && ext->join.right_keys[k] && + !live[ext->join.right_keys[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->join.right_keys[k]->id; + } + break; + case OP_PIVOT: + for (uint8_t k = 0; k < ext->pivot.n_index; k++) { + if (ext->pivot.index_cols[k] && !live[ext->pivot.index_cols[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->pivot.index_cols[k]->id; + } + if (ext->pivot.pivot_col && !live[ext->pivot.pivot_col->id] && sp < (int)stack_cap) + stack[sp++] = ext->pivot.pivot_col->id; + if (ext->pivot.value_col && !live[ext->pivot.value_col->id] && sp < (int)stack_cap) + stack[sp++] = ext->pivot.value_col->id; + break; + case OP_WINDOW_JOIN: { + ray_op_ext_t* wj_ext = find_ext(g, n->id); + if (wj_ext) { + if (wj_ext->asof.time_key && !live[wj_ext->asof.time_key->id] && sp < (int)stack_cap) + stack[sp++] = wj_ext->asof.time_key->id; + for (uint8_t k = 0; k < 
wj_ext->asof.n_eq_keys; k++) { + if (wj_ext->asof.eq_keys[k] && !live[wj_ext->asof.eq_keys[k]->id] && sp < (int)stack_cap) + stack[sp++] = wj_ext->asof.eq_keys[k]->id; + } + } + break; + } + case OP_WINDOW: + for (uint8_t k = 0; k < ext->window.n_part_keys; k++) { + if (ext->window.part_keys[k] && !live[ext->window.part_keys[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->window.part_keys[k]->id; + } + for (uint8_t k = 0; k < ext->window.n_order_keys; k++) { + if (ext->window.order_keys[k] && !live[ext->window.order_keys[k]->id] && sp < (int)stack_cap) + stack[sp++] = ext->window.order_keys[k]->id; + } + for (uint8_t f = 0; f < ext->window.n_funcs; f++) { + if (ext->window.func_inputs[f] && !live[ext->window.func_inputs[f]->id] && sp < (int)stack_cap) + stack[sp++] = ext->window.func_inputs[f]->id; + } + break; + default: + break; + } + } + } + } + if (stack_cap > 256) ray_sys_free(stack); +} + +static void pass_dce(ray_graph_t* g, ray_op_t* root) { + uint32_t nc = g->node_count; + bool* live; + bool live_stack[256]; + if (nc <= 256) { + live = live_stack; + } else { + live = (bool*)ray_sys_alloc(nc * sizeof(bool)); + if (!live) return; + } + memset(live, 0, nc * sizeof(bool)); + + mark_live(g, root, live); + + for (uint32_t i = 0; i < nc; i++) { + if (!live[i]) { + g->nodes[i].flags |= OP_FLAG_DEAD; + } + } + if (nc > 256) ray_sys_free(live); +} + +/* -------------------------------------------------------------------------- + * Pass: SIP (Sideways Information Passing) + * + * Bottom-up DAG walk. For each OP_EXPAND: + * 1. Find downstream filter on target side + * 2. Reverse-CSR: mark source nodes that have any passing target -> RAY_SEL + * 3. Attach source_sel to upstream scan + * + * Currently a no-op placeholder — activated when graph ops are present. 
+ * -------------------------------------------------------------------------- */ + +/* Find downstream consumer of a node (first node that uses it as input) */ +static ray_op_t* find_consumer(ray_graph_t* g, uint32_t node_id) { + for (uint32_t i = 0; i < g->node_count; i++) { + ray_op_t* n = &g->nodes[i]; + if (n->flags & OP_FLAG_DEAD) continue; + for (int j = 0; j < n->arity && j < 2; j++) { + if (n->inputs[j] && n->inputs[j]->id == node_id) + return n; + } + } + return NULL; +} + +/* Find upstream OP_SCAN that feeds into a node via input chain (iterative) */ +static ray_op_t* find_upstream_scan(ray_graph_t* g, ray_op_t* node) { + uint32_t limit = g ? g->node_count : 1024; + for (uint32_t steps = 0; node && steps < limit; steps++) { + if (node->opcode == OP_SCAN) return node; + if (node->arity > 0 && node->inputs[0]) + node = node->inputs[0]; + else return NULL; + } + return NULL; +} + +static void sip_pass(ray_graph_t* g, ray_op_t* root) { + if (!g || !root) return; + + uint32_t nc = g->node_count; + + /* Collect graph traversal nodes (bottom-up for chained SIP) */ + uint32_t expand_ids[64]; + uint32_t n_expands = 0; + for (uint32_t i = 0; i < nc && n_expands < 64; i++) { + ray_op_t* n = &g->nodes[i]; + if (n->flags & OP_FLAG_DEAD) continue; + if (n->opcode != OP_EXPAND && n->opcode != OP_VAR_EXPAND + && n->opcode != OP_SHORTEST_PATH) continue; + expand_ids[n_expands++] = i; + } + + /* Process bottom-up (deepest expand first — process in reverse ID order + * since deeper nodes in the pipeline tend to have higher IDs) */ + for (int ei = (int)n_expands - 1; ei >= 0; ei--) { + ray_op_t* expand = &g->nodes[expand_ids[ei]]; + ray_op_ext_t* ext = find_ext(g, expand->id); + if (!ext || !ext->graph.rel) continue; + + /* 1. Find downstream consumer — look for OP_FILTER on target side */ + ray_op_t* consumer = find_consumer(g, expand->id); + if (!consumer) continue; + + /* If the consumer is OP_FILTER, we can extract a semijoin. 
+ * The filter's condition restricts which target nodes pass. + * We reverse-propagate through the CSR to mark which source + * nodes could produce any passing target. */ + if (consumer->opcode != OP_FILTER) continue; + + /* 2. Find the input scan to this expand (source side) */ + ray_op_t* src_scan = NULL; + if (expand->arity > 0 && expand->inputs[0]) + src_scan = find_upstream_scan(g, expand->inputs[0]); + + if (!src_scan) continue; + + /* 3. Propagate backward: attach selection hint to the expand node. + * The executor will use this to build a RAY_SEL bitmap at runtime + * by evaluating the filter condition, reverse-CSR propagating, + * and applying the resulting source-side selection. + * + * We store the filter node ID in the expand's ext pad bytes + * so the executor can find the downstream filter for runtime SIP. */ + /* pad[2] = 1 signals the executor to build SIP bitmap at runtime. + * Note: pad is only 3 bytes (pad[0..2]) — do NOT write uint16_t + * at pad+2 as that overflows into the 'id' field at offset 8. */ + ext->base.pad[2] = 1; + } +} + +/* -------------------------------------------------------------------------- + * Pass: Factorized detection + * + * Detect OP_EXPAND → OP_GROUP patterns where factorized execution + * avoids materializing the full cross-product. 
+ * -------------------------------------------------------------------------- */
+/* For every live OP_EXPAND whose ext node is not yet marked factorized:
+ * if its first consumer is an OP_GROUP with exactly one key, and that key is
+ * an OP_SCAN of the symbol "_src", set graph.factorized on the expand. */
+static void factorize_pass(ray_graph_t* g, ray_op_t* root) {
+    if (!g || !root) return;
+
+    const uint32_t total = g->node_count;
+    for (uint32_t id = 0; id < total; id++) {
+        ray_op_t* expand = &g->nodes[id];
+        if ((expand->flags & OP_FLAG_DEAD) || expand->opcode != OP_EXPAND)
+            continue;
+
+        ray_op_ext_t* expand_ext = find_ext(g, expand->id);
+        if (!expand_ext) continue;
+        if (expand_ext->graph.factorized) continue; /* already set by SIP pass */
+
+        /* Look for immediate OP_GROUP consumer with _src as group key */
+        ray_op_t* group = find_consumer(g, expand->id);
+        if (!group || group->opcode != OP_GROUP) continue;
+
+        ray_op_ext_t* group_ext = find_ext(g, group->id);
+        if (!group_ext) continue;
+        if (group_ext->n_keys != 1 || !group_ext->keys[0]) continue;
+
+        ray_op_ext_t* key_ext = find_ext(g, group_ext->keys[0]->id);
+        if (!key_ext || key_ext->base.opcode != OP_SCAN) continue;
+
+        int64_t src_sym = ray_sym_intern("_src", 4);
+        if (key_ext->sym != src_sym) continue;
+        expand_ext->graph.factorized = 1;
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Pass: Filter reordering
+ *
+ * Reorder chained OP_FILTER nodes so cheapest predicates execute first.
+ * Also splits AND trees into separate chained filters.
+ * -------------------------------------------------------------------------- */
+
+/* Allocate a new node in the graph (for use during optimization passes).
+ * Same logic as graph_alloc_node in graph.c but local to opt.c.
*/ +static ray_op_t* graph_alloc_node_opt(ray_graph_t* g) { + if (g->node_count >= g->node_cap) { + if (g->node_cap > UINT32_MAX / 2) return NULL; + uint32_t new_cap = g->node_cap * 2; + uintptr_t old_base = (uintptr_t)g->nodes; + ray_op_t* new_nodes = (ray_op_t*)ray_sys_realloc(g->nodes, + new_cap * sizeof(ray_op_t)); + if (!new_nodes) return NULL; + g->nodes = new_nodes; + g->node_cap = new_cap; + /* Fix up all input pointers after realloc */ + ptrdiff_t delta = (ptrdiff_t)((uintptr_t)g->nodes - old_base); + if (delta != 0) { + for (uint32_t i = 0; i < g->node_count; i++) { + if (g->nodes[i].inputs[0]) + g->nodes[i].inputs[0] = (ray_op_t*)((char*)g->nodes[i].inputs[0] + delta); + if (g->nodes[i].inputs[1]) + g->nodes[i].inputs[1] = (ray_op_t*)((char*)g->nodes[i].inputs[1] + delta); + } + /* Fix ext node input pointers */ + for (uint32_t i = 0; i < g->ext_count; i++) { + if (g->ext_nodes[i]) { + if (g->ext_nodes[i]->base.inputs[0]) + g->ext_nodes[i]->base.inputs[0] = + (ray_op_t*)((char*)g->ext_nodes[i]->base.inputs[0] + delta); + if (g->ext_nodes[i]->base.inputs[1]) + g->ext_nodes[i]->base.inputs[1] = + (ray_op_t*)((char*)g->ext_nodes[i]->base.inputs[1] + delta); + /* Fix structural op column pointers */ + switch (g->ext_nodes[i]->base.opcode) { + case OP_GROUP: + for (uint8_t k = 0; k < g->ext_nodes[i]->n_keys; k++) + if (g->ext_nodes[i]->keys[k]) + g->ext_nodes[i]->keys[k] = + (ray_op_t*)((char*)g->ext_nodes[i]->keys[k] + delta); + for (uint8_t a = 0; a < g->ext_nodes[i]->n_aggs; a++) + if (g->ext_nodes[i]->agg_ins[a]) + g->ext_nodes[i]->agg_ins[a] = + (ray_op_t*)((char*)g->ext_nodes[i]->agg_ins[a] + delta); + break; + case OP_SORT: + case OP_SELECT: + for (uint8_t k = 0; k < g->ext_nodes[i]->sort.n_cols; k++) + if (g->ext_nodes[i]->sort.columns[k]) + g->ext_nodes[i]->sort.columns[k] = + (ray_op_t*)((char*)g->ext_nodes[i]->sort.columns[k] + delta); + break; + case OP_JOIN: + for (uint8_t k = 0; k < g->ext_nodes[i]->join.n_join_keys; k++) { + if 
(g->ext_nodes[i]->join.left_keys[k]) + g->ext_nodes[i]->join.left_keys[k] = + (ray_op_t*)((char*)g->ext_nodes[i]->join.left_keys[k] + delta); + if (g->ext_nodes[i]->join.right_keys && + g->ext_nodes[i]->join.right_keys[k]) + g->ext_nodes[i]->join.right_keys[k] = + (ray_op_t*)((char*)g->ext_nodes[i]->join.right_keys[k] + delta); + } + break; + case OP_WINDOW_JOIN: + if (g->ext_nodes[i]->asof.time_key) + g->ext_nodes[i]->asof.time_key = (ray_op_t*)((char*)g->ext_nodes[i]->asof.time_key + delta); + for (uint8_t k = 0; k < g->ext_nodes[i]->asof.n_eq_keys; k++) + if (g->ext_nodes[i]->asof.eq_keys[k]) + g->ext_nodes[i]->asof.eq_keys[k] = (ray_op_t*)((char*)g->ext_nodes[i]->asof.eq_keys[k] + delta); + break; + case OP_ANTIJOIN: + for (uint8_t k = 0; k < g->ext_nodes[i]->join.n_join_keys; k++) { + if (g->ext_nodes[i]->join.left_keys[k]) + g->ext_nodes[i]->join.left_keys[k] = + (ray_op_t*)((char*)g->ext_nodes[i]->join.left_keys[k] + delta); + if (g->ext_nodes[i]->join.right_keys && + g->ext_nodes[i]->join.right_keys[k]) + g->ext_nodes[i]->join.right_keys[k] = + (ray_op_t*)((char*)g->ext_nodes[i]->join.right_keys[k] + delta); + } + break; + case OP_PIVOT: + for (uint8_t k = 0; k < g->ext_nodes[i]->pivot.n_index; k++) + if (g->ext_nodes[i]->pivot.index_cols[k]) + g->ext_nodes[i]->pivot.index_cols[k] = + (ray_op_t*)((char*)g->ext_nodes[i]->pivot.index_cols[k] + delta); + if (g->ext_nodes[i]->pivot.pivot_col) + g->ext_nodes[i]->pivot.pivot_col = + (ray_op_t*)((char*)g->ext_nodes[i]->pivot.pivot_col + delta); + if (g->ext_nodes[i]->pivot.value_col) + g->ext_nodes[i]->pivot.value_col = + (ray_op_t*)((char*)g->ext_nodes[i]->pivot.value_col + delta); + break; + case OP_WINDOW: + for (uint8_t k = 0; k < g->ext_nodes[i]->window.n_part_keys; k++) + if (g->ext_nodes[i]->window.part_keys[k]) + g->ext_nodes[i]->window.part_keys[k] = + (ray_op_t*)((char*)g->ext_nodes[i]->window.part_keys[k] + delta); + for (uint8_t k = 0; k < g->ext_nodes[i]->window.n_order_keys; k++) + if 
(g->ext_nodes[i]->window.order_keys[k]) + g->ext_nodes[i]->window.order_keys[k] = + (ray_op_t*)((char*)g->ext_nodes[i]->window.order_keys[k] + delta); + for (uint8_t f = 0; f < g->ext_nodes[i]->window.n_funcs; f++) + if (g->ext_nodes[i]->window.func_inputs[f]) + g->ext_nodes[i]->window.func_inputs[f] = + (ray_op_t*)((char*)g->ext_nodes[i]->window.func_inputs[f] + delta); + break; + default: + break; + } + } + } + } + } + ray_op_t* n = &g->nodes[g->node_count]; + memset(n, 0, sizeof(ray_op_t)); + n->id = g->node_count; + g->node_count++; + return n; +} + +/* Count how many live nodes use node_id as an input. + * Returns the consumer count (0 if unreferenced). */ +static int count_node_consumers(ray_graph_t* g, uint32_t node_id) { + int count = 0; + uint32_t nc = g->node_count; + for (uint32_t j = 0; j < nc; j++) { + ray_op_t* c = &g->nodes[j]; + if (c->flags & OP_FLAG_DEAD) continue; + for (int k = 0; k < c->arity && k < 2; k++) { + if (c->inputs[k] && c->inputs[k]->id == node_id) { + count++; + break; /* count each consumer node once */ + } + } + } + for (uint32_t j = 0; j < g->ext_count; j++) { + if (!g->ext_nodes[j]) continue; + ray_op_t* c = &g->ext_nodes[j]->base; + if (c->flags & OP_FLAG_DEAD) continue; + if (c->id < nc) continue; /* already counted in nodes[] */ + for (int k = 0; k < c->arity && k < 2; k++) { + if (c->inputs[k] && c->inputs[k]->id == node_id) { + count++; + break; + } + } + } + return count; +} + +/* -------------------------------------------------------------------------- + * Pass: Predicate pushdown + * + * Move OP_FILTER nodes below PROJECT/SELECT, GROUP (key-only), JOIN + * (one-sided), and EXPAND (source-only) to reduce rows flowing through + * expensive operators. + * -------------------------------------------------------------------------- */ + +/* Collect all OP_SCAN node IDs referenced by a predicate subtree. 
+ * Returns count on success, -1 if traversal was truncated (stack or result
+ * overflow) — caller must treat -1 as "unknown" and skip optimisation. */
+/* The walk follows inputs[0..1], the third operand of IF/SUBSTR/REPLACE and
+ * CONCAT's trailing arguments. Dead nodes are not descended into, and the
+ * walk does not continue below a scan. Graphs with more than 4096 nodes are
+ * reported as -1; a NULL pred or max <= 0 yields 0. */
+static int collect_pred_scans(ray_graph_t* g, ray_op_t* pred,
+                              uint32_t* scan_ids, int max) {
+    enum { PENDING_CAP = 64, SEEN_CAP = 4096 };
+
+    if (!pred || max <= 0) return 0;
+
+    const uint32_t total = g->node_count;
+    if (total > SEEN_CAP) return -1; /* safety: skip for huge graphs */
+
+    bool seen[SEEN_CAP];
+    memset(seen, 0, total * sizeof(bool));
+
+    uint32_t pending[PENDING_CAP];
+    int depth = 0;
+    int found = 0;
+    pending[depth++] = pred->id;
+
+    while (depth > 0) {
+        const uint32_t id = pending[--depth];
+        if (id >= total || seen[id]) continue;
+        seen[id] = true;
+
+        ray_op_t* cur = &g->nodes[id];
+        if (cur->flags & OP_FLAG_DEAD) continue;
+
+        if (cur->opcode == OP_SCAN) {
+            if (found >= max) return -1; /* result overflow */
+            scan_ids[found++] = id;
+            continue;
+        }
+
+        for (int slot = 0; slot < cur->arity && slot < 2; slot++) {
+            if (!cur->inputs[slot]) continue;
+            if (depth >= PENDING_CAP) return -1; /* stack overflow */
+            pending[depth++] = cur->inputs[slot]->id;
+        }
+
+        /* Walk ext-stored operands for multi-input ops */
+        ray_op_ext_t* ext = find_ext(g, id);
+        if (!ext) continue;
+
+        if (cur->opcode == OP_IF || cur->opcode == OP_SUBSTR ||
+            cur->opcode == OP_REPLACE) {
+            uint32_t third_id = (uint32_t)(uintptr_t)ext->literal;
+            if (third_id < total && !seen[third_id]) {
+                if (depth >= PENDING_CAP) return -1;
+                pending[depth++] = third_id;
+            }
+        } else if (cur->opcode == OP_CONCAT && ext->sym >= 2) {
+            int n_args = (int)ext->sym;
+            uint32_t* trail = (uint32_t*)((char*)(ext + 1));
+            for (int j = 2; j < n_args; j++) {
+                uint32_t arg_id = trail[j - 2];
+                if (arg_id >= total || seen[arg_id]) continue;
+                if (depth >= PENDING_CAP) return -1;
+                pending[depth++] = arg_id;
+            }
+        }
+    }
+    return found;
+}
+
+/* Check if target_id is reachable from start by walking inputs.
+ * Returns true if target_id is in the subgraph rooted at start.
*/
+/* The walk follows inputs[0..1], the third operand of IF/SUBSTR/REPLACE and
+ * CONCAT's trailing arguments, and does not descend into dead nodes. The
+ * answer is conservative: graphs with more than 4096 nodes yield false, and
+ * operands that do not fit on the 64-entry work list are skipped, so a
+ * false result can mean "not proven reachable". */
+static bool is_reachable_from(ray_graph_t* g, ray_op_t* start, uint32_t target_id) {
+    enum { PENDING_CAP = 64, SEEN_CAP = 4096 };
+
+    if (!start) return false;
+    if (start->id == target_id) return true;
+
+    const uint32_t total = g->node_count;
+    if (total > SEEN_CAP) return false;
+
+    bool seen[SEEN_CAP];
+    memset(seen, 0, total * sizeof(bool));
+
+    uint32_t pending[PENDING_CAP];
+    int depth = 0;
+    pending[depth++] = start->id;
+
+    while (depth > 0) {
+        const uint32_t id = pending[--depth];
+        if (id >= total || seen[id]) continue;
+        seen[id] = true;
+        if (id == target_id) return true;
+
+        ray_op_t* cur = &g->nodes[id];
+        if (cur->flags & OP_FLAG_DEAD) continue;
+
+        for (int slot = 0; slot < cur->arity && slot < 2; slot++) {
+            if (cur->inputs[slot] && depth < PENDING_CAP)
+                pending[depth++] = cur->inputs[slot]->id;
+        }
+
+        /* Walk ext-stored operands for multi-input ops */
+        ray_op_ext_t* ext = find_ext(g, id);
+        if (!ext) continue;
+
+        if (cur->opcode == OP_IF || cur->opcode == OP_SUBSTR ||
+            cur->opcode == OP_REPLACE) {
+            uint32_t third_id = (uint32_t)(uintptr_t)ext->literal;
+            if (third_id < total && !seen[third_id] && depth < PENDING_CAP)
+                pending[depth++] = third_id;
+        } else if (cur->opcode == OP_CONCAT && ext->sym >= 2) {
+            int n_args = (int)ext->sym;
+            uint32_t* trail = (uint32_t*)((char*)(ext + 1));
+            for (int j = 2; j < n_args; j++) {
+                uint32_t arg_id = trail[j - 2];
+                if (arg_id < total && !seen[arg_id] && depth < PENDING_CAP)
+                    pending[depth++] = arg_id;
+            }
+        }
+    }
+    return false;
+}
+
+/* Redirect all consumers of old_id to point to new_target instead.
+ * Skips nodes with IDs skip_a and skip_b (the swapped pair).
+ * Updates both g->nodes[] and g->ext_nodes[].base.inputs[].
*/ +static void redirect_consumers(ray_graph_t* g, uint32_t old_id, + ray_op_t* new_target, + uint32_t skip_a, uint32_t skip_b) { + uint32_t nc = g->node_count; + for (uint32_t j = 0; j < nc; j++) { + ray_op_t* c = &g->nodes[j]; + if (c->flags & OP_FLAG_DEAD || j == skip_a || j == skip_b) continue; + for (int k = 0; k < c->arity && k < 2; k++) { /* only inputs[0..1] are rewritten; ext-stored operands are left as they are */ + if (c->inputs[k] && c->inputs[k]->id == old_id) + c->inputs[k] = new_target; + } + } + /* Also update ext_node heap copies to keep them in sync */ + for (uint32_t j = 0; j < g->ext_count; j++) { + if (!g->ext_nodes[j]) continue; + ray_op_t* c = &g->ext_nodes[j]->base; + if (c->flags & OP_FLAG_DEAD) continue; + if (c->id == skip_a || c->id == skip_b) continue; /* the copy is matched by its stored id, not by its slot index j */ + for (int k = 0; k < c->arity && k < 2; k++) { + if (c->inputs[k] && c->inputs[k]->id == old_id) + c->inputs[k] = new_target; + } + } +} + +static ray_op_t* pass_predicate_pushdown(ray_graph_t* g, ray_op_t* root) { /* moves two-input FILTER nodes below SELECT/ALIAS and below EXPAND; returns the (possibly replaced) root */ + if (!g || !root) return root; + + /* Multiple iterations: pushdown may enable further pushdowns */ + for (int iter = 0; iter < 4; iter++) { /* at most 4 rounds; stops early once a round changes nothing */ + bool changed = false; + uint32_t nc = g->node_count; + + for (uint32_t i = 0; i < nc; i++) { + ray_op_t* n = &g->nodes[i]; + if (n->flags & OP_FLAG_DEAD) continue; + if (n->opcode != OP_FILTER || n->arity != 2) continue; + + ray_op_t* child = n->inputs[0]; /* the filter's data input */ + ray_op_t* pred = n->inputs[1]; /* the filter's predicate */ + if (!child || !pred) continue; + + /* Push past SELECT/ALIAS (only if child is single-consumer, + * otherwise mutating child->inputs[0] would corrupt other branches) */ + if (child->opcode == OP_SELECT || + child->opcode == OP_ALIAS) { + if (count_node_consumers(g, child->id) > 1) continue; /* NOTE(review): nothing here checks that pred is independent of what SELECT/ALIAS computes or renames — confirm that is guaranteed elsewhere */ + /* Swap: FILTER(pred, SELECT(x)) -> SELECT(FILTER(pred, x)) */ + ray_op_t* proj_input = child->inputs[0]; /* NOTE(review): not null-checked before it becomes the filter's input */ + n->inputs[0] = proj_input; + child->inputs[0] = n; + redirect_consumers(g, n->id, child, child->id, n->id); /* former consumers of the filter now read the projection; the swapped pair itself is skipped */ + if (n->id == root->id) root = child; /* the filter was the root, so the projection takes over as root */ + changed = true; + continue; + } + + /* GROUP pushdown disabled: the executor's 
key/agg scans + * bypass the filter, producing wrong results. Needs executor + * support for filtered scan propagation before enabling. */ + + /* Push past EXPAND (source-side predicates, single-consumer only) */ + if (child->opcode == OP_EXPAND) { + if (count_node_consumers(g, child->id) > 1) continue; + uint32_t scan_ids[64]; + int n_scans = collect_pred_scans(g, pred, scan_ids, 64); + if (n_scans <= 0) continue; /* 0 = no scans, -1 = truncated */ + + /* All predicate scans must be reachable from the expand's + * source input (inputs[0]). Walk the source subtree. */ + ray_op_t* expand_src_tree = child->inputs[0]; + bool all_source = true; + for (int s = 0; s < n_scans; s++) { + if (!is_reachable_from(g, expand_src_tree, scan_ids[s])) { /* a false result (including a truncated walk) blocks the pushdown */ + all_source = false; + break; + } + } + if (!all_source) continue; + + /* Swap: FILTER(pred, EXPAND(src, rel)) -> EXPAND(FILTER(pred, src), rel) */ + ray_op_t* expand_src = child->inputs[0]; /* same pointer as expand_src_tree above */ + n->inputs[0] = expand_src; + child->inputs[0] = n; + redirect_consumers(g, n->id, child, child->id, n->id); /* former consumers of the filter now read the expand */ + if (n->id == root->id) root = child; /* the filter was the root, so the expand takes over as root */ + changed = true; + continue; + } + } + if (!changed) break; + } + return root; +} + +/* Score a predicate subtree: lower = cheaper = execute first. 
*/ +static int filter_cost(ray_graph_t* g, ray_op_t* pred) { /* heuristic score built from three additive parts: constant operand, operand type, comparison opcode */ + (void)g; /* g is not used by the scoring */ + if (!pred) return 99; /* a missing predicate gets a score above any sum computed below (maximum 4 + 3 + 4) */ + int cost = 0; + + /* Constant comparison: one input is OP_CONST */ + bool has_const = false; + for (int i = 0; i < pred->arity && i < 2; i++) { /* only the direct inputs[0..1] are inspected, not deeper operands */ + if (pred->inputs[i] && pred->inputs[i]->opcode == OP_CONST) + has_const = true; + } + if (!has_const) cost += 4; /* col-col comparison */ + + /* Type width cost */ + int8_t t = pred->out_type; /* fallback when there is no first input */ + if (pred->arity >= 1 && pred->inputs[0]) + t = pred->inputs[0]->out_type; /* otherwise the first input's output type decides */ + switch (t) { + case RAY_BOOL: case RAY_U8: cost += 0; break; + case RAY_I16: cost += 1; break; + case RAY_I32: case RAY_DATE: case RAY_TIME: cost += 2; break; + default: cost += 3; break; /* I64, F64, SYM, STR */ + } + + /* Comparison type cost */ + switch (pred->opcode) { + case OP_EQ: case OP_NE: cost += 0; break; + case OP_LT: case OP_LE: + case OP_GT: case OP_GE: cost += 2; break; + case OP_LIKE: case OP_ILIKE: cost += 4; break; + default: cost += 1; break; /* every other opcode */ + } + + return cost; +} + +/* Split FILTER(AND(a, b), input) into FILTER(a, FILTER(b, input)). + * Returns the new outer filter node, or the original if no split. 
*/ +static ray_op_t* split_and_filter(ray_graph_t* g, ray_op_t* filter_node) { /* splits the AND predicate of a two-input FILTER into two stacked FILTER nodes */ + if (!filter_node || filter_node->opcode != OP_FILTER) return filter_node; + if (filter_node->arity != 2) return filter_node; + + ray_op_t* pred = filter_node->inputs[1]; + if (!pred || pred->opcode != OP_AND || pred->arity != 2) return filter_node; + + ray_op_t* pred_a = pred->inputs[0]; + ray_op_t* pred_b = pred->inputs[1]; + ray_op_t* input = filter_node->inputs[0]; + if (!pred_a || !pred_b || !input) return filter_node; + + /* Save IDs before potential realloc */ + uint32_t filter_id = filter_node->id; + uint32_t pred_a_id = pred_a->id; + uint32_t pred_b_id = pred_b->id; + + /* Allocate new outer filter first, before mutating existing nodes */ + ray_op_t* outer = graph_alloc_node_opt(g); /* NOTE(review): outer->id is never assigned in this function — presumably set by graph_alloc_node_opt; confirm */ + if (!outer) return &g->nodes[filter_id]; /* OOM: leave unsplit */ + + /* Re-fetch after potential realloc */ + filter_node = &g->nodes[filter_id]; + pred_a = &g->nodes[pred_a_id]; + pred_b = &g->nodes[pred_b_id]; /* NOTE(review): filter_node->inputs[0] is not re-pointed here — confirm graph_alloc_node_opt fixes stored inputs[] pointers if g->nodes can move */ + + /* Rewrite: filter_node becomes FILTER(pred_a, input) and is wrapped by outer carrying pred_b, so pred_a sits on the inner filter and pred_b on the outer one */ + filter_node->inputs[1] = pred_a; + + outer->opcode = OP_FILTER; + outer->arity = 2; + outer->inputs[0] = filter_node; + outer->inputs[1] = pred_b; + outer->out_type = filter_node->out_type; + outer->est_rows = filter_node->est_rows; /* estimate copied unchanged from the inner filter */ + + return outer; +} + +/* Collect a chain of OP_FILTER nodes by following inputs[0] from top. Returns count (at most max). */ +static int collect_filter_chain(ray_op_t* top, ray_op_t** chain, int max) { + int n = 0; + ray_op_t* cur = top; + while (cur && cur->opcode == OP_FILTER && n < max) { /* stops at the first non-FILTER node, a NULL input, or when chain is full */ + chain[n++] = cur; + cur = cur->inputs[0]; + } + return n; +} + +static ray_op_t* pass_filter_reorder(ray_graph_t* g, ray_op_t* root) { /* splits AND predicates, then sorts the predicates inside each filter chain by filter_cost; returns the root looked up again by id */ + if (!g || !root) return root; + + uint32_t root_id = root->id; /* the root is tracked by id because node pointers may change during the splits */ + + /* First pass: split AND predicates in filters. + * Iterate until no more splits occur so nested ANDs like + * AND(AND(a,b), c) are fully decomposed into individual filters. 
*/ + for (int split_iter = 0; split_iter < 16; split_iter++) { /* at most 16 rounds */ + bool split_changed = false; + uint32_t nc = g->node_count; /* snapshot: nodes appended during this round are only visited in the next round */ + for (uint32_t i = 0; i < nc; i++) { + ray_op_t* n = &g->nodes[i]; + if (n->flags & OP_FLAG_DEAD) continue; + if (n->opcode != OP_FILTER) continue; + if (n->arity != 2 || !n->inputs[1]) continue; + if (n->inputs[1]->opcode != OP_AND) continue; + + /* Split AND and update consumers to point to new outer. + * split_and_filter may realloc g->nodes, so re-fetch n afterwards. */ + uint32_t orig_id = i; + ray_op_t* new_outer = split_and_filter(g, n); + n = &g->nodes[orig_id]; /* re-fetch after potential realloc */ + if (new_outer->id != orig_id) { /* a different id means a new outer filter was created */ + redirect_consumers(g, orig_id, new_outer, new_outer->id, orig_id); + if (orig_id == root_id) root_id = new_outer->id; + split_changed = true; + } + } + if (!split_changed) break; + } + + /* Second pass: reorder filter chains by cost. + * Use insertion sort on chain arrays (chains are typically short). */ + uint32_t nc = g->node_count; /* may have grown from splits */ + bool* visited = NULL; + bool visited_stack[256]; /* stack buffer for graphs of up to 256 nodes, heap allocation otherwise */ + if (nc <= 256) { + visited = visited_stack; + } else { + visited = (bool*)ray_sys_alloc(nc * sizeof(bool)); + if (!visited) return &g->nodes[root_id]; /* allocation failed: the reorder step is skipped, the splits are kept */ + } + memset(visited, 0, nc * sizeof(bool)); + + for (uint32_t i = 0; i < nc; i++) { + ray_op_t* n = &g->nodes[i]; + if (n->flags & OP_FLAG_DEAD) continue; + if (n->opcode != OP_FILTER) continue; + if (visited[i]) continue; + + /* Collect the filter chain starting at this node */ + ray_op_t* chain[64]; + int chain_len = collect_filter_chain(n, chain, 64); + if (chain_len < 2) { + for (int c = 0; c < chain_len; c++) visited[chain[c]->id] = true; + continue; + } + + /* Mark all as visited */ + for (int c = 0; c < chain_len; c++) visited[chain[c]->id] = true; + + /* Skip reordering if any filter in the chain has multiple consumers, + * since swapping predicates would change semantics for other branches */ + bool has_shared = false; + for (int 
c = 0; c < chain_len; c++) { + if (count_node_consumers(g, chain[c]->id) > 1) { + has_shared = true; + break; + } + } + if (has_shared) continue; + + /* Score each filter's predicate */ + int costs[64]; + for (int c = 0; c < chain_len; c++) + costs[c] = filter_cost(g, chain[c]->inputs[1]); + + /* Insertion sort predicates by cost descending (stable: preserves + * original order for equal costs). Expensive predicates go to + * chain[0] (outer, runs last), cheap go to chain[N-1] (inner, + * runs first). We swap predicates, not filter nodes. */ + for (int c = 1; c < chain_len; c++) { + ray_op_t* pred = chain[c]->inputs[1]; + int cost = costs[c]; + int j = c - 1; + while (j >= 0 && costs[j] < cost) { /* strict comparison keeps equal-cost predicates in their original order */ + chain[j + 1]->inputs[1] = chain[j]->inputs[1]; + costs[j + 1] = costs[j]; + j--; + } + chain[j + 1]->inputs[1] = pred; + costs[j + 1] = cost; + } + } + + if (nc > 256) ray_sys_free(visited); + return &g->nodes[root_id]; +} + +/* -------------------------------------------------------------------------- + * Pass 7: Projection pushdown + * + * BFS from root collecting all reachable node IDs (following inputs and + * ext-node children). Any node not reachable is marked DEAD so the DCE + * pass can clean it up. + * -------------------------------------------------------------------------- */ + +static bool pass_projection_pushdown(ray_graph_t* g, ray_op_t* root) { /* returns false when nothing was marked (NULL arguments or allocation failure), true otherwise */ + if (!g || !root) return false; + uint32_t nc = g->node_count; + + bool live_stack[256]; /* stack buffers serve graphs of up to 256 nodes, heap allocations otherwise */ + bool* live = nc <= 256 ? live_stack : (bool*)ray_sys_alloc(nc * sizeof(bool)); + uint32_t q_stack[256]; + uint32_t* q = nc <= 256 ? 
q_stack : (uint32_t*)ray_sys_alloc(nc * sizeof(uint32_t)); + if (!live || !q) { if (nc > 256) { ray_sys_free(live); ray_sys_free(q); } return false; } /* presumably ray_sys_free accepts NULL, since one of the two may be NULL here — confirm */ + memset(live, 0, nc * sizeof(bool)); + + /* BFS from root */ + int qh = 0, qt = 0; /* queue head and tail indexes into q */ + q[qt++] = root->id; + live[root->id] = true; /* ids index live[] without a range check here and for inputs/ext children below — assumes every such id < nc; TODO confirm */ + + while (qh < qt) { + uint32_t nid = q[qh++]; + ray_op_t* n = &g->nodes[nid]; + + /* Follow standard inputs */ + for (int i = 0; i < 2 && i < n->arity; i++) { + if (n->inputs[i] && !live[n->inputs[i]->id]) { + live[n->inputs[i]->id] = true; + if (qt < (int)nc) q[qt++] = n->inputs[i]->id; /* an id is marked live before it is queued, so it is queued at most once */ + } + } + + /* Follow ext node children (mirrors pass_type_inference traversal) */ + ray_op_ext_t* ext = find_ext(g, nid); + if (ext) { + switch (n->opcode) { + case OP_GROUP: + for (uint8_t k = 0; k < ext->n_keys; k++) + if (ext->keys[k] && !live[ext->keys[k]->id]) { + live[ext->keys[k]->id] = true; + if (qt < (int)nc) q[qt++] = ext->keys[k]->id; + } + for (uint8_t a = 0; a < ext->n_aggs; a++) + if (ext->agg_ins[a] && !live[ext->agg_ins[a]->id]) { + live[ext->agg_ins[a]->id] = true; + if (qt < (int)nc) q[qt++] = ext->agg_ins[a]->id; + } + break; + case OP_SORT: + case OP_SELECT: /* OP_SELECT children are read through the same ext->sort fields as OP_SORT */ + for (uint8_t k = 0; k < ext->sort.n_cols; k++) + if (ext->sort.columns[k] && !live[ext->sort.columns[k]->id]) { + live[ext->sort.columns[k]->id] = true; + if (qt < (int)nc) q[qt++] = ext->sort.columns[k]->id; + } + break; + case OP_JOIN: + for (uint8_t k = 0; k < ext->join.n_join_keys; k++) { + if (ext->join.left_keys[k] && !live[ext->join.left_keys[k]->id]) { + live[ext->join.left_keys[k]->id] = true; + if (qt < (int)nc) q[qt++] = ext->join.left_keys[k]->id; + } + if (ext->join.right_keys && ext->join.right_keys[k] && + !live[ext->join.right_keys[k]->id]) { + live[ext->join.right_keys[k]->id] = true; + if (qt < (int)nc) q[qt++] = ext->join.right_keys[k]->id; + } + } + break; + case OP_WINDOW_JOIN: { + ray_op_ext_t* wj_ext = find_ext(g, n->id); /* NOTE(review): same lookup as ext above if n->id == nid — looks redundant; confirm */ + if (wj_ext) { + if (wj_ext->asof.time_key && !live[wj_ext->asof.time_key->id]) { 
+ live[wj_ext->asof.time_key->id] = true; + if (qt < (int)nc) q[qt++] = wj_ext->asof.time_key->id; + } + for (uint8_t k = 0; k < wj_ext->asof.n_eq_keys; k++) { + if (wj_ext->asof.eq_keys[k] && !live[wj_ext->asof.eq_keys[k]->id]) { + live[wj_ext->asof.eq_keys[k]->id] = true; + if (qt < (int)nc) q[qt++] = wj_ext->asof.eq_keys[k]->id; + } + } + } + break; + } + case OP_WINDOW: + for (uint8_t k = 0; k < ext->window.n_part_keys; k++) + if (ext->window.part_keys[k] && !live[ext->window.part_keys[k]->id]) { + live[ext->window.part_keys[k]->id] = true; + if (qt < (int)nc) q[qt++] = ext->window.part_keys[k]->id; + } + for (uint8_t k = 0; k < ext->window.n_order_keys; k++) + if (ext->window.order_keys[k] && !live[ext->window.order_keys[k]->id]) { + live[ext->window.order_keys[k]->id] = true; + if (qt < (int)nc) q[qt++] = ext->window.order_keys[k]->id; + } + for (uint8_t f = 0; f < ext->window.n_funcs; f++) + if (ext->window.func_inputs[f] && !live[ext->window.func_inputs[f]->id]) { + live[ext->window.func_inputs[f]->id] = true; + if (qt < (int)nc) q[qt++] = ext->window.func_inputs[f]->id; + } + break; + case OP_IF: + case OP_SUBSTR: + case OP_REPLACE: { + uint32_t third_id = (uint32_t)(uintptr_t)ext->literal; /* the literal pointer slot is read as the third operand's node id */ + if (third_id < nc && !live[third_id]) { + live[third_id] = true; + if (qt < (int)nc) q[qt++] = third_id; + } + break; + } + case OP_CONCAT: + if (ext->sym >= 2) { /* ext->sym is used as the argument count here */ + int n_args = (int)ext->sym; + uint32_t* trail = (uint32_t*)((char*)(ext + 1)); /* ids of args 2..n_args-1 are read from the memory right after the ext struct */ + for (int j = 2; j < n_args; j++) { + uint32_t arg_id = trail[j - 2]; + if (arg_id < nc && !live[arg_id]) { + live[arg_id] = true; + if (qt < (int)nc) q[qt++] = arg_id; + } + } + } + break; + default: + break; + } + } + } + + /* Mark unreachable nodes DEAD */ + for (uint32_t i = 0; i < nc; i++) { + if (!live[i]) + g->nodes[i].flags |= OP_FLAG_DEAD; + } + + if (nc > 256) { ray_sys_free(live); ray_sys_free(q); } + return true; +} + +/* -------------------------------------------------------------------------- + * Pass 8: 
Partition pruning + * + * Recognize FILTER(cmp(SCAN(mapcommon_col), CONST(val))) patterns, where cmp is + * EQ/NE/LT/GT/LE/GE/IN/NOT_IN: attach a partition bitmap to parted scans and set est_rows=1. + * -------------------------------------------------------------------------- */ + +static void pass_partition_pruning(ray_graph_t* g, ray_op_t* root) { + if (!g || !root) return; + (void)root; /* root is only null-checked; every live node of the graph is scanned */ + + for (uint32_t i = 0; i < g->node_count; i++) { + ray_op_t* n = &g->nodes[i]; + if (n->flags & OP_FLAG_DEAD) continue; + if (n->opcode != OP_FILTER || n->arity != 2) continue; + + ray_op_t* pred = n->inputs[1]; + if (!pred || pred->arity != 2) continue; + + uint16_t cmp_op = pred->opcode; + if (cmp_op != OP_EQ && cmp_op != OP_NE && + cmp_op != OP_LT && cmp_op != OP_GT && + cmp_op != OP_LE && cmp_op != OP_GE && + cmp_op != OP_IN && cmp_op != OP_NOT_IN) continue; + + ray_op_t* lhs = pred->inputs[0]; + ray_op_t* rhs = pred->inputs[1]; + if (!lhs || !rhs) continue; + + ray_op_t* scan_node = NULL; + ray_op_t* const_node = NULL; + bool swapped = false; /* true when the constant is the left operand */ + if (lhs->opcode == OP_SCAN && rhs->opcode == OP_CONST) { + scan_node = lhs; const_node = rhs; + } else if (rhs->opcode == OP_SCAN && lhs->opcode == OP_CONST) { + scan_node = rhs; const_node = lhs; swapped = true; + } else continue; + + if (scan_node->out_type != RAY_MAPCOMMON) continue; + + ray_op_ext_t* scan_ext = find_ext(g, scan_node->id); + if (!scan_ext) continue; + + /* Resolve table: a 16-bit id is read from the scan's pad bytes; id k > 0 selects g->tables[k - 1], anything else falls back to g->table */ + uint16_t stored_table_id = 0; + memcpy(&stored_table_id, scan_ext->base.pad, sizeof(uint16_t)); + ray_t* tbl; + if (stored_table_id > 0 && g->tables && (stored_table_id - 1) < g->n_tables) + tbl = g->tables[stored_table_id - 1]; + else + tbl = g->table; + if (!tbl) continue; + + ray_t* mc_col = ray_table_get_col(tbl, scan_ext->sym); + if (!mc_col || mc_col->type != RAY_MAPCOMMON) continue; + + /* Extract constant value */ + ray_op_ext_t* const_ext = find_ext(g, const_node->id); + if (!const_ext || !const_ext->literal) continue; + ray_t* lit = const_ext->literal; + 
+ /* Read partition keys from MAPCOMMON: [key_values, row_counts] */ + if (mc_col->len < 2) continue; + ray_t** mc_ptrs = (ray_t**)ray_data(mc_col); + ray_t* key_values = mc_ptrs[0]; + if (!key_values) continue; + int64_t n_parts = key_values->len; + if (n_parts <= 0) continue; + + /* Type-class check: partition keys and the literal must live in + * the same value namespace, otherwise comparisons are nonsense. + * - SYM keys are interned IDs; they can only be compared to + * SYM set elements. + * - non-SYM keys must be I32/DATE/TIME (4-byte) or I64/TIMESTAMP + * (8-byte): the key reader below decodes no other width. + * - anything else (SYM/int mix, I16/U8/BOOL/float keys, ...) + * skips pruning (the executor filter still runs). */ + int8_t pkey_t = key_values->type; + int8_t lit_base = lit->type < 0 ? (int8_t)(-lit->type) : lit->type; + bool pkey_is_sym = (pkey_t == RAY_SYM); + bool lit_is_sym = (lit_base == RAY_SYM); + if (pkey_is_sym != lit_is_sym || !(pkey_is_sym || pkey_t == RAY_I32 || pkey_t == RAY_DATE || pkey_t == RAY_TIME || pkey_t == RAY_I64 || pkey_t == RAY_TIMESTAMP)) { + continue; + } + + /* Allocate seg_mask bitmap */ + uint32_t n_words = (uint32_t)((n_parts + 63) / 64); /* one bit per partition, packed into 64-bit words */ + uint64_t* mask = (uint64_t*)ray_sys_alloc(n_words * sizeof(uint64_t)); + if (!mask) continue; + memset(mask, 0, n_words * sizeof(uint64_t)); + + /* OP_IN / OP_NOT_IN expects a literal vector const on the RHS. + * For the scalar ops, the const is a single atom or 1-elem vec. */ + bool is_in = (cmp_op == OP_IN); + bool is_nin = (cmp_op == OP_NOT_IN); + + /* For IN/NOT_IN the scan must be the LHS (col IN set), not + * swapped — we never pruned on `const IN col_set` anyway. */ + if ((is_in || is_nin) && swapped) { ray_sys_free(mask); continue; } + + /* Extract constant(s) for comparison. Scalar ops take one + * value; IN ops take an array of values read from the vec + * literal. We normalize all values to int64_t (which covers + * I64, TIMESTAMP, SYM interned IDs, and sign-extended I32/ + * DATE/TIME). Atoms store the value in the header; vectors + * store it in data. 
*/ + int64_t const_val = 0; /* for scalar ops */ + int64_t set_stack[32]; + int64_t* set_vals = set_stack; /* for IN/NOT_IN */ + int64_t set_len = 0; + ray_t* set_heap = NULL; /* backs set_vals when the set has more than 32 elements */ + + int8_t lt = lit->type < 0 ? (int8_t)(-lit->type) : lit->type; + bool narrow32 = (lt == RAY_I32 || lt == RAY_DATE || lt == RAY_TIME); + bool wide64 = (lt == RAY_I64 || lt == RAY_TIMESTAMP || lt == RAY_SYM); + if ((!narrow32 && !wide64) || (!is_in && !is_nin && lit->type >= 0 && lit->len < 1)) { + ray_sys_free(mask); + continue; /* unsupported literal type, or an empty vector const for a scalar op (nothing to read) */ + } + + if (is_in || is_nin) { + /* Literal must be a vector (ray_const_vec carries the vec + * pointer unchanged in ext->literal). */ + if (lit->type <= 0) { ray_sys_free(mask); continue; } + set_len = lit->len; + if (set_len <= 0) { + /* Empty set: for IN no partition can match → mask stays 0 + * and we attach it below (skipping all segments). For + * NOT_IN every partition passes → set all bits. */ + if (is_nin) { + for (int64_t p = 0; p < n_parts; p++) + mask[p / 64] |= (1ULL << (p % 64)); + } + goto attach_mask; + } + if (set_len > 32) { + set_heap = ray_alloc((size_t)set_len * sizeof(int64_t)); + if (!set_heap) { ray_sys_free(mask); continue; } + set_vals = (int64_t*)ray_data(set_heap); + } + /* Read set elements — skip nulls in the literal so a null + * sentinel can never match a partition key. */ + int64_t next = 0; + bool set_has_nulls = (lit->attrs & RAY_ATTR_HAS_NULLS) != 0; + for (int64_t i = 0; i < set_len; i++) { + if (set_has_nulls && ray_vec_is_null(lit, i)) continue; + if (narrow32) { + int32_t v32; + memcpy(&v32, (char*)ray_data(lit) + i * sizeof(int32_t), sizeof(int32_t)); + set_vals[next++] = v32; + } else { + int64_t v64; + memcpy(&v64, (char*)ray_data(lit) + i * sizeof(int64_t), sizeof(int64_t)); + set_vals[next++] = v64; + } + } + set_len = next; + /* Also handle the degenerate case where all set elements + * were null — treat like empty set. 
*/ + if (set_len == 0) { + if (is_nin) { + for (int64_t p = 0; p < n_parts; p++) + mask[p / 64] |= (1ULL << (p % 64)); + } + if (set_heap) ray_free(set_heap); + goto attach_mask; + } + } else { + /* Scalar const path (EQ/NE/LT/GT/LE/GE). */ + if (wide64) { + if (lit->type < 0) const_val = lit->i64; + else memcpy(&const_val, ray_data(lit), sizeof(int64_t)); + } else { + int32_t v32; + if (lit->type < 0) v32 = lit->i32; + else memcpy(&v32, ray_data(lit), sizeof(int32_t)); + const_val = v32; + } + } + + /* Effective comparison: if swapped, reverse direction + * (IN/NOT_IN are never swapped — gated above). */ + uint16_t eff_op = cmp_op; + if (swapped) { + if (cmp_op == OP_LT) eff_op = OP_GT; + else if (cmp_op == OP_GT) eff_op = OP_LT; + else if (cmp_op == OP_LE) eff_op = OP_GE; + else if (cmp_op == OP_GE) eff_op = OP_LE; + } + + for (int64_t p = 0; p < n_parts; p++) { + int64_t pkey = 0; + if (key_values->type == RAY_DATE || key_values->type == RAY_I32 || key_values->type == RAY_TIME) { + int32_t v32; + memcpy(&v32, (char*)ray_data(key_values) + p * sizeof(int32_t), sizeof(int32_t)); + pkey = v32; + } else { /* only 8-byte key types (I64/TIMESTAMP/SYM) reach this branch: other widths were rejected by the key-type check before the mask was allocated */ + memcpy(&pkey, (char*)ray_data(key_values) + p * sizeof(int64_t), sizeof(int64_t)); + } + + bool pass = false; + if (is_in || is_nin) { + bool found = false; + for (int64_t j = 0; j < set_len; j++) { + if (pkey == set_vals[j]) { found = true; break; } + } + pass = is_in ? found : !found; + } else { + switch (eff_op) { + case OP_EQ: pass = (pkey == const_val); break; + case OP_NE: pass = (pkey != const_val); break; + case OP_LT: pass = (pkey < const_val); break; + case OP_GT: pass = (pkey > const_val); break; + case OP_LE: pass = (pkey <= const_val); break; + case OP_GE: pass = (pkey >= const_val); break; + default: break; + } + } + if (pass) + mask[p / 64] |= (1ULL << (p % 64)); + } + if (set_heap) ray_free(set_heap); + attach_mask:; + + /* Attach seg_mask to OP_SCAN nodes reading parted columns from same table. 
+ * When no partition passes the mask is all-zeros — attach it anyway so the + * segment loop in ray_execute skips all segments and hits the + * empty-table path instead of reading every partition. */ + bool mask_owned = false; /* set once at least one scan has taken the mask pointer */ + for (uint32_t s = 0; s < g->node_count; s++) { /* NOTE(review): every live parted scan with the same table id is masked, not only scans beneath this FILTER — confirm this is intended */ + ray_op_t* sn = &g->nodes[s]; + if (sn->flags & OP_FLAG_DEAD || sn->opcode != OP_SCAN) continue; + if (sn == scan_node) continue; /* the MAPCOMMON scan used in the predicate is left unmasked */ + + ray_op_ext_t* sn_ext = find_ext(g, sn->id); + if (!sn_ext) continue; + + uint16_t sn_tid = 0; + memcpy(&sn_tid, sn_ext->base.pad, sizeof(uint16_t)); + if (sn_tid != stored_table_id) continue; + + ray_t* sn_col = ray_table_get_col(tbl, sn_ext->sym); + if (!sn_col || !RAY_IS_PARTED(sn_col->type)) continue; + + if (sn_ext->seg_mask) { + /* AND with existing mask (conjunctive filters) */ + uint32_t exist_w = (uint32_t)((sn_ext->seg_mask_count + 63) / 64); + uint32_t min_w = n_words < exist_w ? n_words : exist_w; + for (uint32_t w = 0; w < min_w; w++) + sn_ext->seg_mask[w] &= mask[w]; + /* Zero out words beyond new mask (prune extra segments) */ + for (uint32_t w = min_w; w < exist_w; w++) + sn_ext->seg_mask[w] = 0; + /* Tighten count to the smaller partition set */ + if (n_parts < sn_ext->seg_mask_count) + sn_ext->seg_mask_count = n_parts; + } else { + sn_ext->seg_mask = mask; /* NOTE(review): every mask-less scan receives this same pointer — confirm the code releasing seg_mask frees it once, not once per scan */ + sn_ext->seg_mask_count = n_parts; + mask_owned = true; + } + } + if (!mask_owned) ray_sys_free(mask); /* no scan took the pointer, so it is released here */ + + n->est_rows = 1; /* row-estimate hint written for every filter that reached this point, whatever the comparison */ + } +} + +/* -------------------------------------------------------------------------- + * ray_optimize — run all passes in order, return (possibly updated) root + * -------------------------------------------------------------------------- */ + +ray_op_t* ray_optimize(ray_graph_t* g, ray_op_t* root) { + if (!g || !root) return root; + + ray_profile_span_start("optimize"); + + /* Pass 1: Type inference */ + pass_type_inference(g, root); + ray_profile_tick("type inference"); + + /* Pass 2: Constant folding */ + pass_constant_fold(g, root); + ray_profile_tick("constant fold"); + 
+ /* Pass 3: SIP (graph-aware sideways information passing) */ + sip_pass(g, root); + ray_profile_tick("SIP"); + + /* Pass 4: Factorized detection (OP_EXPAND → OP_GROUP optimization) */ + factorize_pass(g, root); + ray_profile_tick("factorize"); + + /* Pass 5: Predicate pushdown (may change root) */ + root = pass_predicate_pushdown(g, root); + ray_profile_tick("predicate pushdown"); + + /* Pass 6: Filter reordering (split ANDs + reorder by cost, may change root) */ + root = pass_filter_reorder(g, root); + ray_profile_tick("filter reorder"); + + /* Pass 7: Projection pushdown (mark unreachable nodes dead) */ + bool proj_ok = pass_projection_pushdown(g, root); + ray_profile_tick("projection pushdown"); + + /* Pass 8: Partition pruning (set est_rows hints for mapcommon filters). + * Only safe to run if projection pushdown completed: pruning walks all + * nodes and would attach seg_masks to disconnected branches otherwise. */ + if (proj_ok) + pass_partition_pruning(g, root); + ray_profile_tick("partition pruning"); + + /* Pass 9: Fusion */ + ray_fuse_pass(g, root); + ray_profile_tick("fusion"); + + /* Pass 10: DCE */ + pass_dce(g, root); + ray_profile_tick("DCE"); + + ray_profile_span_end("optimize"); + + return root; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/opt.h b/crates/rayforce-sys/vendor/rayforce/src/ops/opt.h new file mode 100644 index 0000000..af3956b --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/opt.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_OPT_H +#define RAY_OPT_H + +#include "ops.h" + +#endif /* RAY_OPT_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/pipe.c b/crates/rayforce-sys/vendor/rayforce/src/ops/pipe.c new file mode 100644 index 0000000..1c04342 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/pipe.c @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe.h" +#include "mem/sys.h" +#include <string.h> /* memset */ +#ifndef RAY_OS_WINDOWS +#include <unistd.h> /* close */ +#endif + +/* -------------------------------------------------------------------------- + * ray_pipe_new + * + * Allocate a new pipe structure with all fields zeroed and spill_fd = -1. + * -------------------------------------------------------------------------- */ + +ray_pipe_t* ray_pipe_new(void) { + ray_pipe_t* p = (ray_pipe_t*)ray_sys_alloc(sizeof(ray_pipe_t)); + if (!p) return NULL; /* allocation failure is reported as NULL */ + /* L3: Zero-init the entire struct before setting individual fields, + ensuring no uninitialized pointers or state. */ + memset(p, 0, sizeof(*p)); + p->spill_fd = -1; /* -1 marks "no spill file open"; ray_pipe_free only closes descriptors >= 0 */ + + return p; +} + +/* -------------------------------------------------------------------------- + * ray_pipe_free + * + * Free a pipe. Closes the spill file descriptor if it was opened. + * Does NOT recursively free upstream input pipes. 
+ * -------------------------------------------------------------------------- */ + +void ray_pipe_free(ray_pipe_t* pipe) { + if (!pipe) return; /* NULL is accepted and ignored */ + + if (pipe->spill_fd >= 0) { + close(pipe->spill_fd); /* return value ignored; NOTE(review): <unistd.h> is skipped when RAY_OS_WINDOWS is defined — confirm close() is declared for that build */ + } + + ray_sys_free(pipe); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/pipe.h b/crates/rayforce-sys/vendor/rayforce/src/ops/pipe.h new file mode 100644 index 0000000..088bcfe --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/pipe.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_PIPE_H +#define RAY_PIPE_H + +/* + * pipe.h -- Pipeline infrastructure. + * + * A pipe connects operation nodes in the executor pipeline. Each pipe + * holds a morsel iterator state, optional materialized intermediate, + * and upstream input pipe references. 
+ */ + +#include "ops.h" + +/* Allocate and initialize a new pipe (all fields zeroed, spill_fd = -1). */ +ray_pipe_t* ray_pipe_new(void); + +/* Free a pipe. Closes spill_fd if open. Does NOT free upstream pipes. */ +void ray_pipe_free(ray_pipe_t* pipe); + +#endif /* RAY_PIPE_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/pivot.c b/crates/rayforce-sys/vendor/rayforce/src/ops/pivot.c new file mode 100644 index 0000000..778123c --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/pivot.c @@ -0,0 +1,666 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ops/internal.h" + +/* ============================================================================ + * OP_IF: ternary select result[i] = cond[i] ? 
then[i] : else[i] + * ============================================================================ */ + +ray_t* exec_if(ray_graph_t* g, ray_op_t* op) { + /* cond = inputs[0], then = inputs[1], else_id stored in ext->literal */ + ray_t* cond_v = exec_node(g, op->inputs[0]); + ray_t* then_v = exec_node(g, op->inputs[1]); + + ray_op_ext_t* ext = find_ext(g, op->id); + uint32_t else_id = (uint32_t)(uintptr_t)ext->literal; + ray_t* else_v = exec_node(g, &g->nodes[else_id]); + + if (!cond_v || RAY_IS_ERR(cond_v)) { + if (then_v && !RAY_IS_ERR(then_v)) ray_release(then_v); + if (else_v && !RAY_IS_ERR(else_v)) ray_release(else_v); + return cond_v; + } + if (!then_v || RAY_IS_ERR(then_v)) { + ray_release(cond_v); + if (else_v && !RAY_IS_ERR(else_v)) ray_release(else_v); + return then_v; + } + if (!else_v || RAY_IS_ERR(else_v)) { + ray_release(cond_v); ray_release(then_v); + return else_v; + } + + int64_t len = cond_v->len; + bool then_scalar = ray_is_atom(then_v) || (then_v->type > 0 && then_v->len == 1); + bool else_scalar = ray_is_atom(else_v) || (else_v->type > 0 && else_v->len == 1); + if (then_scalar && !else_scalar) len = else_v->len; + if (!then_scalar) len = then_v->len; + + int8_t out_type = op->out_type; + ray_t* result = ray_vec_new(out_type, len); + if (!result || RAY_IS_ERR(result)) { + ray_release(cond_v); ray_release(then_v); ray_release(else_v); + return result; + } + result->len = len; + + uint8_t* cond_p = (uint8_t*)ray_data(cond_v); + + if (out_type == RAY_F64) { + double t_scalar = then_scalar ? (ray_is_atom(then_v) ? then_v->f64 : ((double*)ray_data(then_v))[0]) : 0; + double e_scalar = else_scalar ? (ray_is_atom(else_v) ? else_v->f64 : ((double*)ray_data(else_v))[0]) : 0; + double* t_arr = then_scalar ? NULL : (double*)ray_data(then_v); + double* e_arr = else_scalar ? NULL : (double*)ray_data(else_v); + double* dst = (double*)ray_data(result); + for (int64_t i = 0; i < len; i++) + dst[i] = cond_p[i] ? (t_arr ? t_arr[i] : t_scalar) + : (e_arr ? 
e_arr[i] : e_scalar); + } else if (out_type == RAY_I64) { + int64_t t_scalar = then_scalar ? (ray_is_atom(then_v) ? then_v->i64 : ((int64_t*)ray_data(then_v))[0]) : 0; + int64_t e_scalar = else_scalar ? (ray_is_atom(else_v) ? else_v->i64 : ((int64_t*)ray_data(else_v))[0]) : 0; + int64_t* t_arr = then_scalar ? NULL : (int64_t*)ray_data(then_v); + int64_t* e_arr = else_scalar ? NULL : (int64_t*)ray_data(else_v); + int64_t* dst = (int64_t*)ray_data(result); + for (int64_t i = 0; i < len; i++) + dst[i] = cond_p[i] ? (t_arr ? t_arr[i] : t_scalar) + : (e_arr ? e_arr[i] : e_scalar); + } else if (out_type == RAY_I32) { + int32_t t_scalar = then_scalar ? (ray_is_atom(then_v) ? then_v->i32 : ((int32_t*)ray_data(then_v))[0]) : 0; + int32_t e_scalar = else_scalar ? (ray_is_atom(else_v) ? else_v->i32 : ((int32_t*)ray_data(else_v))[0]) : 0; + int32_t* t_arr = then_scalar ? NULL : (int32_t*)ray_data(then_v); + int32_t* e_arr = else_scalar ? NULL : (int32_t*)ray_data(else_v); + int32_t* dst = (int32_t*)ray_data(result); + for (int64_t i = 0; i < len; i++) + dst[i] = cond_p[i] ? (t_arr ? t_arr[i] : t_scalar) + : (e_arr ? e_arr[i] : e_scalar); + } else if (out_type == RAY_STR) { + /* RAY_STR: resolve each side to string data and ray_str_vec_append. + * Scalars may be -RAY_STR or RAY_SYM atoms. */ + result->len = 0; /* ray_str_vec_append manages len */ + for (int64_t i = 0; i < len; i++) { + const char* sp; + size_t sl; + if (cond_p[i]) { + if (then_scalar) { + if (then_v->type == -RAY_STR) { + sp = ray_str_ptr(then_v); + sl = ray_str_len(then_v); + } else if (then_v->type == RAY_STR) { + sp = ray_str_vec_get(then_v, 0, &sl); + if (!sp) { sp = ""; sl = 0; } + } else if (RAY_IS_SYM(then_v->type)) { + ray_t* s = ray_sym_str(then_v->i64); + sp = s ? ray_str_ptr(s) : ""; + sl = s ? 
ray_str_len(s) : 0; + } else { sp = ""; sl = 0; } + } else if (then_v->type == RAY_STR) { + sp = ray_str_vec_get(then_v, i, &sl); + if (!sp) { sp = ""; sl = 0; } + } else { + /* RAY_SYM column */ + int64_t sid = ray_read_sym(ray_data(then_v), i, then_v->type, then_v->attrs); + ray_t* sa = ray_sym_str(sid); + sp = sa ? ray_str_ptr(sa) : ""; + sl = sa ? ray_str_len(sa) : 0; + } + } else { + if (else_scalar) { + if (else_v->type == -RAY_STR) { + sp = ray_str_ptr(else_v); + sl = ray_str_len(else_v); + } else if (else_v->type == RAY_STR) { + sp = ray_str_vec_get(else_v, 0, &sl); + if (!sp) { sp = ""; sl = 0; } + } else if (RAY_IS_SYM(else_v->type)) { + ray_t* s = ray_sym_str(else_v->i64); + sp = s ? ray_str_ptr(s) : ""; + sl = s ? ray_str_len(s) : 0; + } else { sp = ""; sl = 0; } + } else if (else_v->type == RAY_STR) { + sp = ray_str_vec_get(else_v, i, &sl); + if (!sp) { sp = ""; sl = 0; } + } else { + /* RAY_SYM column */ + int64_t sid = ray_read_sym(ray_data(else_v), i, else_v->type, else_v->attrs); + ray_t* sa = ray_sym_str(sid); + sp = sa ? ray_str_ptr(sa) : ""; + sl = sa ? ray_str_len(sa) : 0; + } + } + result = ray_str_vec_append(result, sp, sl); + if (RAY_IS_ERR(result)) break; + } + } else if (out_type == RAY_SYM) { + /* SYM columns may have narrow widths (W8/W16/W32) — use ray_read_sym. + * Scalars may be string atoms that need interning, sym atoms, or length-1 vectors (element 0 is read via ray_read_sym, not from the atom slot). Output is always W64. */ + int64_t t_scalar = 0, e_scalar = 0; + if (then_scalar) { + if (then_v->type == -RAY_STR) { + t_scalar = ray_sym_intern(ray_str_ptr(then_v), ray_str_len(then_v)); + } else { + t_scalar = ray_is_atom(then_v) ? then_v->i64 : ray_read_sym(ray_data(then_v), 0, then_v->type, then_v->attrs); + } + } + if (else_scalar) { + if (else_v->type == -RAY_STR) { + e_scalar = ray_sym_intern(ray_str_ptr(else_v), ray_str_len(else_v)); + } else { + e_scalar = ray_is_atom(else_v) ? else_v->i64 : ray_read_sym(ray_data(else_v), 0, else_v->type, else_v->attrs); + } + } + int64_t* dst = (int64_t*)ray_data(result); + for (int64_t i = 0; i < len; i++) { + int64_t tv = then_scalar ? t_scalar + : ray_read_sym(ray_data(then_v), i, then_v->type, then_v->attrs); + int64_t ev = else_scalar ? 
e_scalar + : ray_read_sym(ray_data(else_v), i, else_v->type, else_v->attrs); + dst[i] = cond_p[i] ? tv : ev; + } + } else if (out_type == RAY_BOOL || out_type == RAY_U8) { /* scalar operand: atom slot, or element 0 of a length-1 vector (same rule as the F64/I64/I32 branches) */ + uint8_t t_scalar = then_scalar ? (ray_is_atom(then_v) ? then_v->b8 : ((uint8_t*)ray_data(then_v))[0]) : 0; + uint8_t e_scalar = else_scalar ? (ray_is_atom(else_v) ? else_v->b8 : ((uint8_t*)ray_data(else_v))[0]) : 0; + uint8_t* t_arr = then_scalar ? NULL : (uint8_t*)ray_data(then_v); + uint8_t* e_arr = else_scalar ? NULL : (uint8_t*)ray_data(else_v); + uint8_t* dst = (uint8_t*)ray_data(result); + for (int64_t i = 0; i < len; i++) + dst[i] = cond_p[i] ? (t_arr ? t_arr[i] : t_scalar) + : (e_arr ? e_arr[i] : e_scalar); + } else if (out_type == RAY_TIMESTAMP || out_type == RAY_TIME || out_type == RAY_DATE) { + /* TIMESTAMP is 8B like I64; DATE and TIME are 4B like I32. A length-1 vector used as a scalar is read from its data buffer, not from the atom slot. */ + if (out_type == RAY_TIMESTAMP) { + int64_t t_scalar2 = then_scalar ? (ray_is_atom(then_v) ? then_v->i64 : ((int64_t*)ray_data(then_v))[0]) : 0; + int64_t e_scalar2 = else_scalar ? (ray_is_atom(else_v) ? else_v->i64 : ((int64_t*)ray_data(else_v))[0]) : 0; + int64_t* t_arr = then_scalar ? NULL : (int64_t*)ray_data(then_v); + int64_t* e_arr = else_scalar ? NULL : (int64_t*)ray_data(else_v); + int64_t* dst = (int64_t*)ray_data(result); + for (int64_t i = 0; i < len; i++) + dst[i] = cond_p[i] ? (t_arr ? t_arr[i] : t_scalar2) + : (e_arr ? e_arr[i] : e_scalar2); + } else { + int32_t t_scalar2 = then_scalar ? (ray_is_atom(then_v) ? then_v->i32 : ((int32_t*)ray_data(then_v))[0]) : 0; + int32_t e_scalar2 = else_scalar ? (ray_is_atom(else_v) ? else_v->i32 : ((int32_t*)ray_data(else_v))[0]) : 0; + int32_t* t_arr = then_scalar ? NULL : (int32_t*)ray_data(then_v); + int32_t* e_arr = else_scalar ? NULL : (int32_t*)ray_data(else_v); + int32_t* dst = (int32_t*)ray_data(result); + for (int64_t i = 0; i < len; i++) + dst[i] = cond_p[i] ? (t_arr ? t_arr[i] : t_scalar2) + : (e_arr ? e_arr[i] : e_scalar2); + } + } else if (out_type == RAY_I16) { /* scalar operand: atom slot, or element 0 of a length-1 vector */ + int16_t t_scalar = then_scalar ? (ray_is_atom(then_v) ? (int16_t)then_v->i32 : ((int16_t*)ray_data(then_v))[0]) : 0; + int16_t e_scalar = else_scalar ? (ray_is_atom(else_v) ? (int16_t)else_v->i32 : ((int16_t*)ray_data(else_v))[0]) : 0; + int16_t* t_arr = then_scalar ? NULL : (int16_t*)ray_data(then_v); + int16_t* e_arr = else_scalar ? 
NULL : (int16_t*)ray_data(else_v); + int16_t* dst = (int16_t*)ray_data(result); + for (int64_t i = 0; i < len; i++) + dst[i] = cond_p[i] ? (t_arr ? t_arr[i] : t_scalar) + : (e_arr ? e_arr[i] : e_scalar); + } + + ray_release(cond_v); ray_release(then_v); ray_release(else_v); + return result; +} + +/* ============================================================================ + * exec_pivot — single-pass hash-aggregated pivot table + * + * Groups by (index_cols, pivot_col), aggregates value_col, then unstacks + * pivot values into separate output columns. Returns the pivoted table, or a ray_error ("nyi", "domain", "limit", "oom", "cancel") on failure. + * ============================================================================ */ + +ray_t* exec_pivot(ray_graph_t* g, ray_op_t* op, ray_t* tbl) { + if (!tbl || RAY_IS_ERR(tbl)) return tbl; + + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + + uint8_t n_idx = ext->pivot.n_index; + uint16_t agg_op = ext->pivot.agg_op; + int64_t nrows = ray_table_nrows(tbl); + if (n_idx > 16) return ray_error("limit", "pivot: too many index columns"); /* idx_vecs[] holds 16 entries; this also keeps the later uint8_t n_idx + 1 from wrapping to 0 */ + /* Resolve input columns */ + ray_t* idx_vecs[16]; + for (uint8_t i = 0; i < n_idx; i++) { + ray_op_ext_t* ie = find_ext(g, ext->pivot.index_cols[i]->id); + idx_vecs[i] = (ie && ie->base.opcode == OP_SCAN) + ? ray_table_get_col(tbl, ie->sym) : NULL; + if (!idx_vecs[i]) return ray_error("domain", "pivot: index column not found"); + } + + ray_op_ext_t* pe = find_ext(g, ext->pivot.pivot_col->id); + ray_t* pcol = (pe && pe->base.opcode == OP_SCAN) + ? ray_table_get_col(tbl, pe->sym) : NULL; + if (!pcol) return ray_error("domain", "pivot: pivot column not found"); + + ray_op_ext_t* ve = find_ext(g, ext->pivot.value_col->id); + ray_t* vcol = (ve && ve->base.opcode == OP_SCAN) + ? 
ray_table_get_col(tbl, ve->sym) : NULL; + if (!vcol) return ray_error("domain", "pivot: value column not found"); + + if (nrows == 0) return ray_table_new(0); + + /* Combined keys: index_cols + pivot_col */ + uint8_t n_keys = n_idx + 1; + if (n_keys > 8) return ray_error("limit", "pivot: too many index columns"); + + /* Wide-key resolution: for RAY_GUID the HT slot holds a source row + * index rather than the 16 raw bytes, so phase2 dedupe and emit + * route wide keys through the source column (key_data[k]). */ + bool idx_wide[8] = {0}; + for (uint8_t k = 0; k < n_idx; k++) + idx_wide[k] = (idx_vecs[k]->type == RAY_GUID); + bool pvt_wide = (pcol->type == RAY_GUID); + + void* key_data[8]; + int8_t key_types[8]; + uint8_t key_attrs[8]; + ray_t* key_vecs[8]; + for (uint8_t k = 0; k < n_idx; k++) { + key_data[k] = ray_data(idx_vecs[k]); + key_types[k] = idx_vecs[k]->type; + key_attrs[k] = idx_vecs[k]->attrs; + key_vecs[k] = idx_vecs[k]; + } + key_data[n_idx] = ray_data(pcol); + key_types[n_idx] = pcol->type; + key_attrs[n_idx] = pcol->attrs; + key_vecs[n_idx] = pcol; + + /* Single agg input: value column */ + ray_t* agg_vecs[1] = { vcol }; + uint16_t agg_ops[1] = { agg_op }; + + /* Compute need_flags for the agg op */ + uint8_t need_flags = GHT_NEED_SUM; /* always need sum (used for FIRST/LAST too) */ + if (agg_op == OP_MIN) need_flags |= GHT_NEED_MIN; + if (agg_op == OP_MAX) need_flags |= GHT_NEED_MAX; + + ght_layout_t ly = ght_compute_layout(n_keys, 1, agg_vecs, need_flags, agg_ops, key_types); + + /* Hash-aggregate all rows via the shared radix pipeline — parallel + * across thread-pool workers for n_scan ≥ RAY_PARALLEL_THRESHOLD, + * sequential single-HT for smaller inputs. 
*/ + ray_progress_update("pivot", "hash-aggregate", 0, (uint64_t)nrows); + pivot_ingest_t pg; + if (!pivot_ingest_run(&pg, &ly, key_data, key_types, key_attrs, + key_vecs, agg_vecs, nrows)) { + pivot_ingest_free(&pg); + return ray_error("oom", NULL); + } + ray_progress_update("pivot", "dedupe", 0, (uint64_t)pg.total_grps); + if (ray_interrupted()) { pivot_ingest_free(&pg); return ray_error("cancel", "interrupted"); } + uint32_t grp_count = pg.total_grps; + if (grp_count == 0) { pivot_ingest_free(&pg); return ray_table_new(0); } + + /* Phase 2: Collect distinct pivot values and distinct index keys. + * Each group row layout: [hash:8][key0:8]...[keyN-1:8][null_mask:8][accum...] + * where the keys region holds n_idx index keys + 1 pivot key, + * followed by the key-null bitmap written by group_rows_range. */ + + /* SQL PIVOT treats a null pivot key as "no column" — drop those groups. */ + const uint8_t pvt_null_bit = (uint8_t)(1u << n_idx); + + /* Collect distinct pivot values */ + uint32_t pv_cap = 64, pv_count = 0; + ray_t* pv_hdr = NULL; + int64_t* pv_vals = (int64_t*)scratch_alloc(&pv_hdr, pv_cap * sizeof(int64_t)); + if (!pv_vals) { pivot_ingest_free(&pg); return ray_error("oom", NULL); } + + const char* pvt_base = pvt_wide ? 
(const char*)key_data[n_idx] : NULL; + for (uint32_t _p = 0; _p < pg.n_parts; _p++) { + group_ht_t* ph = &pg.part_hts[_p]; + uint32_t pcount = ph->grp_count; + for (uint32_t gi_local = 0; gi_local < pcount; gi_local++) { + const char* row = ph->rows + (size_t)gi_local * pg.row_stride; + const int64_t* rkeys = (const int64_t*)(row + 8); + int64_t nmask = rkeys[n_keys]; + if (nmask & pvt_null_bit) continue; + int64_t pval = rkeys[n_idx]; + bool found = false; + for (uint32_t p = 0; p < pv_count; p++) { + if (pvt_wide) { + if (memcmp(pvt_base + (size_t)pv_vals[p] * 16, + pvt_base + (size_t)pval * 16, 16) == 0) { found = true; break; } + } else { + if (pv_vals[p] == pval) { found = true; break; } + } + } + if (!found) { + if (pv_count >= pv_cap) { + uint32_t new_cap = pv_cap * 2; + int64_t* new_pv = (int64_t*)scratch_realloc(&pv_hdr, + pv_cap * sizeof(int64_t), new_cap * sizeof(int64_t)); + if (!new_pv) { pivot_ingest_free(&pg); return ray_error("oom", NULL); } + pv_vals = new_pv; + pv_cap = new_cap; + } + pv_vals[pv_count++] = pval; + } + } + } + + /* Collect distinct index keys. + * Flat append-only entry array + secondary open-addressed HT keyed by + * the hash of (idx_keys + idx_null_mask). The HT makes phase2 dedupe + * O(grp_count) instead of the previous O(grp_count * ix_count) + * linear scan which hung on large pivots. + * Entry layout: [hash:8 | idx_keys:8*n_idx | idx_null_mask:8]. */ + uint32_t ix_cap = 256, ix_count = 0; + ray_t* ix_hdr = NULL; + size_t ix_entry = 8 + (size_t)n_idx * 8 + 8; + const uint8_t idx_null_bits = (uint8_t)((1u << n_idx) - 1u); + char* ix_rows = (char*)scratch_alloc(&ix_hdr, ix_cap * ix_entry); + if (!ix_rows) { scratch_free(pv_hdr); pivot_ingest_free(&pg); return ray_error("oom", NULL); } + + /* Secondary HT: hash slot -> ix_row index; empty = UINT32_MAX. 
*/ + uint32_t ix_ht_cap = 256; + while (ix_ht_cap < (uint32_t)grp_count * 2 && ix_ht_cap < (1u << 30)) ix_ht_cap <<= 1; + ray_t* ix_ht_hdr = NULL; + uint32_t* ix_ht = (uint32_t*)scratch_alloc(&ix_ht_hdr, ix_ht_cap * sizeof(uint32_t)); + if (!ix_ht) { + scratch_free(ix_hdr); scratch_free(pv_hdr); pivot_ingest_free(&pg); + return ray_error("oom", NULL); + } + memset(ix_ht, 0xFF, ix_ht_cap * sizeof(uint32_t)); + uint32_t ix_ht_mask = ix_ht_cap - 1; + + /* Map: group_id -> (ix_row, pv_idx) for result cell placement */ + ray_t* map_hdr = NULL; + uint32_t* grp_ix = (uint32_t*)scratch_alloc(&map_hdr, grp_count * 2 * sizeof(uint32_t)); + if (!grp_ix) { scratch_free(ix_ht_hdr); scratch_free(ix_hdr); scratch_free(pv_hdr); pivot_ingest_free(&pg); return ray_error("oom", NULL); } + uint32_t* grp_pv = grp_ix + grp_count; + + for (uint32_t _p = 0; _p < pg.n_parts; _p++) { + group_ht_t* ph = &pg.part_hts[_p]; + uint32_t pcount = ph->grp_count; + uint32_t gi_base = pg.part_offsets[_p]; + /* Progress tick at each partition boundary — time-gated so + * 256 small partitions do not spam the callback. */ + ray_progress_update(NULL, NULL, gi_base, (uint64_t)grp_count); + for (uint32_t gi_local = 0; gi_local < pcount; gi_local++) { + uint32_t gi = gi_base + gi_local; + const char* row = ph->rows + (size_t)gi_local * pg.row_stride; + const int64_t* keys = (const int64_t*)(row + 8); + int64_t nmask = keys[n_keys]; + if (nmask & pvt_null_bit) { + grp_ix[gi] = UINT32_MAX; + grp_pv[gi] = UINT32_MAX; + continue; + } + int64_t idx_nmask = nmask & idx_null_bits; + + /* Hash index keys only (exclude pivot key) + null mask. + * Wide keys (GUID) resolve actual bytes via key_data[k]. 
*/ + uint64_t ih = 0; + for (uint8_t k = 0; k < n_idx; k++) { + uint64_t kh; + if (idx_wide[k]) { + const char* base = (const char*)key_data[k]; + kh = ray_hash_bytes(base + (size_t)keys[k] * 16, 16); + } else if (key_types[k] == RAY_F64) { + kh = ray_hash_f64(*(const double*)&keys[k]); + } else { + kh = ray_hash_i64(keys[k]); + } + ih = (k == 0) ? kh : ray_hash_combine(ih, kh); + } + if (idx_nmask) ih = ray_hash_combine(ih, ray_hash_i64(idx_nmask)); + + /* Open-addressed HT probe. On match, reuse; else insert. */ + uint32_t ix_row = UINT32_MAX; + uint32_t slot = (uint32_t)(ih & ix_ht_mask); + for (;;) { + uint32_t ent = ix_ht[slot]; + if (ent == UINT32_MAX) break; /* empty → insert below */ + const char* ix_entry_p = ix_rows + (size_t)ent * ix_entry; + if (*(const uint64_t*)ix_entry_p == ih) { + const int64_t* ekeys = (const int64_t*)(ix_entry_p + 8); + bool eq = true; + for (uint8_t k = 0; k < n_idx && eq; k++) { + if (idx_wide[k]) { + const char* base = (const char*)key_data[k]; + eq = (memcmp(base + (size_t)ekeys[k] * 16, + base + (size_t)keys[k] * 16, 16) == 0); + } else { + eq = (ekeys[k] == keys[k]); + } + } + int64_t ent_nmask; + memcpy(&ent_nmask, ix_entry_p + 8 + (size_t)n_idx * 8, 8); + if (eq && ent_nmask == idx_nmask) { ix_row = ent; break; } + } + slot = (slot + 1) & ix_ht_mask; + } + if (ix_row == UINT32_MAX) { + if (ix_count >= ix_cap) { + uint32_t new_cap = ix_cap * 2; + char* new_rows = (char*)scratch_realloc(&ix_hdr, + ix_cap * ix_entry, new_cap * ix_entry); + if (!new_rows) { + scratch_free(map_hdr); scratch_free(ix_ht_hdr); + scratch_free(pv_hdr); pivot_ingest_free(&pg); + return ray_error("oom", NULL); + } + ix_rows = new_rows; + ix_cap = new_cap; + } + ix_row = ix_count++; + char* dst = ix_rows + (size_t)ix_row * ix_entry; + *(uint64_t*)dst = ih; + memcpy(dst + 8, keys, (size_t)n_idx * 8); + memcpy(dst + 8 + (size_t)n_idx * 8, &idx_nmask, 8); + ix_ht[slot] = ix_row; + } + + /* Find pivot column index. 
For wide pivot keys both slot values + * are source row indices — resolve to actual bytes for compare, + * otherwise duplicate GUID pivot values map to the wrong column. */ + int64_t pval = keys[n_idx]; + uint32_t pv_idx = UINT32_MAX; + for (uint32_t p = 0; p < pv_count; p++) { + if (pvt_wide) { + if (memcmp(pvt_base + (size_t)pv_vals[p] * 16, + pvt_base + (size_t)pval * 16, 16) == 0) { pv_idx = p; break; } + } else { + if (pv_vals[p] == pval) { pv_idx = p; break; } + } + } + + grp_ix[gi] = ix_row; + grp_pv[gi] = pv_idx; + } + } + + /* Phase 3: Build output table */ + ray_progress_update("pivot", "scatter", 0, (uint64_t)pv_count); + bool val_is_f64 = vcol->type == RAY_F64; + int8_t out_agg_type; + switch (agg_op) { + case OP_AVG: out_agg_type = RAY_F64; break; + case OP_COUNT: out_agg_type = RAY_I64; break; + case OP_SUM: out_agg_type = val_is_f64 ? RAY_F64 : RAY_I64; break; + default: out_agg_type = vcol->type; break; + } + + int64_t out_ncols = (int64_t)n_idx + (int64_t)pv_count; + ray_t* result = ray_table_new(out_ncols); + if (!result || RAY_IS_ERR(result)) goto pivot_cleanup; + + /* Index columns */ + for (uint8_t k = 0; k < n_idx; k++) { + ray_t* new_col = col_vec_new(idx_vecs[k], (int64_t)ix_count); + if (!new_col || RAY_IS_ERR(new_col)) { ray_release(result); result = ray_error("oom", NULL); goto pivot_cleanup; } + new_col->len = (int64_t)ix_count; + uint8_t esz = col_esz(idx_vecs[k]); + int8_t kt = idx_vecs[k]->type; + const char* src_base = idx_wide[k] ? (const char*)key_data[k] : NULL; + for (uint32_t r = 0; r < ix_count; r++) { + const char* ix_entry_p = ix_rows + r * ix_entry; + int64_t kv = ((const int64_t*)(ix_entry_p + 8))[k]; + int64_t ent_nmask; + memcpy(&ent_nmask, ix_entry_p + 8 + (size_t)n_idx * 8, 8); + if (ent_nmask & (int64_t)(1u << k)) { + ray_vec_set_null(new_col, (int64_t)r, true); + continue; + } + if (idx_wide[k]) { + /* kv is a source row index; copy the 16 raw bytes. 
*/ + memcpy((char*)ray_data(new_col) + (size_t)r * esz, + src_base + (size_t)kv * 16, 16); + } else if (kt == RAY_F64) { + memcpy((char*)ray_data(new_col) + (size_t)r * esz, &kv, 8); + } else { + write_col_i64(ray_data(new_col), (int64_t)r, kv, kt, new_col->attrs); + } + } + if (idx_vecs[k]->type == RAY_STR) + col_propagate_str_pool(new_col, idx_vecs[k]); + + ray_op_ext_t* ie = find_ext(g, ext->pivot.index_cols[k]->id); + result = ray_table_add_col(result, ie->sym, new_col); + ray_release(new_col); + if (RAY_IS_ERR(result)) goto pivot_cleanup; + } + + /* Value columns — one per distinct pivot value */ + { + int8_t s = ly.agg_val_slot[0]; /* single agg input -> slot 0 */ + for (uint32_t p = 0; p < pv_count; p++) { + ray_t* new_col = (out_agg_type == vcol->type) + ? col_vec_new(vcol, (int64_t)ix_count) + : ray_vec_new(out_agg_type, (int64_t)ix_count); + if (!new_col || RAY_IS_ERR(new_col)) { ray_release(result); result = ray_error("oom", NULL); goto pivot_cleanup; } + new_col->len = (int64_t)ix_count; + + /* Initialize with zero (missing cells get 0) */ + memset(ray_data(new_col), 0, (size_t)ix_count * (out_agg_type == RAY_F64 ? 8 : (size_t)col_esz(new_col))); + + for (uint32_t _pp = 0; _pp < pg.n_parts; _pp++) { + group_ht_t* ph = &pg.part_hts[_pp]; + uint32_t pcount = ph->grp_count; + uint32_t gi_base = pg.part_offsets[_pp]; + for (uint32_t gi_local = 0; gi_local < pcount; gi_local++) { + uint32_t gi = gi_base + gi_local; + if (grp_pv[gi] != p) continue; + uint32_t r = grp_ix[gi]; + const char* row = ph->rows + (size_t)gi_local * pg.row_stride; + int64_t cnt = *(const int64_t*)(const void*)row; + + if (out_agg_type == RAY_F64) { + double v; + switch (agg_op) { + case OP_SUM: + v = val_is_f64 ? ROW_RD_F64(row, ly.off_sum, s) + : (double)ROW_RD_I64(row, ly.off_sum, s); + break; + case OP_AVG: + v = val_is_f64 ? ROW_RD_F64(row, ly.off_sum, s) / cnt + : (double)ROW_RD_I64(row, ly.off_sum, s) / cnt; + break; + case OP_MIN: + v = val_is_f64 ? 
ROW_RD_F64(row, ly.off_min, s) + : (double)ROW_RD_I64(row, ly.off_min, s); + break; + case OP_MAX: + v = val_is_f64 ? ROW_RD_F64(row, ly.off_max, s) + : (double)ROW_RD_I64(row, ly.off_max, s); + break; + case OP_FIRST: case OP_LAST: + v = val_is_f64 ? ROW_RD_F64(row, ly.off_sum, s) + : (double)ROW_RD_I64(row, ly.off_sum, s); + break; + default: v = 0.0; break; + } + ((double*)ray_data(new_col))[r] = v; + } else { + int64_t v; + switch (agg_op) { + case OP_SUM: v = ROW_RD_I64(row, ly.off_sum, s); break; + case OP_COUNT: v = cnt; break; + case OP_MIN: v = ROW_RD_I64(row, ly.off_min, s); break; + case OP_MAX: v = ROW_RD_I64(row, ly.off_max, s); break; + case OP_FIRST: case OP_LAST: v = ROW_RD_I64(row, ly.off_sum, s); break; + default: v = 0; break; + } + write_col_i64(ray_data(new_col), (int64_t)r, v, out_agg_type, new_col->attrs); + } + } + } + + /* Column name from pivot value — match pivot_val_to_sym semantics. Integer keys are printed as long long: plain long is 32-bit on LLP64 targets and would truncate the 64-bit slot. NOTE(review): confirm pivot_val_to_sym formats with the same width. */ + int64_t pval = pv_vals[p]; + int64_t col_sym; + if (pcol->type == RAY_SYM) { + col_sym = pval; + } else if (pvt_wide) { + /* GUID: format 16 bytes as xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx. + * pval is a source row index into pvt_base. */ + static const char hex[] = "0123456789abcdef"; + static const int groups[] = {4, 2, 2, 2, 6}; + char buf[37]; + const uint8_t* bytes = (const uint8_t*)pvt_base + (size_t)pval * 16; + int pos = 0, bpos = 0; + for (int grp = 0; grp < 5; grp++) { + if (grp > 0) buf[bpos++] = '-'; + for (int j = 0; j < groups[grp]; j++) { + buf[bpos++] = hex[bytes[pos] >> 4]; + buf[bpos++] = hex[bytes[pos] & 0x0F]; + pos++; + } + } + col_sym = ray_sym_intern(buf, (size_t)bpos); + } else { + char buf[128]; + int len = 0; + int8_t pt = key_types[n_idx]; + if (pt == RAY_F64) { + double fv; + memcpy(&fv, &pval, 8); + if (fv == 0.0 && signbit(fv)) fv = 0.0; + len = snprintf(buf, sizeof(buf), "%g", fv); + } else if (pt == RAY_BOOL) { + len = snprintf(buf, sizeof(buf), "%s", pval ? 
"true" : "false"); + } else if (pt == RAY_I64 || pt == RAY_I32 || pt == RAY_I16 || + pt == RAY_DATE || pt == RAY_TIME || pt == RAY_TIMESTAMP) { + len = snprintf(buf, sizeof(buf), "%lld", (long long)pval); + } else { + len = snprintf(buf, sizeof(buf), "col%lld", (long long)pval); + } + col_sym = ray_sym_intern(buf, (size_t)len); + } + + result = ray_table_add_col(result, col_sym, new_col); + ray_release(new_col); + if (RAY_IS_ERR(result)) goto pivot_cleanup; + } + } + +pivot_cleanup: + scratch_free(map_hdr); + scratch_free(ix_ht_hdr); + scratch_free(ix_hdr); + scratch_free(pv_hdr); + pivot_ingest_free(&pg); + return result; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/plan.c b/crates/rayforce-sys/vendor/rayforce/src/ops/plan.c new file mode 100644 index 0000000..ba92a5f --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/plan.c @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "plan.h" + +/* -------------------------------------------------------------------------- + * Plan: linearize DAG into execution order + * + * For now, the executor recursively evaluates nodes (exec.c). This file + * is a placeholder for future topological sort + pipeline planning. + * -------------------------------------------------------------------------- */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/plan.h b/crates/rayforce-sys/vendor/rayforce/src/ops/plan.h new file mode 100644 index 0000000..0d0edfc --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/plan.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_PLAN_H +#define RAY_PLAN_H + +#include "ops.h" + +#endif /* RAY_PLAN_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/query.c b/crates/rayforce-sys/vendor/rayforce/src/ops/query.c new file mode 100644 index 0000000..17aaf0f --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/query.c @@ -0,0 +1,6329 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** Query bridge: select, update, insert, upsert, join operations. + * Extracted from eval.c. 
+ */ + +#include "lang/internal.h" +#include "lang/eval.h" +#include "lang/env.h" +#include "ops/ops.h" +#include "ops/internal.h" +#include "ops/hash.h" +#include "ops/temporal.h" +#include "table/sym.h" +#include "table/dict.h" +#include "mem/sys.h" + +#include +#include +#include + +/* ══════════════════════════════════════════ + * Select query — DAG bridge + * ══════════════════════════════════════════ */ + +/* Helper: look up a key in a select-clause dict (RAY_DICT). + * Returns the value expression (unevaluated), or NULL if not found. */ +static ray_t* dict_get(ray_t* dict, const char* key) { + if (!dict || dict->type != RAY_DICT) return NULL; + int64_t key_id = ray_sym_intern(key, strlen(key)); + return ray_dict_probe_sym_borrowed(dict, key_id); +} + +/* Flatten a RAY_DICT (keys SYM vec + vals LIST) into a transient + * [k0,v0,k1,v1,...] array view so the existing dict-walking loops in + * ray_select_fn et al. can iterate without rewriting every site. + * + * Caller passes stack-local buffers sized at DICT_VIEW_MAX. If the dict + * has more pairs than fits, sets `*out_n = -1` to flag overflow — every + * call site checks this and returns a "domain" error rather than letting + * the writes spill past the buffers. The previous version of this helper + * had no such guard and silently corrupted the stack on user-controlled + * dicts with > 64 pairs. + * + * `key_atoms` must hold at least DICT_VIEW_MAX entries; `out_elems` at + * least 2 * DICT_VIEW_MAX. Keys are synthesized as -RAY_SYM atoms in + * `key_atoms`; values are borrowed from the dict's vals list. 
*/ +#define DICT_VIEW_MAX 256 +static void dict_pair_view(ray_t* d, ray_t* key_atoms, ray_t** out_elems, int64_t* out_n) { + *out_n = 0; + if (!d || d->type != RAY_DICT) return; + ray_t* keys = ray_dict_keys(d); + ray_t* vals = ray_dict_vals(d); + if (!keys || keys->type != RAY_SYM || !vals || vals->type != RAY_LIST) return; + int64_t n = keys->len; + if (n > DICT_VIEW_MAX) { *out_n = -1; return; } + void* kbase = ray_data(keys); + ray_t** vptrs = (ray_t**)ray_data(vals); + for (int64_t i = 0; i < n; i++) { + memset(&key_atoms[i], 0, sizeof(ray_t)); + key_atoms[i].type = -RAY_SYM; + key_atoms[i].i64 = ray_read_sym(kbase, i, RAY_SYM, keys->attrs); + out_elems[i*2] = &key_atoms[i]; + out_elems[i*2+1] = vptrs[i]; + } + *out_n = 2 * n; +} + +#define DICT_VIEW_DECL(name) \ + ray_t name##_keybuf[DICT_VIEW_MAX]; \ + ray_t* name[DICT_VIEW_MAX * 2]; \ + int64_t name##_n +#define DICT_VIEW_OPEN(d, name) \ + dict_pair_view((d), name##_keybuf, name, &name##_n) +/* Returns true if the open exceeded DICT_VIEW_MAX — caller should + * `ray_release(tbl); return ray_error("domain", "clause too big");`. */ +#define DICT_VIEW_OVERFLOW(name) ((name##_n) < 0) + +/* Convert a RAY_DICT (keys, vals) into a transient interleaved + * [k0_atom, v0, k1_atom, v1, …] RAY_LIST. Used by select's group-by + * aggregation paths which were written for the old in-place pair-array + * representation of grouping output. Returns an owned RAY_LIST (rc=1). + * Atom keys are freshly boxed for typed-vector key columns (sym, i64, + * etc.); for RAY_LIST keys they are retained borrows. */ +static ray_t* groups_to_pair_list(ray_t* d) { + if (!d || d->type != RAY_DICT) return ray_error("type", NULL); + ray_t* keys = ray_dict_keys(d); + ray_t* vals = ray_dict_vals(d); + int64_t n = keys ? keys->len : 0; + ray_t* out = ray_list_new(n * 2); + if (!out || RAY_IS_ERR(out)) return out ? out : ray_error("oom", NULL); + ray_t** vptrs = (vals && vals->type == RAY_LIST) ? 
(ray_t**)ray_data(vals) : NULL; + for (int64_t i = 0; i < n; i++) { + ray_t* k = NULL; + if (!keys) { + k = NULL; + } else if (keys->type == RAY_LIST) { + k = ((ray_t**)ray_data(keys))[i]; + if (k) ray_retain(k); + } else { + void* base = ray_data(keys); + switch (keys->type) { + case RAY_SYM: k = ray_sym(ray_read_sym(base, i, RAY_SYM, keys->attrs)); break; + case RAY_I64: + case RAY_TIMESTAMP: k = ray_i64(((int64_t*)base)[i]); break; + case RAY_I32: + case RAY_DATE: + case RAY_TIME: k = ray_i32(((int32_t*)base)[i]); break; + case RAY_I16: k = ray_i16(((int16_t*)base)[i]); break; + case RAY_BOOL: + case RAY_U8: k = ray_u8(((uint8_t*)base)[i]); break; + case RAY_F64: k = ray_f64(((double*)base)[i]); break; + case RAY_STR: { size_t sl = 0; const char* sp = ray_str_vec_get(keys, i, &sl); + k = ray_str(sp ? sp : "", sp ? sl : 0); break; } + case RAY_GUID: k = ray_guid(((uint8_t*)base) + i * 16); break; + default: k = NULL; break; + } + } + out = ray_list_append(out, k); + if (k) ray_release(k); + ray_t* v = vptrs ? vptrs[i] : NULL; + out = ray_list_append(out, v); + } + return out; +} + +/* Map a Rayfall builtin name to a DAG binary op constructor */ +typedef ray_op_t* (*dag_binary_ctor)(ray_graph_t*, ray_op_t*, ray_op_t*); +typedef ray_op_t* (*dag_unary_ctor)(ray_graph_t*, ray_op_t*); + +static dag_binary_ctor resolve_binary_dag(int64_t sym_id) { + ray_t* s = ray_sym_str(sym_id); + if (!s) return NULL; + const char* name = ray_str_ptr(s); + size_t len = ray_str_len(s); + if (len == 1) { + switch (name[0]) { + case '+': return ray_add; + case '-': return ray_sub; + case '*': return ray_mul; + case '/': return ray_div; + case '%': return ray_mod; + case '>': return ray_gt; + case '<': return ray_lt; + } + } else if (len == 2) { + if (name[0] == '>' && name[1] == '=') return ray_ge; + if (name[0] == '<' && name[1] == '=') return ray_le; + if (name[0] == '=' && name[1] == '=') return ray_eq; + if (name[0] == '!' 
&& name[1] == '=') return ray_ne; + if (name[0] == 'o' && name[1] == 'r') return ray_or; + if (name[0] == 'i' && name[1] == 'n') return ray_in; + } else if (len == 3) { + if (memcmp(name, "and", 3) == 0) return ray_and; + } else if (len == 4) { + if (memcmp(name, "like", 4) == 0) return ray_like; + } else if (len == 5) { + if (memcmp(name, "ilike", 5) == 0) return ray_ilike; + } else if (len == 6) { + if (memcmp(name, "not-in", 6) == 0) return ray_not_in; + } + return NULL; +} + +static dag_unary_ctor resolve_unary_dag(int64_t sym_id) { + ray_t* s = ray_sym_str(sym_id); + if (!s) return NULL; + const char* name = ray_str_ptr(s); + size_t len = ray_str_len(s); + if (len == 3) { + if (memcmp(name, "neg", 3) == 0) return ray_neg; + if (memcmp(name, "not", 3) == 0) return ray_not; + if (memcmp(name, "abs", 3) == 0) return ray_abs; + if (memcmp(name, "exp", 3) == 0) return ray_exp_op; + if (memcmp(name, "log", 3) == 0) return ray_log_op; + } else if (len == 4) { + if (memcmp(name, "ceil", 4) == 0) return ray_ceil_op; + if (memcmp(name, "sqrt", 4) == 0) return ray_sqrt_op; + if (memcmp(name, "trim", 4) == 0) return ray_trim_op; + } else if (len == 5) { + if (memcmp(name, "floor", 5) == 0) return ray_floor_op; + if (memcmp(name, "round", 5) == 0) return ray_round_op; + if (memcmp(name, "upper", 5) == 0) return ray_upper; + if (memcmp(name, "lower", 5) == 0) return ray_lower; + } else if (len == 6) { + if (memcmp(name, "strlen", 6) == 0) return ray_strlen; + } + /* NOTE: no DAG wiring for nil?/isnull yet. The eval-level + * builtin `nil?` (src/lang/eval.c:2029) is atom-only — it + * returns false when applied to a column vec. OP_ISNULL in + * the DAG is per-element. Wiring `nil?` here would diverge + * from the eval fallback. A proper pass should first add an + * element-wise null-check builtin at eval level, then map it + * here. 
*/ + return NULL; +} + +/* Map Rayfall aggregation name to DAG opcode */ +static uint16_t resolve_agg_opcode(int64_t sym_id) { + ray_t* s = ray_sym_str(sym_id); + if (!s) return 0; + const char* name = ray_str_ptr(s); + size_t len = ray_str_len(s); + if (len == 3 && memcmp(name, "sum", 3) == 0) return OP_SUM; + if (len == 3 && memcmp(name, "avg", 3) == 0) return OP_AVG; + if (len == 3 && memcmp(name, "min", 3) == 0) return OP_MIN; + if (len == 3 && memcmp(name, "max", 3) == 0) return OP_MAX; + if (len == 3 && memcmp(name, "dev", 3) == 0) return OP_STDDEV; + if (len == 3 && memcmp(name, "var", 3) == 0) return OP_VAR; + if (len == 4 && memcmp(name, "prod", 4) == 0) return OP_PROD; + if (len == 4 && memcmp(name, "last", 4) == 0) return OP_LAST; + if (len == 5 && memcmp(name, "count", 5) == 0) return OP_COUNT; + if (len == 5 && memcmp(name, "first", 5) == 0) return OP_FIRST; + if (len == 6 && memcmp(name, "stddev",6) == 0) return OP_STDDEV; + if (len == 7 && memcmp(name, "dev_pop", 7) == 0) return OP_STDDEV_POP; + if (len == 7 && memcmp(name, "var_pop", 7) == 0) return OP_VAR_POP; + if (len == 10 && memcmp(name, "stddev_pop", 10) == 0) return OP_STDDEV_POP; + return 0; +} + +/* Apply sort (asc/desc) and take clauses to a materialized result table. + * Used by eval-level paths that bypass the DAG (e.g., LIST/STR group keys). + * Builds a temporary DAG for sorting (supports per-column direction flags) + * and applies take via ray_head/ray_tail or ray_take_fn. 
*/
+static ray_t* apply_sort_take(ray_t* result, ray_t** dict_elems, int64_t dict_n,
+                               int64_t asc_id, int64_t desc_id, int64_t take_id) {
+    /* Contract, as implemented below:
+     *  - `dict_elems` is an interleaved [key_atom, value, ...] view of the
+     *    select clause with `dict_n` elements; asc_id / desc_id / take_id
+     *    are the symbol ids of the clause keys to look for.
+     *  - A NULL or error `result`, a clause without sort/take keys, and a
+     *    failed graph allocation all hand `result` back untouched.
+     *  - Every other path releases `result` and returns a new object (or
+     *    an error), so the caller must use the return value in its place. */
+    if (!result || RAY_IS_ERR(result)) return result;
+
+    /* Check for sort/take clauses */
+    bool has_sort = false;
+    ray_t* take_val_expr = NULL;
+    for (int64_t i = 0; i + 1 < dict_n; i += 2) {
+        int64_t kid = dict_elems[i]->i64;
+        if (kid == asc_id || kid == desc_id) has_sort = true;
+        if (kid == take_id) take_val_expr = dict_elems[i + 1];
+    }
+    if (!has_sort && !take_val_expr) return result;
+
+    /* Build temporary DAG on the materialized result */
+    ray_graph_t* g = ray_graph_new(result);
+    if (!g) return result;
+    /* NOTE(review): the node returned by ray_const_table is used without
+     * a NULL check — confirm it cannot fail here. */
+    ray_op_t* root = ray_const_table(g, result);
+
+    /* Sort */
+    if (has_sort) {
+        /* At most 16 sort keys are honoured; further keys are ignored
+         * silently by the n_sort < 16 loop conditions. */
+        ray_op_t* sort_keys[16];
+        uint8_t   sort_descs[16];
+        uint8_t   n_sort = 0;
+        for (int64_t i = 0; i + 1 < dict_n && n_sort < 16; i += 2) {
+            int64_t kid = dict_elems[i]->i64;
+            uint8_t is_desc = 0;
+            if (kid == asc_id) is_desc = 0;
+            else if (kid == desc_id) is_desc = 1;
+            else continue;
+            /* The clause value is either one column symbol or a vector of
+             * column symbols that all share this clause's direction.
+             * Any other value shape adds no sort key.
+             * NOTE(review): ray_sym_str and ray_scan results are used
+             * unchecked here, unlike the resolve_* helpers. */
+            ray_t* val = dict_elems[i + 1];
+            if (val->type == -RAY_SYM) {
+                ray_t* s = ray_sym_str(val->i64);
+                sort_keys[n_sort] = ray_scan(g, ray_str_ptr(s));
+                sort_descs[n_sort] = is_desc;
+                n_sort++;
+            } else if (ray_is_vec(val) && val->type == RAY_SYM) {
+                for (int64_t c = 0; c < val->len && n_sort < 16; c++) {
+                    int64_t sid = ray_read_sym(ray_data(val), c, val->type, val->attrs);
+                    ray_t* s = ray_sym_str(sid);
+                    sort_keys[n_sort] = ray_scan(g, ray_str_ptr(s));
+                    sort_descs[n_sort] = is_desc;
+                    n_sort++;
+                }
+            }
+        }
+        if (n_sort > 0)
+            root = ray_sort_op(g, root, sort_keys, sort_descs, NULL, n_sort);
+    }
+
+    /* Take: avoid the DAG ray_head/ray_tail op — it can't handle
+     * tables with LIST columns (from non-agg scatter).  Use
+     * ray_take_fn, but convert the atom form into a `[start amount]`
+     * range so we get CLAMP semantics (group-by take),
+     * not the wrap/pad behavior of atom-n take on a short table. */
+    ray_t* take_range = NULL;  /* [start amount] literal form */
+    int take_is_atom = 0;
+    int64_t atom_n = 0;
+    if (take_val_expr) {
+        ray_t* tv = ray_eval(take_val_expr);
+        if (!tv || RAY_IS_ERR(tv)) {
+            ray_graph_free(g); ray_release(result);
+            return tv ? tv : ray_error("domain", NULL);
+        }
+        if (ray_is_atom(tv) && (tv->type == -RAY_I64 || tv->type == -RAY_I32)) {
+            atom_n = (tv->type == -RAY_I64) ? tv->i64 : tv->i32;
+            take_is_atom = 1;
+            ray_release(tv);
+        } else if (ray_is_vec(tv) && (tv->type == RAY_I64 || tv->type == RAY_I32) && tv->len == 2) {
+            /* Two-element range: kept (owned) and forwarded unchanged. */
+            take_range = tv;
+        } else {
+            ray_release(tv); ray_graph_free(g); ray_release(result);
+            return ray_error("domain", NULL);
+        }
+    }
+
+    root = ray_optimize(g, root);
+    ray_t* sorted = ray_execute(g, root);
+    ray_graph_free(g);
+    ray_release(result);
+
+    if (take_is_atom && sorted && !RAY_IS_ERR(sorted)) {
+        /* Build [start, amount] so ray_take_fn uses its range
+         * branch, which clamps to the available length. */
+        int64_t nrows = (sorted->type == RAY_TABLE)
+                        ? ray_table_nrows(sorted)
+                        : (ray_is_vec(sorted) ? sorted->len : 0);
+        int64_t start, amount;
+        if (atom_n >= 0) {
+            start = 0;
+            amount = atom_n < nrows ? atom_n : nrows;
+        } else {
+            /* Negative n takes the last |n| rows.  Compare against -nrows
+             * rather than negating atom_n: -INT64_MIN is signed overflow
+             * (undefined behaviour), whereas -atom_n is only evaluated
+             * here once atom_n >= -nrows has been established. */
+            amount = (atom_n < -nrows) ? nrows : -atom_n;
+            start = nrows - amount;
+        }
+        ray_t* rng = ray_vec_new(RAY_I64, 2);
+        if (!rng || RAY_IS_ERR(rng)) {
+            ray_release(sorted);
+            return rng ? rng : ray_error("oom", NULL);
+        }
+        ((int64_t*)ray_data(rng))[0] = start;
+        ((int64_t*)ray_data(rng))[1] = amount;
+        rng->len = 2;
+        ray_t* sliced = ray_take_fn(sorted, rng);
+        ray_release(sorted);
+        ray_release(rng);
+        return sliced;
+    }
+    if (take_range && sorted && !RAY_IS_ERR(sorted)) {
+        ray_t* sliced = ray_take_fn(sorted, take_range);
+        ray_release(sorted);
+        ray_release(take_range);
+        return sliced;
+    }
+    /* Execution failed (or there was no take): drop an unused range and
+     * return whatever ray_execute produced. */
+    if (take_range) ray_release(take_range);
+    return sorted;
+}
+
+/* --------------------------------------------------------------------------
+ * Compile-time local env helpers for lambda / let inlining.
+ *
+ * compile_expr_dag hangs a small stack of {formal_sym_id → node_id}
+ * bindings on the graph.  When the recursive walker encounters a
+ * name reference, it checks the env first; if the name is bound,
+ * return &g->nodes[node_id] — otherwise fall through to ray_scan.
+ *
+ * Store IDs, not pointers: g->nodes is a dynamically-resized array,
+ * and any realloc between push and lookup would dangle stored
+ * pointers.  IDs are stable across reallocs; we re-resolve
+ * &g->nodes[id] on every lookup.
+ *
+ * Shadowing is automatic: nested lambda / let pushes appear later in
+ * the stack, and cexpr_env_lookup walks top-down so the innermost
+ * binding wins.  Pops are counted — never partial rewinds.
+ *
+ * No retain/release: op nodes live in g->nodes and are freed
+ * uniformly by ray_graph_free.
+ * -------------------------------------------------------------------------- */ +static ray_op_t* cexpr_env_lookup(ray_graph_t* g, int64_t sym) { + for (int i = g->cexpr_env_top - 1; i >= 0; i--) + if (g->cexpr_env[i].sym == sym) + return &g->nodes[g->cexpr_env[i].node_id]; + return NULL; +} + +static bool cexpr_env_push(ray_graph_t* g, int64_t sym, ray_op_t* node) { + if (g->cexpr_env_top >= 32) return false; + g->cexpr_env[g->cexpr_env_top].sym = sym; + g->cexpr_env[g->cexpr_env_top].node_id = node->id; + g->cexpr_env_top++; + return true; +} + +static void cexpr_env_pop(ray_graph_t* g, int n) { + g->cexpr_env_top -= n; + if (g->cexpr_env_top < 0) g->cexpr_env_top = 0; /* defensive */ +} + +/* Re-resolve a ray_op_t* by its stable node ID. Use this whenever + * a pointer to an op node has been held across another DAG-building + * call (which may grow g->nodes via graph_alloc_node and invalidate + * all previously-returned pointers). The ID is stable; only the + * backing address may change. */ + +/* Compile a Rayfall AST expression into a DAG node */ +static ray_op_t* compile_expr_dag(ray_graph_t* g, ray_t* expr) { + if (!expr) return NULL; + + /* Atom literal → const node. Handle non-null scalar literals + * via the dedicated ctors that carry just the raw value; typed + * null atoms (e.g. `0Nl`, `0Nf`) must go through ray_const_atom + * so the null flag in atom->nullmap rides along — otherwise + * downstream comparisons lose the null-ness and fall back to + * sentinel-value equality. 
*/ + if (expr->type == -RAY_I64 && !RAY_ATOM_IS_NULL(expr)) + return ray_const_i64(g, expr->i64); + if (expr->type == -RAY_F64 && !RAY_ATOM_IS_NULL(expr)) + return ray_const_f64(g, expr->f64); + if (expr->type == -RAY_BOOL && !RAY_ATOM_IS_NULL(expr)) + return ray_const_bool(g, expr->b8); + if (expr->type == -RAY_STR && !RAY_ATOM_IS_NULL(expr)) { + const char *ptr = ray_str_ptr(expr); + size_t len = ray_str_len(expr); + return ray_const_str(g, ptr, len); + } + + /* Name reference → local env first, then column scan, then + * global env (for set-bound constants). Local env holds lambda + * / let bindings and takes precedence so formals shadow columns + * naturally. Global env is a last resort — it catches cases + * like `(set threshold 50)` used inside a lambda body. */ + if (expr->type == -RAY_SYM && (expr->attrs & RAY_ATTR_NAME)) { + ray_op_t* bound = cexpr_env_lookup(g, expr->i64); + if (bound) return bound; + ray_t* s = ray_sym_str(expr->i64); + if (!s) return NULL; + + /* Dotted name — desugar at compile time by walking the + * segments: emit a scan for the head column, then for each + * subsequent segment look up the name's registered DAG-level + * emitter and chain it. `col.ss` → scan(col) → extract(SS), + * `col.date` → scan(col) → date_trunc(DAY), etc. Segment + * resolution uses the same name table as the runtime + * `(ss col)` form, so adding a new accessor means registering + * one unary builtin (temporal or otherwise) — no bespoke sym + * → field map here. Anything we can't lower returns NULL + * (compile error), avoiding the old crash path where unknown + * dotted names became scans of non-existent columns. 
*/ + if (ray_sym_is_dotted(expr->i64)) { + const int64_t* segs; + int nsegs = ray_sym_segs(expr->i64, &segs); + if (nsegs < 2) return NULL; + if (!g->table || g->table->type != RAY_TABLE) return NULL; + if (!ray_table_get_col(g->table, segs[0])) return NULL; + ray_t* head_name = ray_sym_str(segs[0]); + if (!head_name) return NULL; + ray_op_t* op = ray_scan(g, ray_str_ptr(head_name)); + if (!op) return NULL; + for (int i = 1; i < nsegs; i++) { + int field = ray_temporal_field_from_sym(segs[i]); + if (field >= 0) { + op = ray_extract(g, op, field); + if (!op) return NULL; + continue; + } + int trunc_kind = ray_temporal_trunc_from_sym(segs[i]); + if (trunc_kind >= 0) { + op = ray_date_trunc(g, op, trunc_kind); + if (!op) return NULL; + continue; + } + return NULL; + } + return op; + } + + /* Column names on the bound table shadow global env — + * matches eval-level name-resolution order. */ + if (g->table && g->table->type == RAY_TABLE && + ray_table_get_col(g->table, expr->i64)) + return ray_scan(g, ray_str_ptr(s)); + /* Global env: atom literals / typed vectors compile as + * const nodes. Lambdas only make sense as call heads + * and are handled in the list branch below. */ + ray_t* gv = ray_env_get(expr->i64); + if (gv) { + if (ray_is_atom(gv)) return ray_const_atom(g, gv); + if (ray_is_vec(gv)) return ray_const_vec(g, gv); + } + /* Unknown name — let ray_scan produce a column-not-found + * error at exec time, matching prior behavior. */ + return ray_scan(g, ray_str_ptr(s)); + } + + /* Symbol literal (no RAY_ATTR_NAME) → const atom node. */ + if (expr->type == -RAY_SYM) + return ray_const_atom(g, expr); + + /* Other atom literal types → const atom node. Also falls + * through to here for typed null I64/F64/BOOL/STR atoms + * (which the fast-path branches above rejected via + * RAY_ATOM_IS_NULL). */ + if (ray_is_atom(expr) && !(expr->attrs & RAY_ATTR_NAME)) + return ray_const_atom(g, expr); + + /* Typed-vector literal (e.g. 
[1 2 3], [AAPL MSFT], ["a" "b"]) → + * const vector node. ray_const_vec already stores any ray_t* + * vec in ext->literal, and the OP_CONST executor returns it + * directly — so this unlocks every typed literal vector as a + * DAG operand (crucial for OP_IN set operands). */ + if (ray_is_vec(expr) && !(expr->attrs & RAY_ATTR_NAME)) + return ray_const_vec(g, expr); + + /* List → function call: (fn arg1 arg2 ...) */ + if (expr->type == RAY_LIST) { + int64_t n = ray_len(expr); + if (n == 0) return NULL; + ray_t** elems = (ray_t**)ray_data(expr); + ray_t* head = elems[0]; + + /* Lambda invocation: `((fn [formals] body) a1 a2 …)`. + * β-reduce at the DAG-node level — compile each actual + * arg into its own op (in the current env), push the + * {formal_i → actual_op_i} frame, recurse into the body + * (which reads the env via cexpr_env_lookup when it hits + * a name reference), then pop. Sub-expression sharing is + * automatic: multiple uses of a formal all resolve to the + * single compiled actual op. */ + if (head->type == RAY_LIST) { + int64_t hn = ray_len(head); + if (hn != 3) return NULL; + ray_t** hel = (ray_t**)ray_data(head); + if (hel[0]->type != -RAY_SYM) return NULL; + ray_t* hname_str = ray_sym_str(hel[0]->i64); + if (!hname_str || ray_str_len(hname_str) != 2 || + memcmp(ray_str_ptr(hname_str), "fn", 2) != 0) return NULL; + + ray_t* formals = hel[1]; + ray_t* body = hel[2]; + if (!ray_is_vec(formals) || formals->type != RAY_SYM) return NULL; + int64_t nf = formals->len; + if (n - 1 != nf) return NULL; /* arity mismatch */ + if (nf > 16) return NULL; /* too many formals */ + if (g->cexpr_env_top + (int)nf > 32) return NULL; /* env overflow */ + + /* Compile actuals in the CURRENT env, before pushing. + * Snapshot IDs, not pointers — g->nodes can realloc + * between successive compile_expr_dag calls so any + * raw ray_op_t* saved from an earlier iteration may + * dangle by the time we push it. 
*/ + uint32_t actual_ids[16]; + for (int64_t i = 0; i < nf; i++) { + ray_op_t* a = compile_expr_dag(g, elems[i + 1]); + if (!a) return NULL; + actual_ids[i] = a->id; + } + int64_t* fids = (int64_t*)ray_data(formals); + int pushed = 0; + for (int64_t i = 0; i < nf; i++) { + if (g->cexpr_env_top >= 32) { + cexpr_env_pop(g, pushed); + return NULL; + } + g->cexpr_env[g->cexpr_env_top].sym = fids[i]; + g->cexpr_env[g->cexpr_env_top].node_id = actual_ids[i]; + g->cexpr_env_top++; + pushed++; + } + ray_op_t* result = compile_expr_dag(g, body); + cexpr_env_pop(g, pushed); + return result; + } + + /* Named-lambda call: `(f a1 a2 …)` where `f` is globally + * bound to a RAY_LAMBDA with a single-expression body. + * Inline exactly like the literal `((fn …) …)` case. + * Shadowing order matches the value-position name-ref + * branch: local cexpr_env > table columns > globals. A + * column named `f` isn't callable, but we still must honor + * shadowing so the exec-time error is consistent. */ + if (head->type == -RAY_SYM && (head->attrs & RAY_ATTR_NAME) && + cexpr_env_lookup(g, head->i64) == NULL && + !(g->table && g->table->type == RAY_TABLE && + ray_table_get_col(g->table, head->i64))) { + ray_t* gv = ray_env_get(head->i64); + if (gv && gv->type == RAY_LAMBDA) { + ray_t* formals = LAMBDA_PARAMS(gv); + ray_t* body_lst = LAMBDA_BODY(gv); + if (formals && body_lst && body_lst->type == RAY_LIST && + ray_len(body_lst) == 1 && + ray_is_vec(formals) && formals->type == RAY_SYM) { + int64_t nf = formals->len; + if (n - 1 == nf && nf <= 16 && + g->cexpr_env_top + (int)nf <= 32) { + ray_t* body = ((ray_t**)ray_data(body_lst))[0]; + uint32_t actual_ids[16]; + for (int64_t i = 0; i < nf; i++) { + ray_op_t* a = compile_expr_dag(g, elems[i + 1]); + if (!a) return NULL; + actual_ids[i] = a->id; + } + int64_t* fids = (int64_t*)ray_data(formals); + int pushed = 0; + for (int64_t i = 0; i < nf; i++) { + g->cexpr_env[g->cexpr_env_top].sym = fids[i]; + g->cexpr_env[g->cexpr_env_top].node_id = 
actual_ids[i]; + g->cexpr_env_top++; + pushed++; + } + ray_op_t* result = compile_expr_dag(g, body); + cexpr_env_pop(g, pushed); + return result; + } + } + } + } + + /* Head must be a name referencing a builtin */ + if (head->type != -RAY_SYM) return NULL; + int64_t fn_sym = head->i64; + + /* Check for xbar */ + ray_t* fn_name_str = ray_sym_str(fn_sym); + const char* fname = fn_name_str ? ray_str_ptr(fn_name_str) : NULL; + size_t fname_len = fn_name_str ? ray_str_len(fn_name_str) : 0; + + if (fname_len == 4 && memcmp(fname, "xbar", 4) == 0) { + if (n != 3) return NULL; + ray_op_t* col = compile_expr_dag(g, elems[1]); + if (!col) return NULL; + uint32_t col_id = col->id; + ray_op_t* bucket = compile_expr_dag(g, elems[2]); + if (!bucket) return NULL; + col = &g->nodes[col_id]; + /* xbar(x, b) = x - (x % b) (stays in integer domain) */ + ray_op_t* m = ray_mod(g, col, bucket); + if (!m) return NULL; + col = &g->nodes[col_id]; + return ray_sub(g, col, m); + } + + /* (if cond then else) — 4 elements (fn + 3 args). Compiles + * to OP_IF which is supported by the element-wise fusion + * pipeline. */ + if (fname_len == 2 && memcmp(fname, "if", 2) == 0) { + if (n != 4) return NULL; + ray_op_t* c = compile_expr_dag(g, elems[1]); + if (!c) return NULL; + uint32_t c_id = c->id; + ray_op_t* t = compile_expr_dag(g, elems[2]); + if (!t) return NULL; + uint32_t t_id = t->id; + ray_op_t* e = compile_expr_dag(g, elems[3]); + if (!e) return NULL; + c = &g->nodes[c_id]; + t = &g->nodes[t_id]; + return ray_if(g, c, t, e); + } + + /* (substr str start len) — 4 elements. 
*/ + if (fname_len == 6 && memcmp(fname, "substr", 6) == 0) { + if (n != 4) return NULL; + ray_op_t* str = compile_expr_dag(g, elems[1]); + if (!str) return NULL; + uint32_t str_id = str->id; + ray_op_t* start = compile_expr_dag(g, elems[2]); + if (!start) return NULL; + uint32_t start_id = start->id; + ray_op_t* ln = compile_expr_dag(g, elems[3]); + if (!ln) return NULL; + str = &g->nodes[str_id]; + start = &g->nodes[start_id]; + return ray_substr(g, str, start, ln); + } + + /* (replace str from to) — 4 elements. */ + if (fname_len == 7 && memcmp(fname, "replace", 7) == 0) { + if (n != 4) return NULL; + ray_op_t* str = compile_expr_dag(g, elems[1]); + if (!str) return NULL; + uint32_t str_id = str->id; + ray_op_t* from = compile_expr_dag(g, elems[2]); + if (!from) return NULL; + uint32_t from_id = from->id; + ray_op_t* to = compile_expr_dag(g, elems[3]); + if (!to) return NULL; + str = &g->nodes[str_id]; + from = &g->nodes[from_id]; + return ray_replace(g, str, from, to); + } + + /* (concat a b ...) — variadic string concat. */ + if (fname_len == 6 && memcmp(fname, "concat", 6) == 0) { + if (n < 2 || n - 1 > 16) return NULL; + uint32_t arg_ids[16]; + for (int64_t i = 1; i < n; i++) { + ray_op_t* a = compile_expr_dag(g, elems[i]); + if (!a) return NULL; + arg_ids[i - 1] = a->id; + } + ray_op_t* args[16]; + for (int64_t i = 0; i < n - 1; i++) + args[i] = &g->nodes[arg_ids[i]]; + return ray_concat(g, args, (int)(n - 1)); + } + + /* (as 'TYPE col) — cast. The type is a sym literal like 'I64 / 'F64. 
*/ + if (fname_len == 2 && memcmp(fname, "as", 2) == 0) { + if (n != 3) return NULL; + ray_t* type_expr = elems[1]; + if (type_expr->type != -RAY_SYM) return NULL; + int8_t tgt = -1; + ray_t* ts = ray_sym_str(type_expr->i64); + if (ts) { + const char* tn = ray_str_ptr(ts); + size_t tl = ray_str_len(ts); + if (tl == 3 && memcmp(tn, "I64", 3) == 0) tgt = RAY_I64; + else if (tl == 3 && memcmp(tn, "F64", 3) == 0) tgt = RAY_F64; + else if (tl == 3 && memcmp(tn, "I32", 3) == 0) tgt = RAY_I32; + else if (tl == 3 && memcmp(tn, "I16", 3) == 0) tgt = RAY_I16; + else if (tl == 3 && memcmp(tn, "F32", 3) == 0) tgt = RAY_F32; + else if (tl == 2 && memcmp(tn, "U8", 2) == 0) tgt = RAY_U8; + else if (tl == 4 && memcmp(tn, "BOOL", 4) == 0) tgt = RAY_BOOL; + } + if (tgt < 0) return NULL; + ray_op_t* col = compile_expr_dag(g, elems[2]); + if (!col) return NULL; + return ray_cast(g, col, tgt); + } + + /* Temporal extract: (year col), (month col), (day col), ... */ + if (n == 2) { + int64_t field = -1; + if (fname_len == 4 && memcmp(fname, "year", 4) == 0) field = RAY_EXTRACT_YEAR; + else if (fname_len == 5 && memcmp(fname, "month", 5) == 0) field = RAY_EXTRACT_MONTH; + else if (fname_len == 3 && memcmp(fname, "day", 3) == 0) field = RAY_EXTRACT_DAY; + else if (fname_len == 4 && memcmp(fname, "hour", 4) == 0) field = RAY_EXTRACT_HOUR; + else if (fname_len == 6 && memcmp(fname, "minute",6) == 0) field = RAY_EXTRACT_MINUTE; + else if (fname_len == 6 && memcmp(fname, "second",6) == 0) field = RAY_EXTRACT_SECOND; + else if (fname_len == 9 && memcmp(fname, "dayofweek",9) == 0) field = RAY_EXTRACT_DOW; + else if (fname_len == 9 && memcmp(fname, "dayofyear",9) == 0) field = RAY_EXTRACT_DOY; + if (field >= 0) { + ray_op_t* col = compile_expr_dag(g, elems[1]); + if (!col) return NULL; + return ray_extract(g, col, field); + } + } + + /* (do e1 e2 ... en) → compile only the last expression. + * Earlier expressions can't have side-effects in DAG context; + * if they do, they'll be silently dropped. 
Use eval-level + * for side-effectful scripts. */ + if (fname_len == 2 && memcmp(fname, "do", 2) == 0) { + if (n < 2) return NULL; + return compile_expr_dag(g, elems[n - 1]); + } + + /* (let var val body) — compile `val` in the current env, + * bind var → val_op by ID (pointer-safe across reallocs), + * compile `body`, pop. Same β-reduction mechanism as + * lambda inlining, just with a single binding. */ + if (fname_len == 3 && memcmp(fname, "let", 3) == 0) { + if (n != 4) return NULL; + ray_t* var_expr = elems[1]; + if (var_expr->type != -RAY_SYM) return NULL; + int64_t var_sym = var_expr->i64; + ray_op_t* val_op = compile_expr_dag(g, elems[2]); + if (!val_op) return NULL; + /* cexpr_env_push already snapshots node->id, which is + * stable across subsequent graph reallocations. */ + if (!cexpr_env_push(g, var_sym, val_op)) return NULL; + ray_op_t* body_op = compile_expr_dag(g, elems[3]); + cexpr_env_pop(g, 1); + return body_op; + } + + /* (cond (p1 e1) (p2 e2) ... (else en)) → nested OP_IF. */ + if (fname_len == 4 && memcmp(fname, "cond", 4) == 0) { + if (n < 2) return NULL; + /* Walk right-to-left, building an OP_IF chain. The last + * clause must be an `else` form. 
*/ + uint32_t chain_id = UINT32_MAX; + for (int64_t i = n - 1; i >= 1; i--) { + ray_t* clause = elems[i]; + if (clause->type != RAY_LIST || ray_len(clause) != 2) return NULL; + ray_t** cpair = (ray_t**)ray_data(clause); + int is_else = 0; + if (cpair[0]->type == -RAY_SYM) { + ray_t* ns = ray_sym_str(cpair[0]->i64); + if (ns && ray_str_len(ns) == 4 && + memcmp(ray_str_ptr(ns), "else", 4) == 0) + is_else = 1; + } + if (is_else) { + if (i != n - 1) return NULL; + ray_op_t* c = compile_expr_dag(g, cpair[1]); + if (!c) return NULL; + chain_id = c->id; + } else { + if (chain_id == UINT32_MAX) return NULL; + ray_op_t* pred = compile_expr_dag(g, cpair[0]); + if (!pred) return NULL; + uint32_t pred_id = pred->id; + ray_op_t* body = compile_expr_dag(g, cpair[1]); + if (!body) return NULL; + pred = &g->nodes[pred_id]; + ray_op_t* chain = &g->nodes[chain_id]; + ray_op_t* r = ray_if(g, pred, body, chain); + if (!r) return NULL; + chain_id = r->id; + } + } + if (chain_id == UINT32_MAX) return NULL; + return &g->nodes[chain_id]; + } + + /* Variadic `and`/`or`: fold into a balanced binary tree. + * `(and a b c d)` → `(and (and a b) (and c d))` — depth log2(N). + * Without this, n>=4 falls through `compile_expr_dag` and the + * caller (e.g. select WHERE) reports "WHERE predicate not + * supported by DAG compiler". The fused-expr executor evaluates + * the resulting tree as a sequence of binary AND/OR instructions + * sharing scratch registers — no extra column allocations vs + * what hand-nested binary forms already do. + * + * Balanced tree (rather than left-fold) keeps the canonical + * shape symmetric and minimises dependency-chain depth, which + * future OoO / parallel-instruction executors can exploit. 
*/ + if (n >= 4) { + bool is_and = (fname_len == 3 && memcmp(fname, "and", 3) == 0); + bool is_or = (fname_len == 2 && memcmp(fname, "or", 2) == 0); + if (is_and || is_or) { + int64_t k = n - 1; + if (k > 64) return NULL; /* depth/space guard */ + uint32_t arg_ids[64]; + for (int64_t i = 0; i < k; i++) { + ray_op_t* a = compile_expr_dag(g, elems[i + 1]); + if (!a) return NULL; + arg_ids[i] = a->id; + } + dag_binary_ctor ctor = is_and ? ray_and : ray_or; + /* Iterative pairwise reduction: at each round, fold + * adjacent pairs into a single node, halving the count. + * Equivalent to recursive bisect but avoids a helper. */ + int64_t cnt = k; + while (cnt > 1) { + int64_t out = 0; + for (int64_t i = 0; i + 1 < cnt; i += 2) { + /* make_binary re-resolves both inputs via stored + * IDs after its own potential realloc, so the + * pointers we pass here are safe to use. */ + ray_op_t* l = &g->nodes[arg_ids[i]]; + ray_op_t* r = &g->nodes[arg_ids[i + 1]]; + ray_op_t* combined = ctor(g, l, r); + if (!combined) return NULL; + arg_ids[out++] = combined->id; + } + if (cnt & 1) /* carry odd tail */ + arg_ids[out++] = arg_ids[cnt - 1]; + cnt = out; + } + return &g->nodes[arg_ids[0]]; + } + } + + /* Binary op? */ + if (n == 3) { + dag_binary_ctor ctor = resolve_binary_dag(fn_sym); + if (ctor) { + ray_op_t* left = compile_expr_dag(g, elems[1]); + if (!left) return NULL; + uint32_t left_id = left->id; + ray_op_t* right = compile_expr_dag(g, elems[2]); + if (!right) return NULL; + left = &g->nodes[left_id]; + return ctor(g, left, right); + } + } + + /* Unary op or aggregation? */ + if (n == 2) { + /* Check for unary DAG ops */ + dag_unary_ctor uctor = resolve_unary_dag(fn_sym); + if (uctor) { + ray_op_t* arg = compile_expr_dag(g, elems[1]); + return arg ? 
uctor(g, arg) : NULL; + } + /* Aggregation functions return DAG agg nodes */ + uint16_t agg_op = resolve_agg_opcode(fn_sym); + if (agg_op) { + ray_op_t* arg = compile_expr_dag(g, elems[1]); + if (!arg) return NULL; + switch (agg_op) { + case OP_SUM: return ray_sum(g, arg); + case OP_AVG: return ray_avg(g, arg); + case OP_MIN: return ray_min_op(g, arg); + case OP_MAX: return ray_max_op(g, arg); + case OP_COUNT: return ray_count(g, arg); + case OP_FIRST: return ray_first(g, arg); + case OP_LAST: return ray_last(g, arg); + case OP_PROD: return ray_prod(g, arg); + case OP_STDDEV: return ray_stddev(g, arg); + case OP_STDDEV_POP: return ray_stddev_pop(g, arg); + case OP_VAR: return ray_var(g, arg); + case OP_VAR_POP: return ray_var_pop(g, arg); + default: return NULL; + } + } + } + } + + return NULL; +} + +/* Walk an expression tree and bind any name-symbols that match table columns + * into the current local scope. Recurses into list sub-expressions. */ +static void expr_bind_table_names(ray_t* expr, ray_t* tbl) { + if (!expr) return; + if (expr->type == -RAY_SYM && (expr->attrs & RAY_ATTR_NAME)) { + /* Plain column reference — bind the column into local scope. */ + ray_t* col = ray_table_get_col(tbl, expr->i64); + if (col) { ray_env_set_local(expr->i64, col); return; } + /* Dotted reference (e.g. `Timestamp.ss`) — the whole dotted + * sym isn't a column name, but its HEAD segment might be. + * Bind the head so ray_env_resolve's dotted walk can reach + * it when ray_eval fires on this expression. Non-column + * heads (globals, locals) fall through to env_resolve's + * normal scope-chain lookup. 
*/ + if (ray_sym_is_dotted(expr->i64)) { + const int64_t* segs; + int nsegs = ray_sym_segs(expr->i64, &segs); + if (nsegs >= 1) { + ray_t* head_col = ray_table_get_col(tbl, segs[0]); + if (head_col) ray_env_set_local(segs[0], head_col); + } + } + return; + } + if (expr->type == RAY_LIST) { + ray_t** elems = (ray_t**)ray_data(expr); + int64_t n = ray_len(expr); + for (int64_t i = 0; i < n; i++) + expr_bind_table_names(elems[i], tbl); + } +} + +static int is_agg_expr(ray_t* expr); /* defined below */ + +/* Return 1 if expr references a table column in a position where the + * column is expected to flow through row-by-row (not reduced by an + * enclosing aggregation). Used to decide whether a non-agg expression + * is expected to produce a row-aligned result — pure constants and + * aggregation-reduced expressions (e.g. `(+ 1 (sum p))`) legitimately + * produce scalars/short-length results that must be broadcast. + * + * The walker stops recursing when it hits an aggregation call: any + * column refs inside get reduced to a scalar, so they don't drive the + * row-alignment expectation. + * + * Lambda call forms `((fn ...) actuals)` are also treated as + * "unknown shape" — even if the actuals reference columns, the + * body may reduce them via an enclosed aggregation. Returning 0 + * here means the scatter will rely purely on the runtime shape + * check (row-aligned → gather, else broadcast) instead of + * erroring. This loses a bug-catching net for lambda calls whose + * body IS row-preserving but returns a mismatched-length result, + * but that's a niche case compared to the common "lambda wrapping + * an agg" pattern users actually write. 
*/ +static int expr_refs_row_column(ray_t* expr, ray_t* tbl) { + if (!expr) return 0; + if (expr->type == -RAY_SYM && (expr->attrs & RAY_ATTR_NAME)) { + if (ray_table_get_col(tbl, expr->i64)) return 1; + /* Dotted name whose head is a column is a row-aligned ref — + * `Timestamp.ss` flows through row-by-row the same as plain + * `Timestamp` would, so the scatter must treat it as one. */ + if (ray_sym_is_dotted(expr->i64)) { + const int64_t* segs; + int nsegs = ray_sym_segs(expr->i64, &segs); + if (nsegs >= 1 && ray_table_get_col(tbl, segs[0])) return 1; + } + return 0; + } + if (expr->type == RAY_LIST) { + /* If this call is itself an aggregation, its column refs + * collapse to a scalar — don't recurse. The whole subtree + * is treated as a constant from the row-alignment POV. */ + if (is_agg_expr(expr)) return 0; + ray_t** elems = (ray_t**)ray_data(expr); + int64_t n = ray_len(expr); + if (n == 0) return 0; + /* Lambda call form: head is itself a LIST. We can't tell + * from the outside whether the body is row-preserving or + * aggregating, so surrender row-alignment enforcement. */ + if (elems[0]->type == RAY_LIST) return 0; + /* Skip elems[0] — it's the function name, not a column. */ + for (int64_t i = 1; i < n; i++) + if (expr_refs_row_column(elems[i], tbl)) return 1; + } + return 0; +} + +/* Check if an expression is an aggregation call (head is an agg function) */ +static int is_agg_expr(ray_t* expr) { + if (!expr || expr->type != RAY_LIST) return 0; + if (expr->type == RAY_DICT) return 0; + int64_t n = ray_len(expr); + if (n < 2) return 0; + ray_t** elems = (ray_t**)ray_data(expr); + if (elems[0]->type != -RAY_SYM) return 0; + return resolve_agg_opcode(elems[0]->i64) != 0; +} + +/* True for `(fn arg ...)` where fn resolves to a RAY_UNARY marked + * RAY_FN_AGGR — i.e. a builtin aggregator (sum/avg/min/max/count and + * the non-whitelisted med/dev/var/stddev/etc). 
Used to route these + * through the streaming-style per-group AGG branch rather than the + * full ray_eval per-group fallback. This is a SUPERSET of is_agg_expr: + * it includes everything resolve_agg_opcode names plus the AGGR + * builtins that lack a streaming-engine opcode. */ +static int is_aggr_unary_call(ray_t* expr) { + if (!expr || expr->type != RAY_LIST) return 0; + int64_t n = ray_len(expr); + if (n < 2) return 0; + ray_t** elems = (ray_t**)ray_data(expr); + if (elems[0]->type != -RAY_SYM) return 0; + ray_t* fn_obj = ray_env_get(elems[0]->i64); + if (!fn_obj || fn_obj->type != RAY_UNARY) return 0; + return (fn_obj->attrs & RAY_FN_AGGR) != 0; +} + +/* Walk expr once, gather unique column-ref symbol ids that resolve to + * columns of `tbl`. Dotted refs (`Timestamp.ss`) record the head + * segment. Caps at `max_out` entries (16 is plenty for s: clauses); + * returns the count gathered. Used by the per-group fallback to slice + * each ref exactly once per group instead of re-walking the AST. */ +static int collect_col_refs(ray_t* expr, ray_t* tbl, + int64_t* out_syms, int max_out, int n) { + if (!expr || n >= max_out) return n; + if (expr->type == -RAY_SYM && (expr->attrs & RAY_ATTR_NAME)) { + int64_t want = -1; + if (ray_table_get_col(tbl, expr->i64)) { + want = expr->i64; + } else if (ray_sym_is_dotted(expr->i64)) { + const int64_t* segs; + int nsegs = ray_sym_segs(expr->i64, &segs); + if (nsegs >= 1 && ray_table_get_col(tbl, segs[0])) want = segs[0]; + } + if (want >= 0) { + for (int i = 0; i < n; i++) if (out_syms[i] == want) return n; + if (n < max_out) out_syms[n++] = want; + } + return n; + } + if (expr->type == RAY_LIST) { + ray_t** elems = (ray_t**)ray_data(expr); + int64_t cnt = ray_len(expr); + for (int64_t i = 0; i < cnt && n < max_out; i++) + n = collect_col_refs(elems[i], tbl, out_syms, max_out, n); + } + return n; +} + +/* Bind a single column-id to a slice of its column under `idx_list`. 
+ * Helper used inside the per-group hot loop (slices the table's column + * via ray_at_fn, hands the slice to env_bind_local which retains, then + * drops our ref). Returns 0 on success, error ray_t* on failure. */ +static ray_t* bind_col_slice(int64_t sym, ray_t* col, ray_t* idx_list) { + ray_t* slice = ray_at_fn(col, idx_list); + if (!slice || RAY_IS_ERR(slice)) { + return slice ? slice : ray_error("oom", NULL); + } + ray_env_set_local(sym, slice); + ray_release(slice); + return NULL; +} + +/* Convert a partly-filled typed vec (indices 0..fill-1 valid) back into + * a LIST of n_groups owned atom refs (only first `fill` initialized). + * Used by the per-group eval fallback when the probe-typed-direct path + * detects a mid-loop type mismatch and has to demote to a list. */ +static ray_t* typed_vec_to_list(ray_t* tv, int64_t fill, int64_t n_groups) { + ray_t* list_col = ray_alloc(n_groups * sizeof(ray_t*)); + if (!list_col) return ray_error("oom", NULL); + list_col->type = RAY_LIST; + list_col->len = 0; + ray_t** out = (ray_t**)ray_data(list_col); + for (int64_t k = 0; k < fill; k++) { + int allocated = 0; + ray_t* atom = collection_elem(tv, k, &allocated); + if (!allocated && atom) ray_retain(atom); + out[k] = atom; + list_col->len = k + 1; + } + return list_col; +} + +/* Inner per-group eval body shared by the LIST-`groups` and `idx_buf` + * variants. Pre-collects unique column refs, pushes ONE local scope + * around the whole loop, and probes the first cell: + * - scalar atom of a typed-vec primitive → write directly into a + * pre-allocated typed vec (no list intermediate, no post-collapse); + * - otherwise → collect into a LIST column. + * If the typed-direct path hits a mid-loop type mismatch, it demotes + * to a LIST cleanly (one-time cost). `feeder` produces the per-group + * idx_list ray_t* (caller controls its lifetime / reuse); the closure + * over `feeder_state` lets the buf variant reuse a single I64 wrapper. 
+ * + * Returns either a typed vec (homogeneous scalars) or a LIST col. */ +typedef ray_t* (*idx_feeder_fn)(int64_t gi, void* state); + +static ray_t* nonagg_eval_per_group_core(ray_t* expr, ray_t* tbl, + idx_feeder_fn feeder, void* fstate, + int64_t n_groups) { + int64_t col_syms[16]; + int n_cols = collect_col_refs(expr, tbl, col_syms, 16, 0); + ray_t* cols[16]; + for (int i = 0; i < n_cols; i++) + cols[i] = ray_table_get_col(tbl, col_syms[i]); + + if (ray_env_push_scope() != RAY_OK) return ray_error("oom", NULL); + + ray_t* result = NULL; /* typed vec OR list col */ + int direct_typed = 0; /* non-zero → result is a typed vec */ + int8_t typed_t = 0; /* atom type sentinel for the typed path */ + + for (int64_t gi = 0; gi < n_groups; gi++) { + ray_t* idx_list = feeder(gi, fstate); + if (!idx_list) { + ray_env_pop_scope(); + if (result) ray_release(result); + return ray_error("oom", NULL); + } + for (int i = 0; i < n_cols; i++) { + ray_t* err = bind_col_slice(col_syms[i], cols[i], idx_list); + if (err) { + ray_env_pop_scope(); + if (result) ray_release(result); + return err; + } + } + ray_t* cell = ray_eval(expr); + if (!cell || RAY_IS_ERR(cell)) { + ray_env_pop_scope(); + if (result) ray_release(result); + return cell ? cell : ray_error("domain", NULL); + } + + if (gi == 0) { + int8_t t = cell->type; + int collapsable = (t < 0 && t != -RAY_SYM && t != -RAY_STR && t != -RAY_GUID); + if (collapsable) { + int8_t vt = (int8_t)(-t); + result = ray_vec_new(vt, n_groups); + if (!result || RAY_IS_ERR(result)) { + ray_env_pop_scope(); ray_release(cell); + return result ? 
result : ray_error("oom", NULL); + } + result->len = n_groups; + if (store_typed_elem(result, 0, cell) == 0) { + direct_typed = 1; typed_t = t; + ray_release(cell); + } else { + /* type unsupported by store_typed_elem → fall to list */ + ray_release(result); result = NULL; + collapsable = 0; + } + } + if (!collapsable) { + result = ray_alloc(n_groups * sizeof(ray_t*)); + if (!result) { + ray_env_pop_scope(); ray_release(cell); + return ray_error("oom", NULL); + } + result->type = RAY_LIST; + result->len = 0; + ((ray_t**)ray_data(result))[0] = cell; + result->len = 1; + } + continue; + } + + if (direct_typed) { + if (cell->type == typed_t && store_typed_elem(result, gi, cell) == 0) { + ray_release(cell); + } else { + /* Demote: convert typed vec [0..gi-1] to list, append cell, continue as list. */ + ray_t* list_col = typed_vec_to_list(result, gi, n_groups); + ray_release(result); + if (RAY_IS_ERR(list_col)) { + ray_env_pop_scope(); ray_release(cell); + return list_col; + } + result = list_col; + ((ray_t**)ray_data(result))[gi] = cell; /* takes ownership */ + result->len = gi + 1; + direct_typed = 0; + } + } else { + ((ray_t**)ray_data(result))[gi] = cell; /* takes ownership */ + result->len = gi + 1; + } + } + + ray_env_pop_scope(); + return result; +} + +/* idx_feeder for the eval-fallback's LIST `groups` layout. */ +typedef struct { ray_t** items; } groups_state_t; +static ray_t* groups_idx_feed(int64_t gi, void* st) { + groups_state_t* s = (groups_state_t*)st; + return s->items[gi * 2 + 1]; +} + +static ray_t* nonagg_eval_per_group(ray_t* expr, ray_t* tbl, + ray_t* groups, int64_t n_groups) { + groups_state_t st = { .items = (ray_t**)ray_data(groups) }; + return nonagg_eval_per_group_core(expr, tbl, groups_idx_feed, &st, n_groups); +} + +/* idx_feeder for the DAG fast-path's idx_buf+offsets+grp_cnt layout. + * Reuses a single RAY_I64 wrapper across all groups: just retargets the + * data pointer-equivalent by memcpy'ing into its data area and adjusting + * `len`. 
Saves n_groups vec allocs/frees. */ +typedef struct { + const int64_t* idx_buf; + const int64_t* offsets; + const int64_t* grp_cnt; + ray_t* scratch; /* RAY_I64 vec, sized to max grp_cnt */ +} buf_state_t; + +static ray_t* buf_idx_feed(int64_t gi, void* st) { + buf_state_t* s = (buf_state_t*)st; + int64_t cnt = s->grp_cnt[gi]; + s->scratch->len = cnt; + if (cnt > 0) { + memcpy(ray_data(s->scratch), &s->idx_buf[s->offsets[gi]], + (size_t)cnt * sizeof(int64_t)); + } + return s->scratch; +} + +static ray_t* nonagg_eval_per_group_buf(ray_t* expr, ray_t* tbl, + const int64_t* idx_buf, + const int64_t* offsets, + const int64_t* grp_cnt, + int64_t n_groups) { + int64_t max_cnt = 0; + for (int64_t gi = 0; gi < n_groups; gi++) + if (grp_cnt[gi] > max_cnt) max_cnt = grp_cnt[gi]; + ray_t* scratch = ray_vec_new(RAY_I64, max_cnt > 0 ? max_cnt : 1); + if (!scratch || RAY_IS_ERR(scratch)) + return scratch ? scratch : ray_error("oom", NULL); + buf_state_t st = { idx_buf, offsets, grp_cnt, scratch }; + ray_t* res = nonagg_eval_per_group_core(expr, tbl, buf_idx_feed, &st, n_groups); + ray_release(scratch); + return res; +} + +/* Streaming-style per-group AGG body, DAG flavor. For an expression + * like `(med v)` (head is RAY_FN_AGGR + RAY_UNARY, second elem is a + * column ref or full-table-eval-able sub-expression), slice src per + * group via ray_at_fn, call the unary fn directly, store the scalar + * result into a pre-sized typed vec. Mirrors the eval-fallback's AGG + * branch (`query.c:~1955`) but with the idx_buf+offsets+grp_cnt + * layout the DAG path produces. 
*/
/* Parameters:
 *   expr      call form; elems[0] is the function name symbol and
 *             elems[1] the argument expression (callers are expected to
 *             have checked the shape, e.g. via is_aggr_unary_call)
 *   tbl       source table
 *   idx_buf   row indices of all groups, back to back
 *   offsets   start of each group's run inside idx_buf
 *   grp_cnt   number of rows in each group
 *   n_groups  number of groups
 *
 * Returns a typed vec with one slot per group, or an error object.
 *
 * Failure handling:
 *   - A failed slice or a failed aggregator call aborts the loop and the
 *     error is returned. Skipping the group instead would leave that
 *     slot of the result vec unwritten.
 *   - A result that cannot live in the typed vec (non-atom result, atom
 *     type differing from the first group's, or store_typed_elem
 *     refusing it) re-runs the whole expression through the generic
 *     nonagg_eval_per_group_buf path. */
static ray_t* aggr_unary_per_group_buf(ray_t* expr, ray_t* tbl,
                                       const int64_t* idx_buf,
                                       const int64_t* offsets,
                                       const int64_t* grp_cnt,
                                       int64_t n_groups) {
    ray_t** elems = (ray_t**)ray_data(expr);
    ray_t* fn_name = elems[0];
    ray_t* col_expr = elems[1];

    ray_t* fn_obj = ray_env_get(fn_name->i64);
    if (!fn_obj || fn_obj->type != RAY_UNARY)
        return ray_error("type", NULL);
    ray_unary_fn uf = (ray_unary_fn)(uintptr_t)fn_obj->i64;

    /* Resolve the source column: either a direct column ref (no copy)
     * or a full-table eval of the sub-expression. */
    ray_t* src = NULL;
    if (col_expr->type == -RAY_SYM && (col_expr->attrs & RAY_ATTR_NAME)) {
        src = ray_table_get_col(tbl, col_expr->i64);
        if (src) ray_retain(src);
    }
    if (!src) {
        /* Bind table cols and eval the argument against the full table. */
        if (ray_env_push_scope() != RAY_OK) return ray_error("oom", NULL);
        expr_bind_table_names(col_expr, tbl);
        src = ray_eval(col_expr);
        ray_env_pop_scope();
        if (!src || RAY_IS_ERR(src)) return src ? src : ray_error("domain", NULL);
    }

    /* Reusable I64 idx wrapper, sized to the largest group. */
    int64_t max_cnt = 0;
    for (int64_t gi = 0; gi < n_groups; gi++)
        if (grp_cnt[gi] > max_cnt) max_cnt = grp_cnt[gi];
    ray_t* idx_vec = ray_vec_new(RAY_I64, max_cnt > 0 ? max_cnt : 1);
    if (!idx_vec || RAY_IS_ERR(idx_vec)) {
        ray_release(src);
        return idx_vec ? idx_vec : ray_error("oom", NULL);
    }

    ray_t* agg_vec = NULL;
    int8_t agg_atom_t = 0;
    ray_t* fail = NULL;            /* error object to hand back, if any */
    const char* fail_code = NULL;  /* error still to be built after cleanup */
    int demote = 0;                /* 1 → redo via the generic path */

    for (int64_t gi = 0; gi < n_groups; gi++) {
        idx_vec->len = grp_cnt[gi];
        if (grp_cnt[gi] > 0) {
            memcpy(ray_data(idx_vec), &idx_buf[offsets[gi]],
                   (size_t)grp_cnt[gi] * sizeof(int64_t));
        }

        ray_t* subset = ray_at_fn(src, idx_vec);
        if (!subset || RAY_IS_ERR(subset)) {
            if (subset) fail = subset; else fail_code = "oom";
            break;
        }
        ray_t* agg_val = uf(subset);
        ray_release(subset);
        if (!agg_val || RAY_IS_ERR(agg_val)) {
            if (agg_val) fail = agg_val; else fail_code = "domain";
            break;
        }

        if (!agg_vec) {
            /* Only atoms (negative type tags) map to a typed vec element
             * type; anything else goes to the generic path. */
            if (agg_val->type >= 0) {
                ray_release(agg_val);
                demote = 1;
                break;
            }
            agg_atom_t = agg_val->type;
            agg_vec = ray_vec_new((int8_t)(-agg_atom_t), n_groups);
            if (!agg_vec || RAY_IS_ERR(agg_vec)) {
                ray_release(agg_val);
                if (agg_vec) fail = agg_vec; else fail_code = "oom";
                agg_vec = NULL; /* nothing of ours to release below */
                break;
            }
            agg_vec->len = n_groups;
        }

        if (agg_val->type != agg_atom_t ||
            store_typed_elem(agg_vec, gi, agg_val) != 0) {
            /* Shouldn't happen for well-behaved aggregators; if it does,
             * don't return a partly-typed vec. */
            ray_release(agg_val);
            demote = 1;
            break;
        }
        ray_release(agg_val);
    }

    ray_release(idx_vec);
    ray_release(src);

    if (fail || fail_code || demote) {
        if (agg_vec) ray_release(agg_vec);
        if (fail) return fail;
        if (fail_code) return ray_error(fail_code, NULL);
        return nonagg_eval_per_group_buf(expr, tbl, idx_buf, offsets,
                                         grp_cnt, n_groups);
    }

    if (!agg_vec) {
        /* Reached only when the loop never ran (n_groups == 0). Default to
         * I64 for lack of a better guess. */
        agg_vec = ray_vec_new(RAY_I64, n_groups);
        if (!agg_vec) return ray_error("oom", NULL);
        if (!RAY_IS_ERR(agg_vec)) agg_vec->len = n_groups;
    }
    return agg_vec;
}

/* Forward declarations for eval-level groupby fallback */ + +/* (select {from: t [where: pred] [by: key] [col: expr ...]}) + * Special form — receives unevaluated dict arg.
*/ +ray_t* ray_select_fn(ray_t** args, int64_t n) { + if (n < 1) return ray_error("domain", NULL); + ray_t* dict = args[0]; + if (!dict || dict->type != RAY_DICT) + return ray_error("type", NULL); + + /* Evaluate 'from:' to get the source table */ + ray_t* from_expr = dict_get(dict, "from"); + if (!from_expr) return ray_error("domain", NULL); + ray_t* tbl = ray_eval(from_expr); + if (RAY_IS_ERR(tbl)) return tbl; + if (tbl->type != RAY_TABLE) { ray_release(tbl); return ray_error("type", NULL); } + + ray_t* where_expr = dict_get(dict, "where"); + ray_t* by_expr = dict_get(dict, "by"); + ray_t* take_expr = dict_get(dict, "take"); + ray_t* nearest_expr = dict_get(dict, "nearest"); + + /* Collect output columns (keys that are not reserved). The dict's + * physical layout is [keys, vals] but the iteration loops below were + * written for the old interleaved [k0,v0,...] form — open a transient + * pair view so the existing code keeps working. */ + DICT_VIEW_DECL(dv); + DICT_VIEW_OPEN(dict, dv); + if (DICT_VIEW_OVERFLOW(dv)) { + ray_release(tbl); + return ray_error("domain", "select clause has too many keys"); + } + int64_t dict_n = dv_n; + ray_t** dict_elems = dv; + int64_t from_id = ray_sym_intern("from", 4); + int64_t where_id = ray_sym_intern("where", 5); + int64_t by_id = ray_sym_intern("by", 2); + int64_t take_id = ray_sym_intern("take", 4); + int64_t asc_id = ray_sym_intern("asc", 3); + int64_t desc_id = ray_sym_intern("desc", 4); + int64_t nearest_id = ray_sym_intern("nearest", 7); + + /* Check for asc/desc presence */ + bool has_sort = false; + for (int64_t i = 0; i + 1 < dict_n; i += 2) { + int64_t kid = dict_elems[i]->i64; + if (kid == asc_id || kid == desc_id) { has_sort = true; break; } + } + + /* `nearest` is mutually exclusive with `asc`/`desc`/`by` — ANN + * ordering is an index scan, not a column sort, and cannot be + * composed with group-by in this phase. 
*/ + if (nearest_expr) { + if (has_sort) { + ray_release(tbl); + return ray_error("domain", + "select: `nearest` cannot be combined with asc/desc"); + } + if (by_expr) { + ray_release(tbl); + return ray_error("domain", + "select: `nearest` cannot be combined with `by`"); + } + } + + /* Count output columns */ + int n_out = 0; + for (int64_t i = 0; i + 1 < dict_n; i += 2) { + int64_t kid = dict_elems[i]->i64; + if (kid != from_id && kid != where_id && kid != by_id && + kid != take_id && kid != asc_id && kid != desc_id && + kid != nearest_id) + n_out++; + } + + /* Simple case: no clauses at all → return table as-is */ + if (n_out == 0 && !where_expr && !by_expr && !take_expr && !has_sort && !nearest_expr) + return tbl; + + /* Dict-form by-clause pre-evaluation: MUST happen before we + * build the DAG, so the graph sees the augmented table with + * the materialised dict-val columns already present. + * (select {... by: {o: OrderId b: (xbar Ts 1000)} ...}) + * Dict values can be any expression; we eval each against tbl + * with its columns bound as locals, add the result as a new + * column named after the dict key, then rewrite by_expr as a + * plain RAY_SYM vector of the dict keys so the rest of + * ray_select_fn sees a standard multi-key group-by. 
*/ + ray_t* by_sym_vec_owned = NULL; + DICT_VIEW_DECL(byv); + if (by_expr && by_expr->type == RAY_DICT) { + DICT_VIEW_OPEN(by_expr, byv); + if (DICT_VIEW_OVERFLOW(byv)) { + ray_release(tbl); + return ray_error("domain", "by-dict has too many keys"); + } + int64_t dlen = byv_n; + int64_t nk = dlen / 2; + if (nk == 0 || nk > 16) { + ray_release(tbl); + return ray_error("domain", "by-dict must have 1..16 keys"); + } + ray_t** d_elems = byv; + + ray_env_push_scope(); + int64_t in_ncols = ray_table_ncols(tbl); + for (int64_t c = 0; c < in_ncols; c++) { + int64_t cn = ray_table_col_name(tbl, c); + ray_t* cv = ray_table_get_col_idx(tbl, c); + if (cv) ray_env_set_local(cn, cv); + } + + by_sym_vec_owned = ray_vec_new(RAY_SYM, nk); + if (!by_sym_vec_owned || RAY_IS_ERR(by_sym_vec_owned)) { + ray_env_pop_scope(); + ray_release(tbl); + return ray_error("oom", NULL); + } + int64_t* sv_data = (int64_t*)ray_data(by_sym_vec_owned); + by_sym_vec_owned->len = nk; + + bool failed = false; + ray_t* fail_err = NULL; + int64_t expected_len = ray_table_nrows(tbl); + for (int64_t i = 0; i < nk; i++) { + ray_t* k = d_elems[i * 2]; + ray_t* v = d_elems[i * 2 + 1]; + if (!k || k->type != -RAY_SYM) { + fail_err = ray_error("domain", "by-dict key must be a symbol name"); + failed = true; break; + } + /* Duplicate key guard: {g: A g: B} would otherwise append + * two cols both named g, then group on the first g twice + * (silently dropping B). Reject explicitly. 
*/ + bool duplicate_key = false; + for (int64_t j = 0; j < i && !duplicate_key; j++) + if (d_elems[j * 2]->i64 == k->i64) duplicate_key = true; + if (duplicate_key) { + fail_err = ray_error("domain", "by-dict has duplicate key"); + failed = true; break; + } + /* Collision check: if the dict key already exists in the + * input table, ray_table_add_col would append a second + * column with the same name and ray_table_get_col finds + * the ORIGINAL, so the group-by would silently scan the + * user's existing column instead of our materialised + * one. The one allowed exception is {x: x}, a trivial + * self-alias: the input column is already exactly what + * we want to group on. */ + bool already_in_tbl = (ray_table_get_col(tbl, k->i64) != NULL); + bool trivial_self = (v->type == -RAY_SYM && v->i64 == k->i64); + if (already_in_tbl && !trivial_self) { + fail_err = ray_error("domain", + "by-dict alias shadows an existing input column"); + failed = true; break; + } + if (trivial_self) { + /* No eval / no add: just group on the existing col. */ + sv_data[i] = k->i64; + continue; + } + ray_t* col_vec = ray_eval(v); + if (!col_vec || RAY_IS_ERR(col_vec)) { + fail_err = col_vec ? col_vec : ray_error("domain", "by-dict val eval"); + failed = true; break; + } + if (!ray_is_vec(col_vec) || ray_len(col_vec) != expected_len) { + ray_release(col_vec); + fail_err = ray_error("length", "by-dict val must be a column vector"); + failed = true; break; + } + ray_t* new_tbl = ray_table_add_col(tbl, k->i64, col_vec); + ray_release(col_vec); + if (!new_tbl || RAY_IS_ERR(new_tbl)) { + fail_err = new_tbl ? new_tbl : ray_error("oom", NULL); + failed = true; break; + } + tbl = new_tbl; + /* Re-bind the newly added column under its dict key so + * later dict vals can reference earlier keys. 
*/ + ray_env_set_local(k->i64, col_vec); + sv_data[i] = k->i64; + } + ray_env_pop_scope(); + if (failed) { + ray_release(by_sym_vec_owned); + ray_release(tbl); + return fail_err; + } + by_expr = by_sym_vec_owned; + } + + /* Build DAG */ + ray_graph_t* g = ray_graph_new(tbl); + if (!g) { + if (by_sym_vec_owned) ray_release(by_sym_vec_owned); + ray_release(tbl); return ray_error("oom", NULL); + } + + ray_op_t* root = ray_const_table(g, tbl); + + /* Non-agg expression tracking for post-DAG scatter (used in GROUP BY) */ + int64_t nonagg_names[16]; + ray_t* nonagg_exprs[16]; + uint8_t n_nonaggs = 0; + int synth_count_col = 0; /* 1 if we synthesized OP_COUNT for group boundaries */ + + /* Apply WHERE filter */ + if (where_expr) { + ray_op_t* pred = compile_expr_dag(g, where_expr); + if (!pred) { + ray_graph_free(g); ray_release(tbl); + return ray_error("domain", + "WHERE predicate not supported by DAG compiler — " + "most common causes: arity mismatch " + "(e.g. `(in v)` instead of `(in col v)`), " + "unknown function name, unsupported special form, " + "or a sub-expression the compiler can't lower"); + } + root = ray_filter(g, root, pred); + } + + /* Apply NEAREST (ANN/KNN) re-ranking. Mutually exclusive with + * asc/desc/by (already rejected above). Runs after WHERE so the + * filter feeds the rerank executor directly. `take k` becomes the + * target result count; the rerank executor handles the take internally + * so the bottom-of-function take block is skipped when nearest is set. */ + float* nearest_query_owned = NULL; /* freed after ray_execute below */ + ray_t* nearest_handle_owned = NULL; /* HNSW handle kept alive for the + * DAG's lifetime; released after + * ray_execute. Without this, an + * inline `(ann (hnsw-build ...) ...)` + * drops the handle's rc to 0 before + * exec runs — the rc→0 hook frees + * the index and the ext's stored + * pointer dangles. 
*/ + if (nearest_expr) { + if (nearest_expr->type != RAY_LIST || ray_len(nearest_expr) < 3) { + ray_graph_free(g); ray_release(tbl); + return ray_error("domain", + "nearest: expected (ann [ef]) or (knn [metric])"); + } + int64_t nlen = ray_len(nearest_expr); + ray_t** nlist = (ray_t**)ray_data(nearest_expr); + ray_t* head = nlist[0]; + if (head->type != -RAY_SYM) { + ray_graph_free(g); ray_release(tbl); + return ray_error("domain", + "nearest: first element must be the symbol `ann` or `knn`"); + } + int64_t ann_sym_id = ray_sym_intern("ann", 3); + int64_t knn_sym_id = ray_sym_intern("knn", 3); + + /* Resolve k from take (default 10). */ + int64_t k_req = 10; + if (take_expr) { + ray_t* tv = ray_eval(take_expr); + if (!tv || RAY_IS_ERR(tv)) { + ray_graph_free(g); ray_release(tbl); + return tv ? tv : ray_error("domain", NULL); + } + if (tv->type == -RAY_I64) k_req = tv->i64; + else if (tv->type == -RAY_I32) k_req = tv->i32; + else { + ray_release(tv); + ray_graph_free(g); ray_release(tbl); + return ray_error("type", "nearest: take must be an integer atom"); + } + ray_release(tv); + if (k_req <= 0) { + ray_graph_free(g); ray_release(tbl); + return ray_error("domain", "nearest: take must be positive"); + } + } + + /* Evaluate the query vector (arg index 2). */ + ray_t* qvec = ray_eval(nlist[2]); + if (!qvec || RAY_IS_ERR(qvec)) { + ray_graph_free(g); ray_release(tbl); + return qvec ? qvec : ray_error("domain", NULL); + } + if (!ray_is_vec(qvec) || + (qvec->type != RAY_F32 && qvec->type != RAY_F64 && + qvec->type != RAY_I32 && qvec->type != RAY_I64)) { + ray_release(qvec); + ray_graph_free(g); ray_release(tbl); + return ray_error("type", "nearest: query must be a numeric vector"); + } + int32_t dim = (int32_t)qvec->len; + if (dim <= 0) { + ray_release(qvec); + ray_graph_free(g); ray_release(tbl); + return ray_error("length", "nearest: query vector is empty"); + } + + /* Copy query into a fresh float[] that the DAG op borrows; freed + * after ray_execute completes. 
*/ + nearest_query_owned = (float*)ray_sys_alloc((size_t)dim * sizeof(float)); + if (!nearest_query_owned) { + ray_release(qvec); + ray_graph_free(g); ray_release(tbl); + return ray_error("oom", NULL); + } + switch (qvec->type) { + case RAY_F32: + memcpy(nearest_query_owned, ray_data(qvec), (size_t)dim * sizeof(float)); + break; + case RAY_F64: { + double* s = (double*)ray_data(qvec); + for (int32_t j = 0; j < dim; j++) nearest_query_owned[j] = (float)s[j]; + break; + } + case RAY_I32: { + int32_t* s = (int32_t*)ray_data(qvec); + for (int32_t j = 0; j < dim; j++) nearest_query_owned[j] = (float)s[j]; + break; + } + case RAY_I64: { + int64_t* s = (int64_t*)ray_data(qvec); + for (int32_t j = 0; j < dim; j++) nearest_query_owned[j] = (float)s[j]; + break; + } + } + ray_release(qvec); + + if (head->i64 == ann_sym_id) { + ray_t* hobj = ray_eval(nlist[1]); + if (!hobj || RAY_IS_ERR(hobj)) { + ray_sys_free(nearest_query_owned); + ray_graph_free(g); ray_release(tbl); + return hobj ? hobj : ray_error("domain", NULL); + } + if (hobj->type != -RAY_I64 || !(hobj->attrs & RAY_ATTR_HNSW)) { + ray_release(hobj); ray_sys_free(nearest_query_owned); + ray_graph_free(g); ray_release(tbl); + return ray_error("type", + "nearest (ann): first arg must be an HNSW handle (from hnsw-build)"); + } + ray_hnsw_t* idx = (ray_hnsw_t*)(uintptr_t)hobj->i64; + if (!idx) { + /* Defensive: attr set but pointer cleared — treat as invalid. 
*/ + ray_release(hobj); ray_sys_free(nearest_query_owned); + ray_graph_free(g); ray_release(tbl); + return ray_error("type", + "nearest (ann): HNSW handle has been freed"); + } + if (idx->dim != dim) { + ray_release(hobj); ray_sys_free(nearest_query_owned); + ray_graph_free(g); ray_release(tbl); + return ray_error("length", + "nearest (ann): query dim does not match index dim"); + } + int32_t ef = HNSW_DEFAULT_EF_S; + if (nlen >= 4) { + ray_t* ev = ray_eval(nlist[3]); + if (!ev || RAY_IS_ERR(ev)) { + ray_release(hobj); ray_sys_free(nearest_query_owned); + ray_graph_free(g); ray_release(tbl); + return ev ? ev : ray_error("domain", + "nearest (ann): ef expression failed to evaluate"); + } + if (ev->type == -RAY_I64) ef = (int32_t)ev->i64; + else if (ev->type == -RAY_I32) ef = ev->i32; + else { + ray_release(ev); ray_release(hobj); + ray_sys_free(nearest_query_owned); + ray_graph_free(g); ray_release(tbl); + return ray_error("type", + "nearest (ann): ef must be an integer atom"); + } + ray_release(ev); + } + if ((int64_t)ef < k_req) ef = (int32_t)k_req; + root = ray_ann_rerank(g, root, idx, nearest_query_owned, dim, k_req, ef); + /* Steal the retain from ray_eval — the ext now borrows `idx` + * through hobj. Released in the common exit path after + * ray_execute has completed. 
*/ + nearest_handle_owned = hobj; + } else if (head->i64 == knn_sym_id) { + ray_t* col_expr = nlist[1]; + if (col_expr->type != -RAY_SYM) { + ray_sys_free(nearest_query_owned); + ray_graph_free(g); ray_release(tbl); + return ray_error("type", + "nearest (knn): first arg must be an unquoted column name"); + } + int64_t col_sym = col_expr->i64; + ray_hnsw_metric_t metric = RAY_HNSW_COSINE; + if (nlen >= 4) { + ray_t* mv = nlist[3]; + if (mv && mv->type == -RAY_SYM) { + int64_t mid = mv->i64; + if (mid == ray_sym_find("l2", 2)) metric = RAY_HNSW_L2; + else if (mid == ray_sym_find("ip", 2)) metric = RAY_HNSW_IP; + else if (mid == ray_sym_find("cosine", 6)) metric = RAY_HNSW_COSINE; + else { + ray_sys_free(nearest_query_owned); + ray_graph_free(g); ray_release(tbl); + return ray_error("domain", + "nearest (knn): metric must be 'cosine, 'l2, or 'ip"); + } + } + } + root = ray_knn_rerank(g, root, col_sym, nearest_query_owned, dim, k_req, metric); + } else { + ray_sys_free(nearest_query_owned); + ray_graph_free(g); ray_release(tbl); + return ray_error("domain", + "nearest: expected `ann` or `knn` as the first element"); + } + if (!root) { + if (nearest_handle_owned) ray_release(nearest_handle_owned); + ray_sys_free(nearest_query_owned); + ray_graph_free(g); ray_release(tbl); + return ray_error("oom", NULL); + } + + /* When the user didn't specify output columns, project only the + * source schema — NOT the rerank's synthetic `_dist`. This keeps + * `(select {from: t nearest: ...})` shape-compatible with + * `(select {from: t})`; users who want `_dist` must name it + * explicitly (e.g. `{from: t d: _dist ...}`). + * + * Must handle arbitrarily wide tables (up to ray_select's uint8 + * limit of 255 cols) — a silent 16-col cap would let `_dist` + * leak through for real-world tables. 
*/ + if (n_out == 0) { + int64_t src_ncols = ray_table_ncols(tbl); + if (src_ncols > 255) { + if (nearest_handle_owned) ray_release(nearest_handle_owned); + ray_sys_free(nearest_query_owned); + ray_graph_free(g); ray_release(tbl); + return ray_error("limit", + "nearest: implicit projection exceeds 255 source columns — " + "specify output columns explicitly"); + } + if (src_ncols > 0) { + ray_op_t** col_ops = (ray_op_t**)ray_sys_alloc( + (size_t)src_ncols * sizeof(ray_op_t*)); + if (!col_ops) { + if (nearest_handle_owned) ray_release(nearest_handle_owned); + ray_sys_free(nearest_query_owned); + ray_graph_free(g); ray_release(tbl); + return ray_error("oom", NULL); + } + int nc = 0; + bool scan_err = false; + for (int64_t c = 0; c < src_ncols; c++) { + int64_t name_id = ray_table_col_name(tbl, c); + ray_t* s = ray_sym_str(name_id); + if (!s) continue; + ray_op_t* scan_op = ray_scan(g, ray_str_ptr(s)); + if (!scan_op) { scan_err = true; break; } + col_ops[nc++] = scan_op; + } + if (scan_err) { + ray_sys_free(col_ops); + if (nearest_handle_owned) ray_release(nearest_handle_owned); + ray_sys_free(nearest_query_owned); + ray_graph_free(g); ray_release(tbl); + return ray_error("oom", NULL); + } + root = ray_select(g, root, col_ops, (uint8_t)nc); + ray_sys_free(col_ops); + if (!root) { + if (nearest_handle_owned) ray_release(nearest_handle_owned); + ray_sys_free(nearest_query_owned); + ray_graph_free(g); ray_release(tbl); + return ray_error("oom", NULL); + } + } + } + } + + /* GROUP BY */ + if (by_expr) { + /* Resolve a "single key" sym id when by_expr is either a + * scalar -RAY_SYM name or a single-element RAY_SYM vector. + * The eval_group branch and several downstream sites used to + * read `by_expr->i64` directly, which is garbage when by_expr + * is a vector — use by_key_sym instead. 
*/ + int64_t by_key_sym = -1; + if (by_expr->type == -RAY_SYM && (by_expr->attrs & RAY_ATTR_NAME)) + by_key_sym = by_expr->i64; + else if (by_expr->type == RAY_SYM && ray_len(by_expr) == 1) + by_key_sym = ((int64_t*)ray_data(by_expr))[0]; + + /* Detect non-aggregate expressions before routing so we can + * decide whether GUID keys go to the DAG HT path or fall back + * to eval-level. */ + int any_nonagg = 0; + if (n_out > 0) { + for (int64_t i = 0; i + 1 < dict_n; i += 2) { + int64_t kid = dict_elems[i]->i64; + if (kid == from_id || kid == where_id || kid == by_id || + kid == take_id || kid == asc_id || kid == desc_id) continue; + if (!is_agg_expr(dict_elems[i + 1])) { any_nonagg = 1; break; } + } + } + + /* Decide routing. LIST/STR always fall to the eval-level + * grouping because the DAG HT path can't pack them into + * 8-byte key slots. GUID is packed via row-indirection in + * the HT layout (wide_key_mask), so it uses the parallel DAG + * path *except* for queries with non-aggregate expressions + * (the non-agg scatter still requires 8-byte-packable key + * reads through its KEY_READ macro). */ + int use_eval_group = 0; + if (by_key_sym >= 0) { + ray_t* key_col = ray_table_get_col(tbl, by_key_sym); + if (key_col) { + int8_t kct = key_col->type; + if (RAY_IS_PARTED(kct)) kct = (int8_t)RAY_PARTED_BASETYPE(kct); + if (kct == RAY_LIST || kct == RAY_STR) + use_eval_group = 1; + else if (kct == RAY_GUID && (any_nonagg || n_out == 0)) + /* RAY_GUID routes to eval-level ray_group_fn only + * for (a) non-agg expression queries (existing + * behavior) and (b) the "no output columns" form + * `(select {from: t by: guid})` which otherwise + * lands in the DAG no-agg-no-nonagg branch whose + * first-occurrence scanner is O(N × n_groups) and + * truncates wide keys to 8 bytes via ray_read_sym. + * Pure-agg group-bys with GUID keys still take the + * DAG path (exec_group handles wide keys correctly + * and stays parallel / segment-streamed on parted + * tables). 
*/ + use_eval_group = 1; + } + } + /* Non-aggregation expressions (arithmetic, lambda, etc.) are + * handled post-DAG: aggs go through the parallel GROUP pipeline, + * then non-agg results are evaluated on the full table and + * scattered per-group into LIST columns. The scatter block + * only handles single scalar-key by-clauses — for multi-key + * or computed-key groupings, fall back to eval-level so the + * non-agg scatter has a well-defined row→group mapping. */ + if (!use_eval_group && any_nonagg) { + /* Fast path requires a single scalar-named key column. + * Multi-key and computed-key by-clauses with non-agg + * expressions are not yet supported. */ + int single_scalar_key = 0; + if (by_expr->type == -RAY_SYM && (by_expr->attrs & RAY_ATTR_NAME)) { + single_scalar_key = 1; + } else if (by_expr->type == RAY_SYM && ray_len(by_expr) == 1) { + single_scalar_key = 1; + } + if (!single_scalar_key) { + ray_graph_free(g); ray_release(tbl); + return ray_error("nyi", "non-agg expression with multi-key or computed group key"); + } + } + if (use_eval_group) { + /* Apply WHERE filter first (if any), then eval-level groupby */ + ray_t* eval_tbl = tbl; + if (where_expr) { + root = ray_optimize(g, root); + ray_t* fres = ray_execute(g, root); + ray_graph_free(g); g = NULL; + if (!fres || RAY_IS_ERR(fres)) { ray_release(tbl); return fres ? fres : ray_error("domain", NULL); } + if (ray_is_lazy(fres)) fres = ray_lazy_materialize(fres); + if (!fres || RAY_IS_ERR(fres)) { ray_release(tbl); return fres ? fres : ray_error("domain", NULL); } + eval_tbl = fres; + } else { + ray_graph_free(g); g = NULL; + } + /* eval_group path supports only simple scalar / [col] by-forms; + * multi-key and computed keys shouldn't land here. 
*/ + if (by_key_sym < 0) { + if (eval_tbl != tbl) ray_release(eval_tbl); + ray_release(tbl); + return ray_error("nyi", "eval-level groupby requires scalar key"); + } + ray_t* key_col = ray_table_get_col(eval_tbl, by_key_sym); + + /* Fast path: (select {from: t by: k}) with no aggs and + * no non-agg expressions — we only need first-of-group + * for each non-key column, not full per-group index + * lists. Scan the key column once, record the first + * row index of each distinct key in a hash table, then + * gather that index list from every other column. This + * avoids ray_group_fn's per-group ray_vec_append churn + * which dominated the cost on 10M-row / 1M-group + * workloads. */ + if (n_out == 0 && key_col && key_col->type == RAY_GUID) { + int64_t n = key_col->len; + const uint8_t* kb = (const uint8_t*)ray_data(key_col); + uint32_t cap = 64; + while ((uint64_t)cap < (uint64_t)n * 2 && cap < (1u << 28)) cap <<= 1; + uint32_t mask = cap - 1; + ray_t* ht_hdr = ray_alloc((size_t)cap * sizeof(uint32_t)); + if (!ht_hdr) { if (eval_tbl != tbl) ray_release(eval_tbl); ray_release(tbl); return ray_error("oom", NULL); } + uint32_t* ht = (uint32_t*)ray_data(ht_hdr); + memset(ht, 0xFF, (size_t)cap * sizeof(uint32_t)); + + int64_t fi_cap = n < 1024 ? 1024 : (n < (1 << 20) ? 
n : (1 << 20)); + if (fi_cap < 256) fi_cap = 256; + ray_t* fi_hdr = ray_alloc((size_t)fi_cap * sizeof(int64_t)); + if (!fi_hdr) { ray_free(ht_hdr); if (eval_tbl != tbl) ray_release(eval_tbl); ray_release(tbl); return ray_error("oom", NULL); } + int64_t* fi = (int64_t*)ray_data(fi_hdr); + int64_t ngroups = 0; + + for (int64_t i = 0; i < n; i++) { + if ((i & 65535) == 0) { + if (ray_interrupted()) { + ray_free(fi_hdr); + ray_free(ht_hdr); + if (eval_tbl != tbl) ray_release(eval_tbl); + ray_release(tbl); + return ray_error("cancel", "interrupted"); + } + ray_progress_update("select", "by: first-of-group", + (uint64_t)i, (uint64_t)n); + } + const uint8_t* cur = kb + (size_t)i * 16; + uint64_t h; memcpy(&h, cur, 8); h ^= h >> 33; h *= 0xff51afd7ed558ccdULL; + uint32_t slot = (uint32_t)(h & mask); + uint32_t gi = UINT32_MAX; + while (ht[slot] != UINT32_MAX) { + uint32_t cand = ht[slot]; + if (memcmp(kb + (size_t)fi[cand] * 16, cur, 16) == 0) { gi = cand; break; } + slot = (slot + 1) & mask; + } + if (gi == UINT32_MAX) { + if (ngroups >= fi_cap) { + int64_t new_cap = fi_cap * 2; + ray_t* new_hdr = ray_alloc((size_t)new_cap * sizeof(int64_t)); + if (!new_hdr) { ray_free(fi_hdr); ray_free(ht_hdr); if (eval_tbl != tbl) ray_release(eval_tbl); ray_release(tbl); return ray_error("oom", NULL); } + memcpy(ray_data(new_hdr), fi, (size_t)ngroups * sizeof(int64_t)); + ray_free(fi_hdr); + fi_hdr = new_hdr; + fi = (int64_t*)ray_data(fi_hdr); + fi_cap = new_cap; + } + fi[ngroups] = i; + ht[slot] = (uint32_t)ngroups; + ngroups++; + } + } + ray_free(ht_hdr); + + /* Build result table: key column first (gathered from + * the original at fi[]), then every other column the + * same way. Allocation failures and width mismatches + * must propagate — partial results silently dropping + * columns would be a correctness bug. 
*/ + int64_t nc_src = ray_table_ncols(eval_tbl); + ray_t* res = ray_table_new(nc_src); + ray_t* first_err = NULL; + if (!res || RAY_IS_ERR(res)) { + first_err = res && RAY_IS_ERR(res) ? res : ray_error("oom", NULL); + res = NULL; + goto fog_cleanup; + } + + for (int64_t pass = 0; pass < nc_src + 1 && !first_err; pass++) { + int64_t cn; + if (pass == 0) cn = by_key_sym; + else { + cn = ray_table_col_name(eval_tbl, pass - 1); + if (cn == by_key_sym) continue; + } + ray_t* sc = ray_table_get_col(eval_tbl, cn); + if (!sc) continue; + ray_t* dst = NULL; + int8_t sct = sc->type; + if (RAY_IS_PARTED(sct)) sct = (int8_t)RAY_PARTED_BASETYPE(sct); + + if (sct == RAY_STR) { + dst = ray_vec_new(RAY_STR, ngroups); + for (int64_t gi = 0; gi < ngroups && dst && !RAY_IS_ERR(dst); gi++) { + size_t slen = 0; + const char* sp = ray_str_vec_get(sc, fi[gi], &slen); + dst = ray_str_vec_append(dst, sp ? sp : "", sp ? slen : 0); + } + } else if (sct == RAY_LIST) { + dst = ray_list_new((int32_t)ngroups); + if (dst && !RAY_IS_ERR(dst)) { + ray_t** sitems = (ray_t**)ray_data(sc); + ray_t** dout = (ray_t**)ray_data(dst); + for (int64_t gi = 0; gi < ngroups; gi++) { + dout[gi] = sitems[fi[gi]]; + ray_retain(dout[gi]); + } + dst->len = ngroups; + } + } else if (sct == RAY_SYM) { + /* Preserve the source sym-width from attrs so + * narrow sym columns (1/2/4-byte indices) + * memcpy the same esz on both sides. 
*/ + dst = ray_sym_vec_new(sc->attrs & RAY_SYM_W_MASK, ngroups); + if (dst && !RAY_IS_ERR(dst)) { + dst->len = ngroups; + uint8_t esz = ray_sym_elem_size(sct, dst->attrs); + const char* sb = (const char*)ray_data(sc); + char* db = (char*)ray_data(dst); + bool src_has_nulls = (sc->attrs & RAY_ATTR_HAS_NULLS) != 0; + for (int64_t gi = 0; gi < ngroups; gi++) { + memcpy(db + (size_t)gi * esz, + sb + (size_t)fi[gi] * esz, esz); + if (src_has_nulls && ray_vec_is_null(sc, fi[gi])) + ray_vec_set_null(dst, gi, true); + } + } + } else { + dst = ray_vec_new(sct, ngroups); + if (dst && !RAY_IS_ERR(dst)) { + dst->len = ngroups; + uint8_t esz = ray_sym_elem_size(sct, sc->attrs); + const char* sb = (const char*)ray_data(sc); + char* db = (char*)ray_data(dst); + bool src_has_nulls = (sc->attrs & RAY_ATTR_HAS_NULLS) != 0; + for (int64_t gi = 0; gi < ngroups; gi++) { + memcpy(db + (size_t)gi * esz, + sb + (size_t)fi[gi] * esz, esz); + if (src_has_nulls && ray_vec_is_null(sc, fi[gi])) + ray_vec_set_null(dst, gi, true); + } + } + } + + if (!dst || RAY_IS_ERR(dst)) { + first_err = (dst && RAY_IS_ERR(dst)) ? dst : ray_error("oom", NULL); + if (dst && !RAY_IS_ERR(dst)) ray_release(dst); + break; + } + res = ray_table_add_col(res, cn, dst); + ray_release(dst); + if (RAY_IS_ERR(res)) { first_err = res; res = NULL; break; } + } + + fog_cleanup: + ray_free(fi_hdr); + if (eval_tbl != tbl) ray_release(eval_tbl); + ray_release(tbl); + if (first_err) { + if (res) ray_release(res); + return first_err; + } + return apply_sort_take(res, dict_elems, dict_n, asc_id, desc_id, take_id); + } + + ray_t* groups_dict = ray_group_fn(key_col); + if (RAY_IS_ERR(groups_dict)) { if (eval_tbl != tbl) ray_release(eval_tbl); ray_release(tbl); return groups_dict; } + /* Flatten the dict into the legacy [k0,v0,…] interleaved LIST + * representation that the rest of this branch was written for. 
*/ + ray_t* groups = groups_to_pair_list(groups_dict); + ray_release(groups_dict); + if (RAY_IS_ERR(groups)) { if (eval_tbl != tbl) ray_release(eval_tbl); ray_release(tbl); return groups; } + + int64_t gn = ray_len(groups); + int64_t n_groups = gn / 2; + + /* Empty groups with no explicit aggs: return empty table with full schema */ + if (n_groups == 0 && n_out == 0) { + ray_release(groups); + int64_t nc0 = ray_table_ncols(eval_tbl); + ray_t* empty = ray_table_new(nc0); + if (!RAY_IS_ERR(empty)) { + /* Key column first */ + { ray_t* sc = ray_table_get_col(eval_tbl, by_key_sym); + if (sc) { + ray_t* ev = ray_vec_new(sc->type, 0); + if (ev && !RAY_IS_ERR(ev)) { empty = ray_table_add_col(empty, by_key_sym, ev); ray_release(ev); } + } + } + for (int64_t c = 0; c < nc0; c++) { + int64_t cn = ray_table_col_name(eval_tbl, c); + if (cn == by_key_sym) continue; + ray_t* sc = ray_table_get_col_idx(eval_tbl, c); + ray_t* ev = (sc->type == RAY_STR) ? ray_vec_new(RAY_STR, 0) : + (sc->type == RAY_LIST) ? ray_list_new(0) : + ray_vec_new(sc->type, 0); + if (ev && !RAY_IS_ERR(ev)) { empty = ray_table_add_col(empty, cn, ev); ray_release(ev); } + } + } + if (eval_tbl != tbl) ray_release(eval_tbl); + ray_release(tbl); + return empty; + } + + /* Collect aggregation results */ + int n_agg_out = 0; + int64_t agg_names[16]; + ray_t* agg_results[16]; + for (int64_t i = 0; i + 1 < dict_n && n_agg_out < 16; i += 2) { + int64_t kid = dict_elems[i]->i64; + if (kid == from_id || kid == where_id || kid == by_id || kid == take_id || kid == asc_id || kid == desc_id) continue; + ray_t* val_expr_item = dict_elems[i + 1]; + + if (is_aggr_unary_call(val_expr_item)) { + /* Streaming-style per-group AGG branch. Accepts both + * the resolve_agg_opcode whitelist (sum/avg/min/max/...) + * and the broader RAY_FN_AGGR + RAY_UNARY set + * (med/dev/var/stddev/...) — for the eval-fallback path + * the only thing the body needs is a unary fn pointer + * to call directly with the per-group slice. 
*/ + ray_t** agg_elems = (ray_t**)ray_data(val_expr_item); + ray_t* agg_fn_name = agg_elems[0]; + ray_t* agg_col_expr = agg_elems[1]; + + /* Resolve source column from filtered table */ + ray_t* src_col_val = NULL; + if (agg_col_expr->type == -RAY_SYM && (agg_col_expr->attrs & RAY_ATTR_NAME)) { + src_col_val = ray_table_get_col(eval_tbl, agg_col_expr->i64); + if (src_col_val) ray_retain(src_col_val); + } + if (!src_col_val) { + src_col_val = ray_eval(agg_col_expr); + if (RAY_IS_ERR(src_col_val)) { + for (int ai = 0; ai < n_agg_out; ai++) { if (agg_results[ai]) ray_release(agg_results[ai]); } + ray_release(groups); if (eval_tbl != tbl) ray_release(eval_tbl); ray_release(tbl); return src_col_val; + } + } + + /* For each group, compute aggregation */ + ray_t* agg_vec = NULL; + ray_t** grp_items = (ray_t**)ray_data(groups); + for (int64_t gi = 0; gi < n_groups; gi++) { + ray_t* idx_list = grp_items[gi * 2 + 1]; + ray_t* subset = ray_at_fn(src_col_val, idx_list); + if (RAY_IS_ERR(subset)) continue; + ray_t* agg_val = NULL; + ray_t* fn_obj = ray_env_get(agg_fn_name->i64); + if (fn_obj && fn_obj->type == RAY_UNARY) { + ray_unary_fn uf = (ray_unary_fn)(uintptr_t)fn_obj->i64; + agg_val = uf(subset); + } + ray_release(subset); + if (!agg_val || RAY_IS_ERR(agg_val)) continue; + + if (!agg_vec) { + int8_t vt = -(agg_val->type); + agg_vec = ray_vec_new(vt, n_groups); + if (RAY_IS_ERR(agg_vec)) { ray_release(agg_val); break; } + agg_vec->len = n_groups; + } + store_typed_elem(agg_vec, gi, agg_val); + ray_release(agg_val); + } + ray_release(src_col_val); + agg_names[n_agg_out] = kid; + agg_results[n_agg_out] = agg_vec; + n_agg_out++; + } else { + /* Non-aggregation expression: evaluate on full table, + * then gather per-group subsets into a LIST column + * (non-agg produces list-of-vectors). 
*/ + if (ray_env_push_scope() != RAY_OK) { + for (int ai = 0; ai < n_agg_out; ai++) { if (agg_results[ai]) ray_release(agg_results[ai]); } + ray_release(groups); if (eval_tbl != tbl) ray_release(eval_tbl); ray_release(tbl); + return ray_error("oom", NULL); + } + expr_bind_table_names(val_expr_item, eval_tbl); + ray_t* full_val = ray_eval(val_expr_item); + ray_env_pop_scope(); + if (RAY_IS_ERR(full_val)) { + for (int ai = 0; ai < n_agg_out; ai++) { if (agg_results[ai]) ray_release(agg_results[ai]); } + ray_release(groups); if (eval_tbl != tbl) ray_release(eval_tbl); ray_release(tbl); return full_val; + } + + /* Build LIST column: pre-allocate, then gather per group. + * Direct pointer assignment avoids ray_list_append overhead. */ + ray_t* list_col = ray_alloc(n_groups * sizeof(ray_t*)); + if (!list_col || RAY_IS_ERR(list_col)) { + ray_release(full_val); + for (int ai = 0; ai < n_agg_out; ai++) { if (agg_results[ai]) ray_release(agg_results[ai]); } + ray_release(groups); if (eval_tbl != tbl) ray_release(eval_tbl); ray_release(tbl); + return ray_error("oom", NULL); + } + list_col->type = RAY_LIST; + /* Track filled length incrementally — see the DAG + * scatter above for rationale (no memset, exact + * cleanup via v->len walk in ray_release). */ + list_col->len = 0; + ray_t** list_out = (ray_t**)ray_data(list_col); + + /* Decide per-group disposition of full_val: + * - expression references a column → result must + * be row-aligned; a typed-vec or LIST whose len + * matches eval_tbl's nrows → gather, otherwise + * that's a genuine bug and we error out. + * - expression is constant (no column refs) → + * broadcast as-is to every group cell. 
*/ + int64_t eval_nrows = ray_table_nrows(eval_tbl); + int refs_column = expr_refs_row_column(val_expr_item, eval_tbl); + int is_indexable = + ray_is_vec(full_val) || full_val->type == RAY_LIST; + int full_is_row_aligned = + is_indexable && full_val->len == eval_nrows; + + if (refs_column && !full_is_row_aligned) { + /* Non-streaming aggregation fallback: the full-table + * eval didn't produce a row-aligned shape (e.g. a + * user lambda returned a scalar from a vector arg), + * so collect per-group and post-apply the expression + * to each group's slice. Each cell can be any shape; + * homogeneous-scalar cells collapse to a typed vec. */ + ray_release(full_val); + ray_release(list_col); /* len=0, walks nothing */ + ray_t* per_group = nonagg_eval_per_group( + val_expr_item, eval_tbl, groups, n_groups); + if (RAY_IS_ERR(per_group)) { + for (int ai = 0; ai < n_agg_out; ai++) { if (agg_results[ai]) ray_release(agg_results[ai]); } + ray_release(groups); if (eval_tbl != tbl) ray_release(eval_tbl); ray_release(tbl); + return per_group; + } + /* core produces typed vec or list as appropriate */ + agg_names[n_agg_out] = kid; + agg_results[n_agg_out] = per_group; + n_agg_out++; + continue; + } + + ray_t** gi_items = (ray_t**)ray_data(groups); + for (int64_t gi = 0; gi < n_groups; gi++) { + ray_t* idx_list = gi_items[gi * 2 + 1]; + ray_t* cell; + if (full_is_row_aligned) { + cell = gather_by_idx(full_val, + (int64_t*)ray_data(idx_list), idx_list->len); + } else { + /* Pure constant (no column refs) → broadcast */ + ray_retain(full_val); + cell = full_val; + } + list_out[gi] = cell; + list_col->len = gi + 1; /* commit slot */ + } + ray_release(full_val); + agg_names[n_agg_out] = kid; + agg_results[n_agg_out] = list_col; + n_agg_out++; + } + } + + /* Build result table: key column + aggregation columns */ + ray_t* result = ray_table_new(1 + n_agg_out); + if (RAY_IS_ERR(result)) { ray_release(groups); if (eval_tbl != tbl) ray_release(eval_tbl); ray_release(tbl); return result; } 
+ + /* Key column: build a typed vector matching the source column type */ + ray_t** grp_items = (ray_t**)ray_data(groups); + ray_t* key_col_src = ray_table_get_col(eval_tbl, by_key_sym); + { + int8_t ktype = key_col_src ? key_col_src->type : RAY_I64; + if (RAY_IS_PARTED(ktype)) ktype = (int8_t)RAY_PARTED_BASETYPE(ktype); + ray_t* key_vec; + if (ktype == RAY_STR) { + key_vec = ray_vec_new(RAY_STR, n_groups); + for (int64_t gi = 0; gi < n_groups && key_vec && !RAY_IS_ERR(key_vec); gi++) { + ray_t* k = grp_items[gi * 2]; + const char* sp = ray_str_ptr(k); + size_t slen = ray_str_len(k); + key_vec = ray_str_vec_append(key_vec, sp ? sp : "", sp ? slen : 0); + } + } else { + uint8_t kattrs = key_col_src ? key_col_src->attrs : 0; + if (ktype == RAY_SYM) + key_vec = ray_sym_vec_new(kattrs & RAY_SYM_W_MASK, n_groups); + else + key_vec = ray_vec_new(ktype, n_groups); + if (key_vec && !RAY_IS_ERR(key_vec)) { + key_vec->len = n_groups; + /* Zero-fill data region so skipped GUID/null slots are safe */ + memset(ray_data(key_vec), 0, (size_t)n_groups * ray_sym_elem_size(ktype, key_vec->attrs)); + for (int64_t gi = 0; gi < n_groups; gi++) + store_typed_elem(key_vec, gi, grp_items[gi * 2]); + } + } + if (!key_vec || RAY_IS_ERR(key_vec)) { + for (int i = 0; i < n_agg_out; i++) { if (agg_results[i]) ray_release(agg_results[i]); } + ray_release(result); ray_release(groups); if (eval_tbl != tbl) ray_release(eval_tbl); ray_release(tbl); + return key_vec ? 
key_vec : ray_error("oom", NULL); + } + result = ray_table_add_col(result, by_key_sym, key_vec); + ray_release(key_vec); + } + + for (int i = 0; i < n_agg_out; i++) { + if (agg_results[i]) + result = ray_table_add_col(result, agg_names[i], agg_results[i]); + if (agg_results[i]) ray_release(agg_results[i]); + } + + /* No explicit aggs: gather first-of-group for all non-key columns */ + if (n_agg_out == 0 && n_groups > 0) { + ray_t** gi_items = (ray_t**)ray_data(groups); + /* Collect first index per group */ + int64_t fi_stack[256]; + ray_t* fi_hdr = NULL; + int64_t* fi = (n_groups <= 256) ? fi_stack : NULL; + if (!fi) { + fi_hdr = ray_alloc((size_t)n_groups * sizeof(int64_t)); + if (!fi_hdr) { ray_release(result); ray_release(groups); if (eval_tbl != tbl) ray_release(eval_tbl); ray_release(tbl); return ray_error("oom", NULL); } + fi = (int64_t*)ray_data(fi_hdr); + } + for (int64_t gi = 0; gi < n_groups; gi++) { + ray_t* il = gi_items[gi * 2 + 1]; + int a = 0; ray_t* i0 = collection_elem(il, 0, &a); + fi[gi] = as_i64(i0); + if (a) ray_release(i0); + } + int64_t nc = ray_table_ncols(eval_tbl); + for (int64_t c = 0; c < nc && !RAY_IS_ERR(result); c++) { + int64_t cn = ray_table_col_name(eval_tbl, c); + if (cn == by_key_sym) continue; + ray_t* sc = ray_table_get_col_idx(eval_tbl, c); + ray_t* dst = NULL; + if (sc->type == RAY_STR) { + dst = ray_vec_new(RAY_STR, n_groups); + bool src_has_nulls = (sc->attrs & RAY_ATTR_HAS_NULLS) != 0; + for (int64_t gi = 0; gi < n_groups && dst && !RAY_IS_ERR(dst); gi++) { + if (src_has_nulls && ray_vec_is_null(sc, fi[gi])) { + dst = ray_str_vec_append(dst, "", 0); + if (dst && !RAY_IS_ERR(dst)) + ray_vec_set_null(dst, dst->len - 1, true); + } else { + size_t slen = 0; + const char* sp = ray_str_vec_get(sc, fi[gi], &slen); + dst = ray_str_vec_append(dst, sp ? sp : "", sp ? 
slen : 0); + } + } + } else if (sc->type == RAY_LIST) { + dst = ray_alloc(n_groups * sizeof(ray_t*)); + if (dst) { + dst->type = RAY_LIST; dst->len = n_groups; + ray_t** dout = (ray_t**)ray_data(dst); + ray_t** sitems = (ray_t**)ray_data(sc); + for (int64_t gi = 0; gi < n_groups; gi++) { dout[gi] = sitems[fi[gi]]; ray_retain(dout[gi]); } + } + } else { + dst = ray_vec_new(sc->type, n_groups); + if (dst && !RAY_IS_ERR(dst)) { + /* len BEFORE the loop: store_typed_elem's null + * path routes through ray_vec_set_null which + * silently drops out-of-range writes — post- + * loop assignment would lose the null bit on + * every nullable row in this gather. */ + dst->len = n_groups; + for (int64_t gi = 0; gi < n_groups; gi++) { + int a = 0; ray_t* v = collection_elem(sc, fi[gi], &a); + store_typed_elem(dst, gi, v); + if (a) ray_release(v); + } + } + } + if (!dst || RAY_IS_ERR(dst)) { + if (dst) ray_release(dst); + ray_release(result); + result = ray_error("oom", NULL); + break; + } + result = ray_table_add_col(result, cn, dst); + ray_release(dst); + } + if (fi_hdr) ray_free(fi_hdr); + } + + ray_release(groups); + if (eval_tbl != tbl) ray_release(eval_tbl); + ray_release(tbl); + return apply_sort_take(result, dict_elems, dict_n, asc_id, desc_id, take_id); + } + + /* Pre-scan: any non-aggregation expressions? If so and there's a + * WHERE, we must materialize the filtered table first so the + * post-DAG scatter evaluates on filtered data (matching agg semantics). */ + int has_nonagg = 0; + for (int64_t i = 0; i + 1 < dict_n; i += 2) { + int64_t kid = dict_elems[i]->i64; + if (kid == from_id || kid == where_id || kid == by_id || + kid == take_id || kid == asc_id || kid == desc_id) continue; + if (!is_agg_expr(dict_elems[i + 1])) { has_nonagg = 1; break; } + } + + /* The post-DAG scatter needs a flat single-segment table: it + * reads key columns directly and runs ray_eval over the whole + * input. 
Detect parted tables up front — if the source is + * parted and there's no WHERE to materialize it, return nyi. */ + int table_is_parted = 0; + if (has_nonagg) { + int64_t ncols = ray_table_ncols(tbl); + for (int64_t c = 0; c < ncols; c++) { + ray_t* col = ray_table_get_col_idx(tbl, c); + if (col && RAY_IS_PARTED(col->type)) { table_is_parted = 1; break; } + } + if (table_is_parted && !where_expr) { + ray_graph_free(g); ray_release(tbl); + return ray_error("nyi", "non-agg expression on parted table without WHERE"); + } + } + + /* WHERE + BY handling. Two paths: + * + * (A) Fused path — applicable when there are no non-agg + * output expressions and the source table is flat + * (not parted). Execute the filter node in-place + * via exec_node; OP_FILTER on a TABLE input installs + * a lazy RAY_SEL bitmap on g->selection and returns + * the original uncompacted table. The subsequent + * ray_group call builds its own key/agg scans over + * g->table, and exec_group honours g->selection in + * the radix / DA / sequential paths — so no rows are + * materialized twice. This is the fast path for + * `select ... by ... where` queries. + * + * (B) Materialize path — applicable when (A) is not. + * Pre-execute the filter and flatten into a new + * table, then rebuild the graph. Needed because + * the non-agg scatter runs ray_eval over a flat + * single-segment table, and parted tables need + * segment-level flattening before group anyway. + * + * (This also fixes a pre-existing WHERE-vs-by bug: any + * WHERE clause on a `select ... by` query was silently + * ignored before the filter was wired through the group + * pipeline.) */ + if (where_expr) { + bool can_fuse = !has_nonagg && !table_is_parted; + if (can_fuse) { + root = ray_optimize(g, root); + /* exec_node populates g->selection as a side effect + * of OP_FILTER on a table input, and returns the + * uncompacted table (== g->table). Discard the + * result — we only needed the side effect. 
*/ + ray_t* fres = exec_node(g, root); + if (!fres || RAY_IS_ERR(fres)) { + if (g->selection) { + ray_release(g->selection); + g->selection = NULL; + } + ray_graph_free(g); ray_release(tbl); + return fres ? fres : ray_error("domain", NULL); + } + /* OP_CONST/OP_FILTER both retain, so the returned + * table has an extra refcount we must release. + * g->table still owns tbl via the graph, so this + * only drops the exec-node-side retain. */ + ray_release(fres); + } else { + root = ray_optimize(g, root); + ray_t* fres = ray_execute(g, root); + ray_graph_free(g); g = NULL; + if (!fres || RAY_IS_ERR(fres)) { ray_release(tbl); return fres ? fres : ray_error("domain", NULL); } + if (ray_is_lazy(fres)) fres = ray_lazy_materialize(fres); + if (!fres || RAY_IS_ERR(fres)) { ray_release(tbl); return fres ? fres : ray_error("domain", NULL); } + ray_release(tbl); + tbl = fres; + g = ray_graph_new(tbl); + if (!g) { ray_release(tbl); return ray_error("oom", NULL); } + root = ray_const_table(g, tbl); + } + } + + /* Compile group key(s) */ + ray_op_t* key_ops[16]; + uint8_t n_keys = 0; + + if (by_expr->type == RAY_SYM) { + /* Multiple keys as SYM vector: [col1 col2 ...] */ + int64_t nk = ray_len(by_expr); + int64_t* sym_ids = (int64_t*)ray_data(by_expr); + for (int64_t i = 0; i < nk && n_keys < 16; i++) { + ray_t* name_str = ray_sym_str(sym_ids[i]); + if (!name_str) { ray_graph_free(g); ray_release(tbl); return ray_error("domain", NULL); } + key_ops[n_keys] = ray_scan(g, ray_str_ptr(name_str)); + if (!key_ops[n_keys]) { ray_graph_free(g); ray_release(tbl); return ray_error("domain", NULL); } + n_keys++; + } + } else { + /* Single key expression */ + key_ops[0] = compile_expr_dag(g, by_expr); + if (!key_ops[0]) { ray_graph_free(g); ray_release(tbl); return ray_error("domain", NULL); } + n_keys = 1; + } + + /* Collect aggregation expressions from output columns. + * Non-agg expressions are tracked separately for post-DAG scatter. 
*/ + uint16_t agg_ops[16]; + ray_op_t* agg_ins[16]; + uint8_t n_aggs = 0; + + for (int64_t i = 0; i + 1 < dict_n; i += 2) { + int64_t kid = dict_elems[i]->i64; + if (kid == from_id || kid == where_id || kid == by_id || kid == take_id || kid == asc_id || kid == desc_id) continue; + + ray_t* val_expr = dict_elems[i + 1]; + if (is_agg_expr(val_expr) && n_aggs < 16) { + ray_t** agg_elems = (ray_t**)ray_data(val_expr); + agg_ops[n_aggs] = resolve_agg_opcode(agg_elems[0]->i64); + /* Compile the aggregation input (the column reference) */ + agg_ins[n_aggs] = compile_expr_dag(g, agg_elems[1]); + if (!agg_ins[n_aggs]) { ray_graph_free(g); ray_release(tbl); return ray_error("domain", NULL); } + n_aggs++; + } else if (!is_agg_expr(val_expr) && n_nonaggs < 16) { + nonagg_names[n_nonaggs] = kid; + nonagg_exprs[n_nonaggs] = val_expr; + n_nonaggs++; + } + } + + if (n_aggs > 0 || n_nonaggs > 0) { + if (n_aggs > 0) { + root = ray_group(g, key_ops, n_keys, agg_ops, agg_ins, n_aggs); + } else { + /* No aggs but non-agg expressions exist — still need group + * boundaries. Use GROUP+COUNT on the key to get group keys. + * The count column will be dropped after execution. */ + uint16_t cnt_op = OP_COUNT; + ray_op_t* cnt_in = key_ops[0]; + root = ray_group(g, key_ops, n_keys, &cnt_op, &cnt_in, 1); + synth_count_col = 1; + } + } else { + /* No explicit aggregations — apply WHERE filter first (if any), + * then use DAG GROUP+COUNT for fast hash-parallel group boundaries, + * then gather first-of-group from the filtered table. */ + ray_t* filtered_tbl = tbl; + if (where_expr) { + root = ray_optimize(g, root); + ray_t* fres = ray_execute(g, root); + ray_graph_free(g); g = NULL; + if (!fres || RAY_IS_ERR(fres)) { ray_release(tbl); return fres ? fres : ray_error("domain", NULL); } + if (ray_is_lazy(fres)) fres = ray_lazy_materialize(fres); + if (!fres || RAY_IS_ERR(fres)) { ray_release(tbl); return fres ? 
fres : ray_error("domain", NULL); } + filtered_tbl = fres; + /* Rebuild graph on filtered table for GROUP+COUNT */ + g = ray_graph_new(filtered_tbl); + if (!g) { if (filtered_tbl != tbl) ray_release(filtered_tbl); ray_release(tbl); return ray_error("oom", NULL); } + n_keys = 0; + if (by_expr->type == RAY_SYM) { + int64_t nk = ray_len(by_expr); + int64_t* sym_ids = (int64_t*)ray_data(by_expr); + for (int64_t i = 0; i < nk && n_keys < 16; i++) { + ray_t* ns = ray_sym_str(sym_ids[i]); + if (ns) key_ops[n_keys++] = ray_scan(g, ray_str_ptr(ns)); + } + } else { + key_ops[0] = compile_expr_dag(g, by_expr); + if (key_ops[0]) n_keys = 1; + } + } + + uint16_t cnt_op = OP_COUNT; + ray_op_t* cnt_in = key_ops[0]; + root = ray_group(g, key_ops, n_keys, &cnt_op, &cnt_in, 1); + root = ray_optimize(g, root); + ray_t* grouped = ray_execute(g, root); + ray_graph_free(g); g = NULL; + if (!grouped || RAY_IS_ERR(grouped)) { if (filtered_tbl != tbl) ray_release(filtered_tbl); ray_release(tbl); return grouped; } + if (ray_is_lazy(grouped)) grouped = ray_lazy_materialize(grouped); + + int64_t n_groups = ray_table_nrows(grouped); + + /* Resolve key column sym early — needed for empty result schema. + * A dotted name like `Timestamp.date` compiles to a scan + trunc + * chain, not a direct column lookup, so it must land in the + * computed-key fallback path below (key_sym stays -1). Otherwise + * downstream `ray_table_get_col(filtered_tbl, key_sym)` would + * return NULL for the non-existent "Timestamp.date" column and + * the subsequent deref would crash. 
*/ + int64_t key_sym = -1; + if (by_expr->type == -RAY_SYM && (by_expr->attrs & RAY_ATTR_NAME) + && !ray_sym_is_dotted(by_expr->i64)) + key_sym = by_expr->i64; + else if (by_expr->type == RAY_SYM && ray_len(by_expr) == 1) + key_sym = ((int64_t*)ray_data(by_expr))[0]; + + if (n_groups == 0) { + ray_release(grouped); + int64_t nc0 = ray_table_ncols(filtered_tbl); + ray_t* empty = ray_table_new(nc0 + 1); + if (!RAY_IS_ERR(empty)) { + /* Key column. For a plain/column key, key_sym + * names a real source column and we mirror its + * type. For a computed key (dotted, xbar, ...) + * we evaluate by_expr against the filtered (empty) + * table to learn the key's type and name without + * duplicating schema derivation logic. */ + int64_t empty_key_name = key_sym; + ray_t* empty_key_vec = NULL; + if (key_sym >= 0) { + ray_t* sc = ray_table_get_col(filtered_tbl, key_sym); + if (sc) { + empty_key_vec = (sc->type == RAY_STR) + ? ray_vec_new(RAY_STR, 0) + : ray_vec_new(sc->type, 0); + } + } else { + /* Match the computed-key fallback's naming + * rules (dotted tail / last name arg) and + * collision handling. 
*/ + int64_t ck_name = -1; + int64_t ck_full = -1; + int64_t ck_head = -1; + if (by_expr->type == -RAY_SYM && (by_expr->attrs & RAY_ATTR_NAME)) { + ck_full = by_expr->i64; + if (ray_sym_is_dotted(by_expr->i64)) { + const int64_t* segs; + int nsegs = ray_sym_segs(by_expr->i64, &segs); + if (nsegs > 0) { ck_name = segs[nsegs - 1]; ck_head = segs[0]; } + } else { + ck_name = by_expr->i64; + } + } else if (by_expr->type == RAY_LIST && by_expr->len >= 2) { + ray_t** be = (ray_t**)ray_data(by_expr); + for (int64_t i = by_expr->len - 1; i >= 1; i--) { + if (be[i]->type == -RAY_SYM && (be[i]->attrs & RAY_ATTR_NAME)) { + ck_name = be[i]->i64; + break; + } + } + } + if (ck_name < 0) ck_name = ray_sym_intern("key", 3); + if (ck_head >= 0 && ck_full >= 0 && ck_name != ck_full) { + for (int64_t c = 0; c < nc0; c++) { + int64_t cn = ray_table_col_name(filtered_tbl, c); + if (cn == ck_name && cn != ck_head) { + ck_name = ck_full; + break; + } + } + } + empty_key_name = ck_name; + + /* Evaluate by_expr against the (empty) filtered table + * to get a length-0 key vector typed like the + * non-empty path would produce it. */ + ray_env_push_scope(); + for (int64_t c = 0; c < nc0; c++) { + ray_env_set_local(ray_table_col_name(filtered_tbl, c), + ray_table_get_col_idx(filtered_tbl, c)); + } + ray_t* ck_vec = ray_eval(by_expr); + ray_env_pop_scope(); + if (ck_vec && !RAY_IS_ERR(ck_vec) && ray_is_vec(ck_vec)) { + int8_t kt = ck_vec->type; + empty_key_vec = (kt == RAY_STR) + ? ray_vec_new(RAY_STR, 0) + : (kt == RAY_LIST) + ? 
ray_list_new(0) + : ray_vec_new(kt, 0); + } + if (ck_vec && !RAY_IS_ERR(ck_vec)) ray_release(ck_vec); + } + if (empty_key_vec && !RAY_IS_ERR(empty_key_vec)) { + empty = ray_table_add_col(empty, empty_key_name, empty_key_vec); + ray_release(empty_key_vec); + } + + for (int64_t c = 0; c < nc0; c++) { + int64_t cn = ray_table_col_name(filtered_tbl, c); + if (cn == empty_key_name) continue; + ray_t* sc = ray_table_get_col_idx(filtered_tbl, c); + ray_t* ev = (sc->type == RAY_STR) ? ray_vec_new(RAY_STR, 0) : + (sc->type == RAY_LIST) ? ray_list_new(0) : + ray_vec_new(sc->type, 0); + if (!RAY_IS_ERR(ev)) { empty = ray_table_add_col(empty, cn, ev); ray_release(ev); } + } + } + if (filtered_tbl != tbl) ray_release(filtered_tbl); + ray_release(tbl); + return empty; + } + + /* Build first_idx: scan filtered key column once, record first + * occurrence of each group key value. */ + if (key_sym < 0) { + /* Computed group key (e.g., xbar) — fall back to eval-level groupby */ + ray_release(grouped); + int64_t tbl_ncols = ray_table_ncols(filtered_tbl); + ray_env_push_scope(); + for (int64_t c = 0; c < tbl_ncols; c++) { + int64_t cn = ray_table_col_name(filtered_tbl, c); + ray_t* cv = ray_table_get_col_idx(filtered_tbl, c); + ray_env_set_local(cn, cv); + } + ray_t* computed_key = ray_eval(by_expr); + ray_env_pop_scope(); + if (!computed_key || RAY_IS_ERR(computed_key)) { + if (filtered_tbl != tbl) ray_release(filtered_tbl); + ray_release(tbl); + return computed_key ? computed_key : ray_error("domain", NULL); + } + ray_t* groups2_dict = ray_group_fn(computed_key); + if (!groups2_dict || RAY_IS_ERR(groups2_dict)) { + ray_release(computed_key); + if (filtered_tbl != tbl) ray_release(filtered_tbl); + ray_release(tbl); + return groups2_dict ? 
groups2_dict : ray_error("domain", NULL); + } + ray_t* groups2 = groups_to_pair_list(groups2_dict); + ray_release(groups2_dict); + if (RAY_IS_ERR(groups2)) { + ray_release(computed_key); + if (filtered_tbl != tbl) ray_release(filtered_tbl); + ray_release(tbl); + return groups2; + } + int64_t ng2 = ray_len(groups2) / 2; + if (ng2 == 0) { ray_release(groups2); ray_release(computed_key); if (filtered_tbl != tbl) ray_release(filtered_tbl); ray_release(tbl); return ray_table_new(0); } + ray_t** gi2 = (ray_t**)ray_data(groups2); + + /* fi2 must sweep EVERY group, not just the first 256 — + * the downstream result-column loops iterate up to ng2 + * and indexed reads beyond a fixed-size stack slot would + * pick up uninitialised bytes. Stack-fast for small + * group counts, heap-fallback once we need more. */ + int64_t fi2_stack[256]; + ray_t* fi2_hdr = NULL; + int64_t* fi2 = fi2_stack; + if (ng2 > 256) { + fi2_hdr = ray_alloc((size_t)ng2 * sizeof(int64_t)); + if (!fi2_hdr) { + ray_release(groups2); ray_release(computed_key); + if (filtered_tbl != tbl) ray_release(filtered_tbl); + ray_release(tbl); + return ray_error("oom", NULL); + } + fi2 = (int64_t*)ray_data(fi2_hdr); + } + for (int64_t g2 = 0; g2 < ng2; g2++) { + int alloc2 = 0; + ray_t* i02 = collection_elem(gi2[g2 * 2 + 1], 0, &alloc2); + fi2[g2] = as_i64(i02); + if (alloc2) ray_release(i02); + } + /* Name for the synthesized key column: + * - dotted sym `a.b.c` → tail segment (`c`) so `Timestamp.ss` + * surfaces as an `ss` column (pretty in the common case). + * If the tail collides with an *unrelated* source column + * (not the head of the dotted path), fall back to the + * full dotted name so we don't silently drop real data. + * - list expr `(xbar N col)` / `(+ col 1)` → last name-typed + * argument, so the transform's output deliberately + * replaces the source column (matches xbar convention). + * - fall back to an interned "key" if nothing more specific + * can be derived. 
*/ + int64_t ckey_name = -1; + int64_t ckey_full = -1; /* full dotted sym, for collision fallback */ + int64_t ckey_head = -1; /* head segment of dotted expr (input column) */ + if (by_expr->type == -RAY_SYM && (by_expr->attrs & RAY_ATTR_NAME)) { + ckey_full = by_expr->i64; + if (ray_sym_is_dotted(by_expr->i64)) { + const int64_t* segs; + int nsegs = ray_sym_segs(by_expr->i64, &segs); + if (nsegs > 0) { + ckey_name = segs[nsegs - 1]; + ckey_head = segs[0]; + } + } else { + ckey_name = by_expr->i64; + } + } else if (by_expr->type == RAY_LIST && by_expr->len >= 2) { + ray_t** be = (ray_t**)ray_data(by_expr); + for (int64_t i = by_expr->len - 1; i >= 1; i--) { + if (be[i]->type == -RAY_SYM && (be[i]->attrs & RAY_ATTR_NAME)) { + ckey_name = be[i]->i64; + break; + } + } + } + if (ckey_name < 0) ckey_name = ray_sym_intern("key", 3); + + /* Collision check for dotted tail: if the tail name matches + * a source column that isn't the head of the dotted expr, + * the old code silently dropped that source column from the + * result. Promote to the full dotted sym so both stay. */ + if (ckey_head >= 0 && ckey_full >= 0 && ckey_name != ckey_full) { + for (int64_t c = 0; c < tbl_ncols; c++) { + int64_t cn = ray_table_col_name(filtered_tbl, c); + if (cn == ckey_name && cn != ckey_head) { + ckey_name = ckey_full; + break; + } + } + } + + ray_t* res2 = ray_table_new(tbl_ncols + 1); + /* Key column: computed_key's first-of-group values, which + * are the distinct grouping-key values surfaced to the + * user. Using the source column at fi2 indices would lose + * the transform (e.g. raw Timestamp instead of its `.ss`). */ + if (ray_is_vec(computed_key)) { + ray_t* kv = ray_vec_new(computed_key->type, ng2); + if (!RAY_IS_ERR(kv)) { + /* len BEFORE store loop — ray_vec_set_null (called + * by store_typed_elem for null atoms) range-checks + * idx against vec->len and silently no-ops + * otherwise. 
*/ + kv->len = ng2; + for (int64_t g2 = 0; g2 < ng2; g2++) { + int a2 = 0; + ray_t* v2 = collection_elem(computed_key, fi2[g2], &a2); + store_typed_elem(kv, g2, v2); + if (a2) ray_release(v2); + } + res2 = ray_table_add_col(res2, ckey_name, kv); + ray_release(kv); + } + } + for (int64_t c = 0; c < tbl_ncols; c++) { + int64_t cn = ray_table_col_name(filtered_tbl, c); + /* Avoid duplicating a column name already used by the + * key: e.g. `by: Timestamp` (plain, non-dotted) would + * collide with the source Timestamp column. */ + if (cn == ckey_name) continue; + ray_t* sc = ray_table_get_col_idx(filtered_tbl, c); + ray_t* dc = ray_vec_new(sc->type, ng2); + dc->len = ng2; /* see note above — hoisted for null bits */ + for (int64_t g2 = 0; g2 < ng2; g2++) { int a2 = 0; ray_t* v2 = collection_elem(sc, fi2[g2], &a2); store_typed_elem(dc, g2, v2); if (a2) ray_release(v2); } + res2 = ray_table_add_col(res2, cn, dc); ray_release(dc); + } + if (fi2_hdr) ray_free(fi2_hdr); + ray_release(groups2); ray_release(computed_key); + if (filtered_tbl != tbl) ray_release(filtered_tbl); + ray_release(tbl); + return res2; + } + + ray_t* orig_key_col = ray_table_get_col(filtered_tbl, key_sym); + int64_t nrows_orig = orig_key_col ? orig_key_col->len : 0; + + /* Read group key values from grouped table BEFORE releasing it. + * grp_key_col points into grouped — must not access after release. */ + ray_t* grp_key_col = ray_table_get_col(grouped, key_sym); + int8_t kt = orig_key_col ? orig_key_col->type : 0; + + /* Heap-allocate gk_vals when n_groups > 256 */ + int64_t gk_stack[256]; + ray_t* gk_heap_hdr = NULL; + int64_t* gk_vals = gk_stack; + if (n_groups > 256) { + gk_heap_hdr = ray_alloc((size_t)n_groups * sizeof(int64_t)); + if (!gk_heap_hdr) { ray_release(grouped); if (filtered_tbl != tbl) ray_release(filtered_tbl); ray_release(tbl); return ray_error("oom", NULL); } + gk_vals = (int64_t*)ray_data(gk_heap_hdr); + } + + /* Copy group key values while grouped is still alive. 
+ * STR/LIST/GUID keys are routed through eval-level fallback + * above, so only integer-like types reach here. Use + * read_col_i64 for non-F64 types — it dispatches on the + * column type (I32/I16/I8/BOOL/SYM adaptive width etc.), + * whereas ray_read_sym interprets `attrs` as SYM width and + * silently truncates to 1 byte for plain integer columns + * where attrs doesn't carry width bits. + * + * We also record a per-group null flag. The DAG GROUP path + * stores null keys with value=0 and differentiates via a + * null mask — if we hashed raw bits only, a null group would + * collide with non-null value 0 (for I64 / I32 / SYM / DATE + * / TIME etc.) or with +0.0 for F64 (ray_hash_f64 normalises + * -0.0 to +0.0, and F64's null bit pattern on this platform + * is the -0.0 pattern). The null flag keeps those groups + * distinct. */ + uint8_t gk_null_stack[256]; + ray_t* gk_null_hdr = NULL; + uint8_t* gk_null = gk_null_stack; + if (n_groups > 256) { + gk_null_hdr = ray_alloc((size_t)n_groups * sizeof(uint8_t)); + if (!gk_null_hdr) { + if (gk_heap_hdr) ray_free(gk_heap_hdr); + ray_release(grouped); + if (filtered_tbl != tbl) ray_release(filtered_tbl); + ray_release(tbl); + return ray_error("oom", NULL); + } + gk_null = (uint8_t*)ray_data(gk_null_hdr); + } + memset(gk_null, 0, (size_t)n_groups * sizeof(uint8_t)); + + if (grp_key_col) { + bool gk_has_nulls = (grp_key_col->attrs & RAY_ATTR_HAS_NULLS) != 0; + for (int64_t gi = 0; gi < n_groups; gi++) { + if (kt == RAY_F64) + memcpy(&gk_vals[gi], &((double*)ray_data(grp_key_col))[gi], 8); + else + gk_vals[gi] = read_col_i64(ray_data(grp_key_col), gi, kt, grp_key_col->attrs); + if (gk_has_nulls && ray_vec_is_null(grp_key_col, gi)) + gk_null[gi] = 1; + } + } + ray_release(grouped); /* grp_key_col is now invalid */ + + /* Allocate first_idx */ + int64_t first_idx_stack[256]; + ray_t* fi_heap_hdr = NULL; + int64_t* first_idx = first_idx_stack; + if (n_groups > 256) { + fi_heap_hdr = ray_alloc((size_t)n_groups * 
sizeof(int64_t)); + if (!fi_heap_hdr) { if (gk_heap_hdr) ray_free(gk_heap_hdr); if (filtered_tbl != tbl) ray_release(filtered_tbl); ray_release(tbl); return ray_error("oom", NULL); } + first_idx = (int64_t*)ray_data(fi_heap_hdr); + } + + /* Build {key_bits -> group_index} hash table from gk_vals so the + * scan below is O(nrows_orig + n_groups) instead of + * O(nrows_orig * n_groups). Without this a 1M-row / 1M-group + * float-key grouping hangs for tens of seconds — I64 has a + * low-cardinality direct-array fast path upstream, but F64 + * and other non-GUID scalar keys fall through to this scan. */ + for (int64_t gi = 0; gi < n_groups; gi++) first_idx[gi] = -1; + { + uint32_t fi_cap = 64; + while ((uint64_t)fi_cap < (uint64_t)n_groups * 2 && fi_cap < (1u << 30)) + fi_cap <<= 1; + uint32_t fi_mask = fi_cap - 1; + ray_t* fi_ht_hdr = ray_alloc((size_t)fi_cap * sizeof(uint32_t)); + if (!fi_ht_hdr) { + if (gk_heap_hdr) ray_free(gk_heap_hdr); + if (fi_heap_hdr) ray_free(fi_heap_hdr); + if (filtered_tbl != tbl) ray_release(filtered_tbl); + ray_release(tbl); + return ray_error("oom", NULL); + } + uint32_t* fi_ht = (uint32_t*)ray_data(fi_ht_hdr); + memset(fi_ht, 0xFF, (size_t)fi_cap * sizeof(uint32_t)); + + /* Insert every group key into the HT keyed by bit pattern. + * For F64 keys, hash via the float path; memcpy bit pattern + * out of gk_vals to dodge strict-aliasing. Null groups + * get a distinct hash so they don't collide with zero-valued + * groups (F64 null has the -0.0 bit pattern, which + * ray_hash_f64 normalises to +0.0; integer-flavoured + * nulls are stored as value=0). 
*/ + for (int64_t gi = 0; gi < n_groups; gi++) { + uint64_t h; + if (gk_null[gi]) { + h = ray_hash_i64((int64_t)0xDEADBEEFCAFEBABEULL); + } else if (kt == RAY_F64) { + double dv; + memcpy(&dv, &gk_vals[gi], 8); + h = ray_hash_f64(dv); + } else { + h = ray_hash_i64(gk_vals[gi]); + } + uint32_t slot = (uint32_t)(h & fi_mask); + while (fi_ht[slot] != UINT32_MAX) slot = (slot + 1) & fi_mask; + fi_ht[slot] = (uint32_t)gi; + } + + /* Single linear scan of the source column; for each row + * hash-lookup its group index and record the first row + * that maps to it. Terminate early once every group has + * a first-row. */ + bool orig_nulls_flag = orig_key_col + && (orig_key_col->attrs & RAY_ATTR_HAS_NULLS) != 0; + int64_t found = 0; + for (int64_t r = 0; r < nrows_orig && found < n_groups; r++) { + bool r_null = orig_nulls_flag && ray_vec_is_null(orig_key_col, r); + int64_t ov; + if (kt == RAY_F64) memcpy(&ov, &((double*)ray_data(orig_key_col))[r], 8); + else ov = read_col_i64(ray_data(orig_key_col), r, kt, orig_key_col->attrs); + uint64_t h; + if (r_null) { + h = ray_hash_i64((int64_t)0xDEADBEEFCAFEBABEULL); + } else if (kt == RAY_F64) { + double dv; + memcpy(&dv, &ov, 8); + h = ray_hash_f64(dv); + } else { + h = ray_hash_i64(ov); + } + uint32_t slot = (uint32_t)(h & fi_mask); + while (fi_ht[slot] != UINT32_MAX) { + uint32_t cand = fi_ht[slot]; + bool match = (r_null && gk_null[cand]) + || (!r_null && !gk_null[cand] && gk_vals[cand] == ov); + if (match) { + if (first_idx[cand] < 0) { + first_idx[cand] = r; + found++; + } + break; + } + slot = (slot + 1) & fi_mask; + } + } + ray_free(fi_ht_hdr); + } + if (gk_null_hdr) ray_free(gk_null_hdr); + if (gk_heap_hdr) ray_free(gk_heap_hdr); + + /* Now build the result table using first_idx gathered above. + * key_sym and n_groups are already set. 
*/ + + /* Build result table: key column first, then others */ + int64_t ncols = ray_table_ncols(filtered_tbl); + ray_t* result = ray_table_new(ncols); + if (RAY_IS_ERR(result)) { if (fi_heap_hdr) ray_free(fi_heap_hdr); ray_release(tbl); return result; } + + /* Add key column first */ + ray_t* key_vec_src = ray_table_get_col(filtered_tbl, key_sym); + if (key_vec_src->type == RAY_STR) { + ray_t* key_vec_dst = ray_vec_new(RAY_STR, n_groups); + if (!key_vec_dst || RAY_IS_ERR(key_vec_dst)) { if (fi_heap_hdr) ray_free(fi_heap_hdr); ray_release(tbl); ray_release(result); return key_vec_dst ? key_vec_dst : ray_error("oom", NULL); } + for (int64_t gi = 0; gi < n_groups; gi++) { + size_t slen = 0; + const char* sp = ray_str_vec_get(key_vec_src, first_idx[gi], &slen); + key_vec_dst = ray_str_vec_append(key_vec_dst, sp ? sp : "", sp ? slen : 0); + if (RAY_IS_ERR(key_vec_dst)) { if (fi_heap_hdr) ray_free(fi_heap_hdr); ray_release(tbl); ray_release(result); return key_vec_dst; } + } + result = ray_table_add_col(result, key_sym, key_vec_dst); + ray_release(key_vec_dst); + } else { + ray_t* key_vec_dst = ray_vec_new(key_vec_src->type, n_groups); + if (RAY_IS_ERR(key_vec_dst)) { if (fi_heap_hdr) ray_free(fi_heap_hdr); ray_release(tbl); ray_release(result); return key_vec_dst; } + /* Set len BEFORE the store loop: store_typed_elem routes + * null atoms through ray_vec_set_null, which range-checks + * idx against vec->len and silently returns RAY_ERR_RANGE + * otherwise. Postponing len=n_groups until after the loop + * therefore dropped the null bit on every nullable key row + * — the result would read back the raw zero/-0.0 bits with + * no HAS_NULLS flag, corrupting the grouped key column. 
*/ + key_vec_dst->len = n_groups; + for (int64_t gi = 0; gi < n_groups; gi++) { + int alloc = 0; + ray_t* val = collection_elem(key_vec_src, first_idx[gi], &alloc); + store_typed_elem(key_vec_dst, gi, val); + if (alloc) ray_release(val); + } + result = ray_table_add_col(result, key_sym, key_vec_dst); + ray_release(key_vec_dst); + } + + /* Add non-key columns */ + for (int64_t c = 0; c < ncols; c++) { + int64_t col_name = ray_table_col_name(filtered_tbl, c); + if (col_name == key_sym) continue; + ray_t* src_col = ray_table_get_col_idx(filtered_tbl, c); + int8_t ct = src_col->type; + + if (ct == RAY_STR) { + /* String column: build STR vector */ + ray_t* dst = ray_vec_new(RAY_STR, n_groups); + if (!dst || RAY_IS_ERR(dst)) { if (fi_heap_hdr) ray_free(fi_heap_hdr); ray_release(tbl); ray_release(result); return dst ? dst : ray_error("oom", NULL); } + for (int64_t gi = 0; gi < n_groups; gi++) { + size_t slen = 0; + const char* sp = ray_str_vec_get(src_col, first_idx[gi], &slen); + dst = ray_str_vec_append(dst, sp ? sp : "", sp ? slen : 0); + if (RAY_IS_ERR(dst)) { if (fi_heap_hdr) ray_free(fi_heap_hdr); ray_release(tbl); ray_release(result); return dst; } + } + result = ray_table_add_col(result, col_name, dst); + ray_release(dst); + } else if (ct == RAY_LIST) { + /* List column: pick items */ + ray_t* dst = ray_alloc(n_groups * sizeof(ray_t*)); + if (!dst) { if (fi_heap_hdr) ray_free(fi_heap_hdr); ray_release(tbl); ray_release(result); return ray_error("oom", NULL); } + dst->type = RAY_LIST; + dst->len = n_groups; + ray_t** dout = (ray_t**)ray_data(dst); + ray_t** src_items = (ray_t**)ray_data(src_col); + for (int64_t gi = 0; gi < n_groups; gi++) { + dout[gi] = src_items[first_idx[gi]]; + ray_retain(dout[gi]); + } + result = ray_table_add_col(result, col_name, dst); + ray_release(dst); + } else { + /* Typed vector: copy elements at first indices. 
+ * len must be set before the store loop so null bits + * propagate through store_typed_elem → ray_vec_set_null + * (same reason as the key column above). */ + ray_t* dst = ray_vec_new(ct, n_groups); + if (RAY_IS_ERR(dst)) { if (fi_heap_hdr) ray_free(fi_heap_hdr); ray_release(tbl); ray_release(result); return dst; } + dst->len = n_groups; + for (int64_t gi = 0; gi < n_groups; gi++) { + int alloc = 0; + ray_t* val = collection_elem(src_col, first_idx[gi], &alloc); + store_typed_elem(dst, gi, val); + if (alloc) ray_release(val); + } + result = ray_table_add_col(result, col_name, dst); + ray_release(dst); + } + if (RAY_IS_ERR(result)) { if (fi_heap_hdr) ray_free(fi_heap_hdr); ray_release(tbl); return result; } + } + + if (fi_heap_hdr) ray_free(fi_heap_hdr); + if (filtered_tbl != tbl) ray_release(filtered_tbl); + ray_release(tbl); + return apply_sort_take(result, dict_elems, dict_n, asc_id, desc_id, take_id); + } + } else if (n_out > 0) { + /* Projection only (no group by) — select specific columns */ + ray_op_t* col_ops[16]; + uint8_t nc = 0; + for (int64_t i = 0; i + 1 < dict_n; i += 2) { + int64_t kid = dict_elems[i]->i64; + if (kid == from_id || kid == where_id || kid == by_id || kid == take_id || kid == asc_id || kid == desc_id || kid == nearest_id) continue; + if (nc < 16) { + col_ops[nc] = compile_expr_dag(g, dict_elems[i + 1]); + if (!col_ops[nc]) { + /* Nearest-path resources must be freed here too — the + * rerank handle/query buffers are held across the whole + * ray_select_fn body, not just inside the nearest block. */ + if (nearest_handle_owned) ray_release(nearest_handle_owned); + if (nearest_query_owned) ray_sys_free(nearest_query_owned); + ray_graph_free(g); ray_release(tbl); + return ray_error("domain", NULL); + } + nc++; + } + } + root = ray_select(g, root, col_ops, nc); + } + + /* Sort: collect asc/desc columns in dict iteration order. 
+ * Only add to the DAG when there's no group-by — group-by changes the + * output schema, so sort on output columns must happen post-execution. + * Values are unevaluated — a SYM atom is a column name, a SYM vector + * is multiple column names. No ray_eval needed. */ + if (has_sort && !by_expr) { + ray_op_t* sort_keys[16]; + uint8_t sort_descs[16]; + uint8_t n_sort = 0; + for (int64_t i = 0; i + 1 < dict_n && n_sort < 16; i += 2) { + int64_t kid = dict_elems[i]->i64; + uint8_t is_desc = 0; + if (kid == asc_id) is_desc = 0; + else if (kid == desc_id) is_desc = 1; + else continue; + ray_t* val = dict_elems[i + 1]; + if (val->type == -RAY_SYM) { + /* Single column name */ + ray_t* s = ray_sym_str(val->i64); + sort_keys[n_sort] = ray_scan(g, ray_str_ptr(s)); + sort_descs[n_sort] = is_desc; + n_sort++; + } else if (ray_is_vec(val) && val->type == RAY_SYM) { + /* Multiple column names */ + for (int64_t c = 0; c < val->len && n_sort < 16; c++) { + int64_t sid = ray_read_sym(ray_data(val), c, val->type, val->attrs); + ray_t* s = ray_sym_str(sid); + sort_keys[n_sort] = ray_scan(g, ray_str_ptr(s)); + sort_descs[n_sort] = is_desc; + n_sort++; + } + } else { + ray_graph_free(g); ray_release(tbl); + return ray_error("domain", NULL); + } + } + if (n_sort > 0) + root = ray_sort_op(g, root, sort_keys, sort_descs, NULL, n_sort); + } + + /* Take: add to DAG only when no group-by and no nearest (rerank + * absorbs the take into its k parameter). */ + ray_t* take_range = NULL; + if (take_expr && !by_expr && !nearest_expr) { + ray_t* tv = ray_eval(take_expr); + if (!tv || RAY_IS_ERR(tv)) { ray_graph_free(g); ray_release(tbl); return tv ? tv : ray_error("domain", NULL); } + if (ray_is_atom(tv) && (tv->type == -RAY_I64 || tv->type == -RAY_I32)) { + int64_t n_take = (tv->type == -RAY_I64) ? 
tv->i64 : tv->i32; + ray_release(tv); + if (n_take >= 0) + root = ray_head(g, root, n_take); + else + root = ray_tail(g, root, -n_take); + } else if (ray_is_vec(tv) && (tv->type == RAY_I64 || tv->type == RAY_I32) && tv->len == 2) { + take_range = tv; /* apply after DAG execution */ + } else { + ray_release(tv); + ray_graph_free(g); ray_release(tbl); + return ray_error("domain", NULL); + } + } + + /* Optimize and execute */ + root = ray_optimize(g, root); + ray_t* result = ray_execute(g, root); + + ray_graph_free(g); + /* The nearest-query buffer was only referenced by ext->rerank.query_vec + * and is safe to free once the graph (and thus the op ext) is gone. */ + if (nearest_query_owned) ray_sys_free(nearest_query_owned); + /* The HNSW handle was kept alive through ray_execute so the rerank + * ext's idx pointer stayed valid. Safe to release now that the + * graph (and its ext nodes) has been freed. */ + if (nearest_handle_owned) ray_release(nearest_handle_owned); + + /* Post-process: range take [start count] applied after execution */ + if (take_range && result && !RAY_IS_ERR(result)) { + ray_t* sliced = ray_take_fn(result, take_range); + ray_release(result); + ray_release(take_range); + result = sliced; + } else if (take_range) { + ray_release(take_range); + } + + /* Post-process: reorder GROUP BY BOOL results to match first-occurrence + * order in the original table (exec.c radix sort puts false before true) */ + if (by_expr && result && !RAY_IS_ERR(result) && result->type == RAY_TABLE) { + if (ray_is_lazy(result)) result = ray_lazy_materialize(result); + if (result && !RAY_IS_ERR(result) && result->type == RAY_TABLE) { + ray_t* key_col = ray_table_get_col_idx(result, 0); + if (key_col && key_col->type == RAY_BOOL && key_col->len >= 2) { + /* Find first-occurrence order of bool values in original + * table. Accept both scalar `-RAY_SYM` and single-element + * `RAY_SYM` vector forms. 
*/ + int64_t by_sym = -1; + if (by_expr->type == -RAY_SYM) + by_sym = by_expr->i64; + else if (by_expr->type == RAY_SYM && ray_len(by_expr) == 1) + by_sym = ((int64_t*)ray_data(by_expr))[0]; + ray_t* orig_key = (by_sym >= 0) ? ray_table_get_col(tbl, by_sym) : NULL; + if (orig_key && orig_key->type == RAY_BOOL && orig_key->len > 0) { + bool first_val = ((bool*)ray_data(orig_key))[0]; + bool result_first = ((bool*)ray_data(key_col))[0]; + if (first_val != result_first) { + /* Swap rows: reverse row order in all columns */ + int64_t nrows_r = ray_table_nrows(result); + int64_t ncols_r = ray_table_ncols(result); + ray_t* reordered = ray_table_new((int32_t)ncols_r); + if (reordered && !RAY_IS_ERR(reordered)) { + int ok = 1; + for (int64_t c = 0; c < ncols_r && ok; c++) { + int64_t cn = ray_table_col_name(result, c); + ray_t* col = ray_table_get_col_idx(result, c); + int esz = ray_elem_size(col->type); + ray_t* new_col = ray_vec_new(col->type, nrows_r); + if (RAY_IS_ERR(new_col)) { ok = 0; break; } + new_col->len = nrows_r; + char* src = (char*)ray_data(col); + char* dst = (char*)ray_data(new_col); + bool has_nulls = (col->attrs & RAY_ATTR_HAS_NULLS) != 0; + for (int64_t r = 0; r < nrows_r; r++) { + memcpy(dst + r * esz, src + (nrows_r - 1 - r) * esz, esz); + if (has_nulls && ray_vec_is_null(col, nrows_r - 1 - r)) + ray_vec_set_null(new_col, r, true); + } + reordered = ray_table_add_col(reordered, cn, new_col); + ray_release(new_col); + if (RAY_IS_ERR(reordered)) { ok = 0; break; } + } + if (ok) { + ray_release(result); + result = reordered; + } else if (reordered && !RAY_IS_ERR(reordered)) { + ray_release(reordered); + } + } + } + } + } + } + } + + /* Drop the synthesized COUNT column (used only to get group + * boundaries when n_aggs == 0 && n_nonaggs > 0). Must happen + * before the rename/sort_take steps so they don't see a phantom + * column. 
*/ + if (synth_count_col && by_expr && result && !RAY_IS_ERR(result)) { + if (ray_is_lazy(result)) result = ray_lazy_materialize(result); + if (result && !RAY_IS_ERR(result) && result->type == RAY_TABLE) { + int64_t nc = ray_table_ncols(result); + if (nc >= 1) { + ray_t* rebuilt = ray_table_new(nc - 1); + if (rebuilt && !RAY_IS_ERR(rebuilt)) { + for (int64_t c = 0; c < nc - 1; c++) { + int64_t cn = ray_table_col_name(result, c); + ray_t* col = ray_table_get_col_idx(result, c); + rebuilt = ray_table_add_col(rebuilt, cn, col); + } + ray_release(result); + result = rebuilt; + } + } + } + } + + /* NOTE: tbl is released below AFTER the non-agg scatter, which + * runs post-rename and post-sort_take so LIST columns do not + * flow through the scalar-only apply_sort_take DAG. */ + + /* Rename output columns if user specified names */ + if (result && !RAY_IS_ERR(result) && n_out > 0) { + /* Materialize lazy results if needed */ + if (ray_is_lazy(result)) result = ray_lazy_materialize(result); + } + if (result && !RAY_IS_ERR(result) && result->type == RAY_TABLE && n_out > 0) { + ray_t* schema = ray_table_schema(result); + if (schema && !RAY_IS_ERR(schema) && schema->type > 0 && schema->type < RAY_TYPE_COUNT) { + int64_t ncols = schema->len; + /* Count key columns in by clause */ + int n_key_cols = 0; + if (by_expr) { + if (ray_is_vec(by_expr) && by_expr->type == RAY_SYM) n_key_cols = (int)ray_len(by_expr); + else n_key_cols = 1; + } + /* Collect user-defined output column names. + * For group-by, the result layout is [keys, aggs..., nonaggs...]. + * Non-agg columns were added by the post-DAG scatter block + * with correct names already — only agg columns need renaming, + * in dict-iteration order of the agg entries. 
*/ + int64_t agg_user_names[16]; + int64_t all_user_names[16]; + int n_agg_user = 0; + int n_all_user = 0; + for (int64_t i = 0; i + 1 < dict_n; i += 2) { + int64_t kid = dict_elems[i]->i64; + if (kid == from_id || kid == where_id || kid == by_id || + kid == take_id || kid == asc_id || kid == desc_id) continue; + if (n_all_user < 16) all_user_names[n_all_user++] = kid; + if (by_expr && !is_agg_expr(dict_elems[i + 1])) continue; + if (n_agg_user < 16) agg_user_names[n_agg_user++] = kid; + } + if (by_expr) { + /* Rename only the agg columns (positions after keys). + * Non-agg LIST columns were named at scatter time. */ + for (int j = 0; j < n_agg_user && n_key_cols + j < ncols; j++) + ray_table_set_col_name(result, n_key_cols + j, agg_user_names[j]); + } else { + /* Projection-only: columns are in dict order */ + for (int j = 0; j < n_all_user && n_key_cols + j < ncols; j++) + ray_table_set_col_name(result, n_key_cols + j, all_user_names[j]); + } + } + } + + /* Post-process: scatter non-agg expressions into LIST columns. + * Must run BEFORE apply_sort_take so the sort clause can + * reference non-agg output columns (and so the take clause + * slices the fully-populated result). apply_sort_take handles + * LIST columns in the result table (same path used by the + * eval_group branch). + * + * Reads group keys from the DAG result and builds row→group_id + * against the original tbl. */ + if (n_nonaggs > 0 && by_expr && result && !RAY_IS_ERR(result)) { + if (ray_is_lazy(result)) result = ray_lazy_materialize(result); + if (result && !RAY_IS_ERR(result) && result->type == RAY_TABLE) { + int64_t n_groups = ray_table_nrows(result); + + /* Resolve key sym — gated to single scalar key above. 
*/ + int64_t ks = -1; + if (by_expr->type == -RAY_SYM && (by_expr->attrs & RAY_ATTR_NAME)) + ks = by_expr->i64; + else if (by_expr->type == RAY_SYM && ray_len(by_expr) == 1) + ks = ((int64_t*)ray_data(by_expr))[0]; + + if (ks < 0) { + ray_release(result); ray_release(tbl); + return ray_error("domain", NULL); + } + + ray_t* orig_key = ray_table_get_col(tbl, ks); + ray_t* grp_key = ray_table_get_col(result, ks); + int64_t nrows = orig_key ? orig_key->len : 0; + + if (!orig_key || !grp_key) { + ray_release(result); ray_release(tbl); + return ray_error("domain", NULL); + } + + if (n_groups > 0 && nrows > 0) { + int8_t okt = orig_key->type; + int8_t gkt = grp_key->type; + if (RAY_IS_PARTED(okt)) okt = (int8_t)RAY_PARTED_BASETYPE(okt); + if (RAY_IS_PARTED(gkt)) gkt = (int8_t)RAY_PARTED_BASETYPE(gkt); + + /* Type-aware key element reader. Normalizes any + * comparable scalar key into an int64_t so linear + * scans can use equality. For floats we bitcast so + * NaN and -0/+0 match the DAG's hash-equality. */ + #define KEY_READ(dst, vec, base_type, idx) do { \ + const void* _d = ray_data(vec); \ + switch (base_type) { \ + case RAY_BOOL: \ + case RAY_U8: (dst) = ((const uint8_t* )_d)[idx]; break; \ + case RAY_I16: (dst) = ((const int16_t* )_d)[idx]; break; \ + case RAY_I32: (dst) = ((const int32_t* )_d)[idx]; break; \ + case RAY_I64: (dst) = ((const int64_t* )_d)[idx]; break; \ + case RAY_F32: { uint32_t _u; \ + memcpy(&_u, &((const float*)_d)[idx], 4); \ + (dst) = (int64_t)_u; break; } \ + case RAY_F64: { int64_t _u; \ + memcpy(&_u, &((const double*)_d)[idx], 8); \ + (dst) = _u; break; } \ + case RAY_DATE: case RAY_TIME: \ + (dst) = ((const int32_t*)_d)[idx]; break; \ + case RAY_TIMESTAMP: \ + (dst) = ((const int64_t*)_d)[idx]; break; \ + case RAY_SYM: \ + (dst) = ray_read_sym(_d, (idx), (base_type), \ + (vec)->attrs); break; \ + default: { \ + /* Unsupported key type: signal via sentinel so the \ + * caller's type-mismatch guard catches it. 
Should \ + * not actually reach here because okt == gkt is \ + * checked above and only known types pass. */ \ + (dst) = 0; break; \ + } \ + } \ + } while (0) + + /* Whitelist of key types supported by KEY_READ. Any + * other type (LIST, STR, GUID, unknown) must error out — + * otherwise KEY_READ silently returns 0 and collapses + * all rows into a single (wrong) group. LIST/STR/GUID + * are already routed through use_eval_group earlier; + * this is the last-line defense for future additions. */ + int key_supported = + (okt == RAY_BOOL || okt == RAY_U8 || + okt == RAY_I16 || okt == RAY_I32 || okt == RAY_I64 || + okt == RAY_F32 || okt == RAY_F64 || + okt == RAY_DATE || okt == RAY_TIME || okt == RAY_TIMESTAMP || + okt == RAY_SYM); + if (!key_supported) { + ray_release(result); ray_release(tbl); + return ray_error("nyi", "non-agg scatter: unsupported group key type"); + } + + /* The DAG group result key column must have a base + * type comparable to the input. If types differ + * unexpectedly, fall back to error rather than mis- + * compare. */ + if (okt != gkt) { + ray_release(result); ray_release(tbl); + return ray_error("type", "group key type mismatch"); + } + + /* Allocations — any failure errors out rather than + * silently returning partial results. 
*/ + ray_t* gk_hdr = ray_alloc((size_t)n_groups * sizeof(int64_t)); + ray_t* rg_hdr = ray_alloc((size_t)nrows * sizeof(int64_t)); + ray_t* cnt_hdr = ray_alloc((size_t)n_groups * sizeof(int64_t)); + ray_t* off_hdr = ray_alloc((size_t)n_groups * sizeof(int64_t)); + ray_t* pos_hdr = ray_alloc((size_t)n_groups * sizeof(int64_t)); + if (!gk_hdr || !rg_hdr || !cnt_hdr || !off_hdr || !pos_hdr) { + if (gk_hdr) ray_free(gk_hdr); + if (rg_hdr) ray_free(rg_hdr); + if (cnt_hdr) ray_free(cnt_hdr); + if (off_hdr) ray_free(off_hdr); + if (pos_hdr) ray_free(pos_hdr); + ray_release(result); ray_release(tbl); + return ray_error("oom", NULL); + } + int64_t* gk = (int64_t*)ray_data(gk_hdr); + int64_t* row_gid = (int64_t*)ray_data(rg_hdr); + int64_t* grp_cnt = (int64_t*)ray_data(cnt_hdr); + int64_t* offsets = (int64_t*)ray_data(off_hdr); + int64_t* pos = (int64_t*)ray_data(pos_hdr); + + /* Copy group key values from the (possibly sliced) result */ + for (int64_t gi = 0; gi < n_groups; gi++) + KEY_READ(gk[gi], grp_key, gkt, gi); + + /* Build row→group_id map. Rows whose key isn't in the + * surviving group set get row_gid = -1 and are skipped. 
*/ + for (int64_t r = 0; r < nrows; r++) { + int64_t rv; + KEY_READ(rv, orig_key, okt, r); + row_gid[r] = -1; + for (int64_t gi = 0; gi < n_groups; gi++) { + if (rv == gk[gi]) { row_gid[r] = gi; break; } + } + } + #undef KEY_READ + + memset(grp_cnt, 0, (size_t)n_groups * sizeof(int64_t)); + for (int64_t r = 0; r < nrows; r++) + if (row_gid[r] >= 0) grp_cnt[row_gid[r]]++; + + int64_t total = 0; + for (int64_t gi = 0; gi < n_groups; gi++) total += grp_cnt[gi]; + ray_t* idx_hdr = ray_alloc((size_t)total * sizeof(int64_t)); + if (!idx_hdr) { + ray_free(gk_hdr); ray_free(rg_hdr); ray_free(cnt_hdr); + ray_free(off_hdr); ray_free(pos_hdr); + ray_release(result); ray_release(tbl); + return ray_error("oom", NULL); + } + int64_t* idx_buf = (int64_t*)ray_data(idx_hdr); + + offsets[0] = 0; + for (int64_t gi = 1; gi < n_groups; gi++) + offsets[gi] = offsets[gi - 1] + grp_cnt[gi - 1]; + + memcpy(pos, offsets, (size_t)n_groups * sizeof(int64_t)); + for (int64_t r = 0; r < nrows; r++) { + int64_t gi = row_gid[r]; + if (gi >= 0) idx_buf[pos[gi]++] = r; + } + + ray_t* scatter_err = NULL; + for (uint8_t ni = 0; ni < n_nonaggs && !scatter_err; ni++) { + /* Streaming-style fast path for `(aggr_fn col_or_expr)` + * where aggr_fn is RAY_FN_AGGR + RAY_UNARY (sum/avg/..., + * med/dev/var/stddev/...). Bypasses the full-table eval + * + non-row-aligned fallback by slicing the source per + * group and calling the unary fn directly into a typed + * vec. Equivalent perf-class to the streaming AGG path + * the eval-fallback uses for the same shapes. 
*/ + if (is_aggr_unary_call(nonagg_exprs[ni])) { + ray_t* col = aggr_unary_per_group_buf( + nonagg_exprs[ni], tbl, + idx_buf, offsets, grp_cnt, n_groups); + if (RAY_IS_ERR(col)) { scatter_err = col; break; } + result = ray_table_add_col(result, nonagg_names[ni], col); + ray_release(col); + if (RAY_IS_ERR(result)) { + scatter_err = result; result = NULL; break; + } + continue; + } + + if (ray_env_push_scope() != RAY_OK) { + scatter_err = ray_error("oom", NULL); break; + } + expr_bind_table_names(nonagg_exprs[ni], tbl); + ray_t* full_val = ray_eval(nonagg_exprs[ni]); + ray_env_pop_scope(); + if (!full_val || RAY_IS_ERR(full_val)) { + scatter_err = full_val ? full_val : ray_error("domain", NULL); + break; + } + + ray_t* list_col = ray_alloc(n_groups * sizeof(ray_t*)); + if (!list_col) { + ray_release(full_val); + scatter_err = ray_error("oom", NULL); break; + } + list_col->type = RAY_LIST; + /* Track filled length incrementally: ray_release of + * a RAY_LIST walks exactly v->len children, so + * keeping len in sync with the number of initialized + * slots lets error paths free without touching + * uninitialized memory — and avoids a memset. */ + list_col->len = 0; + ray_t** list_out = (ray_t**)ray_data(list_col); + + /* Decide per-group disposition of full_val: + * - expression references a column → result must + * be row-aligned; otherwise that's a bug and + * we error out rather than silently broadcast. + * - constant expression (no column refs) → + * broadcast the value into every group cell. */ + int refs_column = expr_refs_row_column(nonagg_exprs[ni], tbl); + int is_indexable = + ray_is_vec(full_val) || full_val->type == RAY_LIST; + int full_is_row_aligned = + is_indexable && full_val->len == nrows; + + if (refs_column && !full_is_row_aligned) { + /* Non-streaming fallback: the expression didn't + * produce a row-aligned full-table result (e.g. a + * user lambda collapsed a vector to a scalar), so + * collect per-group and post-apply. 
Cells can be + * any shape; homogeneous-scalar cells collapse to + * a typed vec. */ + ray_release(full_val); + ray_release(list_col); /* len=0, walks nothing */ + ray_t* per_group = nonagg_eval_per_group_buf( + nonagg_exprs[ni], tbl, idx_buf, offsets, grp_cnt, n_groups); + if (RAY_IS_ERR(per_group)) { + scatter_err = per_group; break; + } + /* core produces typed vec or list as appropriate */ + result = ray_table_add_col(result, nonagg_names[ni], per_group); + ray_release(per_group); + if (RAY_IS_ERR(result)) { + scatter_err = result; result = NULL; break; + } + continue; + } + + int gather_ok = 1; + for (int64_t gi = 0; gi < n_groups; gi++) { + ray_t* cell; + if (full_is_row_aligned) { + cell = gather_by_idx(full_val, + &idx_buf[offsets[gi]], grp_cnt[gi]); + if (!cell || RAY_IS_ERR(cell)) { + gather_ok = 0; + break; + } + } else { + /* Constant (no column refs): broadcast */ + ray_retain(full_val); + cell = full_val; + } + list_out[gi] = cell; + list_col->len = gi + 1; /* commit slot */ + } + ray_release(full_val); + + if (!gather_ok) { + ray_release(list_col); /* releases exactly len filled slots */ + scatter_err = ray_error("oom", NULL); break; + } + + result = ray_table_add_col(result, nonagg_names[ni], list_col); + ray_release(list_col); + if (RAY_IS_ERR(result)) { + scatter_err = result; result = NULL; break; + } + } + + ray_free(gk_hdr); ray_free(rg_hdr); ray_free(cnt_hdr); + ray_free(off_hdr); ray_free(pos_hdr); ray_free(idx_hdr); + + if (scatter_err) { + if (result) ray_release(result); + ray_release(tbl); + return scatter_err; + } + } else { + /* Empty group set: add empty LIST columns so the + * output schema still includes the user-declared + * non-agg columns. */ + for (uint8_t ni = 0; ni < n_nonaggs; ni++) { + ray_t* empty_list = ray_list_new(0); + if (!empty_list || RAY_IS_ERR(empty_list)) { + ray_release(result); ray_release(tbl); + return empty_list ? 
empty_list : ray_error("oom", NULL);
+                    }
+                    result = ray_table_add_col(result, nonagg_names[ni], empty_list);
+                    ray_release(empty_list);
+                    if (RAY_IS_ERR(result)) { ray_release(tbl); return result; }
+                }
+            }
+        }
+    }
+
+    ray_release(tbl);
+
+    /* Post-process: apply sort/take for group-by queries. Runs
+     * last so non-agg LIST columns are already in the result,
+     * allowing sort clauses to reference non-agg output columns. */
+    if (by_expr && (has_sort || take_expr))
+        result = apply_sort_take(result, dict_elems, dict_n, asc_id, desc_id, take_id);
+
+    if (by_sym_vec_owned) ray_release(by_sym_vec_owned);
+
+    return result;
+}
+
+/* Floor `a` down to a multiple of `b`: *out = floor(a / b) * b.
+ * Precondition: b != 0.
+ *
+ * Works from the remainder rather than from quotient * b so that no
+ * intermediate value can overflow: INT64_MIN / -1 is undefined behaviour in
+ * C (and raises SIGFPE on x86-64), and q * b can wrap once q has been
+ * decremented for the floor adjustment.
+ *
+ * Returns 0 on success, -1 when the bucket start is not representable in
+ * int64_t (in which case *out is left untouched). */
+static int xbar_floor_i64(int64_t a, int64_t b, int64_t* out) {
+    /* Every integer is a multiple of +-1; this also sidesteps INT64_MIN % -1. */
+    if (b == 1 || b == -1) { *out = a; return 0; }
+    int64_t rem = a % b;        /* sign of rem follows a (truncating division) */
+    int64_t base = a - rem;     /* truncated multiple; moves toward zero, cannot overflow */
+    if (rem != 0 && ((rem < 0) != (b < 0))) {
+        /* a / b is negative and inexact: truncation rounded toward zero, so
+         * the floor is one bucket further in the direction of -b. */
+        if (b > 0 ? (base < INT64_MIN + b) : (base > INT64_MAX + b))
+            return -1;
+        base -= b;
+    }
+    *out = base;
+    return 0;
+}
+
+/* (xbar col bucket) — time/value bucketing: floor(col/bucket)*bucket
+ *
+ * Dispatch, in order:
+ *   - either argument is a collection → element-wise via atomic_map_binary;
+ *   - both numeric, no float operand  → integer bucketing;
+ *   - both numeric, a float operand   → f64 bucketing;
+ *   - temporal col with temporal or integer bucket → temporal bucketing,
+ *     result has the type of col.
+ * Returns a "domain" error for a zero bucket, a null operand, or a bucket
+ * start that does not fit the result type; "type" for any other operand
+ * combination. */
+ray_t* ray_xbar_fn(ray_t* col, ray_t* bucket) {
+    /* Recursive unwrap for nested collections (list of vectors) */
+    if (is_collection(col) || is_collection(bucket))
+        return atomic_map_binary(ray_xbar_fn, col, bucket);
+    /* Both are integer types (i64, i32, i16) → integer xbar */
+    if (is_numeric(col) && is_numeric(bucket) && !is_float_op(col, bucket)) {
+        int64_t a = as_i64(col), b = as_i64(bucket);
+        if (b == 0 || RAY_ATOM_IS_NULL(col) || RAY_ATOM_IS_NULL(bucket))
+            return ray_error("domain", NULL);
+        int64_t result;
+        if (xbar_floor_i64(a, b, &result) != 0)
+            return ray_error("domain", NULL);
+        /* Result type follows the wider of the two operands.  The floor can
+         * land outside the narrow type (e.g. a negative bucket rounds up past
+         * the maximum), so range-check instead of truncating silently. */
+        if (col->type == -RAY_I32 && bucket->type == -RAY_I32) {
+            if (result < INT32_MIN || result > INT32_MAX)
+                return ray_error("domain", NULL);
+            return make_i32((int32_t)result);
+        }
+        if (col->type == -RAY_I16 && bucket->type == -RAY_I16) {
+            if (result < INT16_MIN || result > INT16_MAX)
+                return ray_error("domain", NULL);
+            return make_i16((int16_t)result);
+        }
+        return make_i64(result);
+    }
+    /* Float path: either operand is f64 */
+    if (is_numeric(col) && is_numeric(bucket)) {
+        if (RAY_ATOM_IS_NULL(col) || RAY_ATOM_IS_NULL(bucket))
+            return ray_error("domain", NULL);
+        double c = as_f64(col), b = as_f64(bucket);
+        if (b == 0.0) return ray_error("domain", NULL);
+        double fq = floor(c / b);
+        return make_f64(fq * b);
+    }
+    /* Temporal xbar: col is temporal, bucket is integer or temporal (not float) */
+    if (is_temporal(col) && (is_temporal(bucket) ||
+        (is_numeric(bucket) && bucket->type != -RAY_F64))) {
+        int64_t a = col->i64, b;
+        if (is_temporal(bucket)) {
+            b = bucket->i64;
+            /* Cross-temporal conversion: TIME(ms) bucket on TIMESTAMP(ns) col */
+            if (col->type == -RAY_TIMESTAMP && bucket->type == -RAY_TIME)
+                b *= 1000000LL;
+        } else {
+            b = as_i64(bucket);
+        }
+        /* Reject a null col as the numeric paths do; otherwise the null's
+         * value bits would be bucketed and returned as a real, non-null time. */
+        if (b == 0 || RAY_ATOM_IS_NULL(col) || RAY_ATOM_IS_NULL(bucket))
+            return ray_error("domain", NULL);
+        int64_t result;
+        if (xbar_floor_i64(a, b, &result) != 0)
+            return ray_error("domain", NULL);
+        if (col->type == -RAY_TIME) return ray_time(result);
+        if (col->type == -RAY_DATE) return ray_date(result);
+        return ray_timestamp(result);
+    }
+    return ray_error("type", NULL);
+}
+
+/* ══════════════════════════════════════════
+ * Update, Insert, Upsert
+ * ══════════════════════════════════════════ */
+
+/* Helper for insert/upsert: append ONE atom to the typed column vector
+ * `col_vec` and return the pointer handed back by ray_vec_append /
+ * ray_str_vec_append (callers must continue with the returned pointer), or
+ * an error object.
+ *
+ *   - null atom (any type): appends a zeroed slot and flags it in the nullmap;
+ *   - I64 / SYM / BOOL column: atom must have the matching atom type;
+ *   - F64 column: accepts an F64 atom, or an I64 atom converted to double;
+ *   - STR column: accepts a STR atom, copied via ray_str_vec_append;
+ *   - anything else: "type" error. */
+static ray_t* append_atom_to_col(ray_t* col_vec, ray_t* atom) {
+    if (RAY_ATOM_IS_NULL(atom)) {
+        int64_t idx = col_vec->len;   /* slot index the new element will occupy */
+        uint8_t zero[16] = {0};
+        col_vec = ray_vec_append(col_vec, zero);
+        if (!RAY_IS_ERR(col_vec))
+            ray_vec_set_null(col_vec, idx, true);
+        return col_vec;
+    }
+    int8_t ct = col_vec->type;
+    if (ct == RAY_I64) {
+        if (atom->type != -RAY_I64)
+            return ray_error("type", NULL);
+        int64_t v = atom->i64;
+        return ray_vec_append(col_vec, &v);
+    } else if (ct == RAY_SYM) {
+        if (atom->type != -RAY_SYM)
+            return ray_error("type", NULL);
+        int64_t v = atom->i64;
+        return ray_vec_append(col_vec, &v);
+    } else if (ct == RAY_F64) {
+        if (atom->type != -RAY_F64 && atom->type != -RAY_I64)
+            return ray_error("type", NULL);
+        double v = (atom->type == -RAY_F64) ? atom->f64 : (double)atom->i64;
+        return ray_vec_append(col_vec, &v);
+    } else if (ct == RAY_BOOL) {
+        if (atom->type != -RAY_BOOL)
+            return ray_error("type", NULL);
+        uint8_t v = atom->b8;
+        return ray_vec_append(col_vec, &v);
+    } else if (ct == RAY_STR && atom->type == -RAY_STR) {
+        const char *sptr = ray_str_ptr(atom);
+        size_t slen = ray_str_len(atom);
+        return ray_str_vec_append(col_vec, sptr, slen);
+    }
+    return ray_error("type", NULL);
+}
+
+/* (update {col: expr ... from: t [where: pred] [by: col]})
+ * Special form — receives unevaluated dict arg.
+ * For rows matching where (or all if no where), evaluate column expressions
+ * and replace those column values. Returns a new table.
+ * When `from:` evaluates to a symbol atom ('t), the table is looked up in the
+ * environment and the symbol is remembered in inplace_sym so the result can
+ * be stored back under that name.  The `by:` branch is taken only when no
+ * `where:` clause is present. */
+
+ray_t* ray_update_fn(ray_t** args, int64_t n) {
+    if (n < 1) return ray_error("domain", NULL);
+    ray_t* dict = args[0];
+    if (!dict || dict->type != RAY_DICT)
+        return ray_error("type", NULL);
+
+    ray_t* from_expr = dict_get(dict, "from");
+    if (!from_expr) return ray_error("domain", NULL);
+    /* Detect in-place update: from: 't means quoted symbol */
+    int64_t inplace_sym = -1;
+    ray_t* tbl = ray_eval(from_expr);
+    if (RAY_IS_ERR(tbl)) return tbl;
+    if (tbl->type == -RAY_SYM) {
+        /* from: 't — resolve symbol to table variable */
+        inplace_sym = tbl->i64;
+        ray_release(tbl);
+        tbl = ray_env_get(inplace_sym);
+        if (!tbl || RAY_IS_ERR(tbl)) return ray_error("domain", NULL);
+        ray_retain(tbl);
+    }
+    if (tbl->type != RAY_TABLE) { ray_release(tbl); return ray_error("type", NULL); }
+
+    ray_t* where_expr = dict_get(dict, "where");
+    ray_t* by_expr = dict_get(dict, "by");
+
+    /* UPDATE WITH BY: group, compute aggregate, broadcast back */
+    if (by_expr && !where_expr) {
+        DICT_VIEW_DECL(updv);
+        DICT_VIEW_OPEN(dict, updv);
+        if (DICT_VIEW_OVERFLOW(updv)) {
+            ray_release(tbl);
+            return ray_error("domain", "update clause has too many keys");
+        }
+        int64_t dict_n = updv_n;
+        ray_t** dict_elems = updv;
+        int64_t from_id = ray_sym_intern("from", 4);
+        int64_t where_id =
ray_sym_intern("where", 5); + int64_t by_id = ray_sym_intern("by", 2); + + /* Resolve group key column name. + * by_expr is a name reference (not evaluated) — extract sym_id directly */ + int64_t by_col_name = -1; + if (by_expr->type == -RAY_SYM) { + by_col_name = by_expr->i64; + } + if (by_col_name < 0) { ray_release(tbl); return ray_error("type", NULL); } + + /* Find group column in table */ + ray_t* grp_col = ray_table_get_col(tbl, by_col_name); + if (!grp_col) { ray_release(tbl); return ray_error("domain", NULL); } + int64_t nrows2 = ray_table_nrows(tbl); + + /* Use ray_group_fn to get group indices: {key: [indices]}. + * Flatten the resulting RAY_DICT into the legacy interleaved + * [k0,v0,…] LIST shape this branch was written against. */ + ray_t* groups = NULL; + { + ray_t* gd = ray_group_fn(grp_col); + if (!gd || RAY_IS_ERR(gd)) { ray_release(tbl); return gd ? gd : ray_error("oom", NULL); } + groups = groups_to_pair_list(gd); + ray_release(gd); + if (RAY_IS_ERR(groups)) { ray_release(tbl); return groups; } + } + + /* Start with a copy of the original table */ + int64_t ncols = ray_table_ncols(tbl); + ray_t* result = ray_table_new((int32_t)ncols); + if (RAY_IS_ERR(result)) { ray_release(groups); ray_release(tbl); return result; } + for (int64_t c = 0; c < ncols; c++) { + int64_t cn = ray_table_col_name(tbl, c); + ray_t* col = ray_table_get_col_idx(tbl, c); + ray_retain(col); + result = ray_table_add_col(result, cn, col); + ray_release(col); + if (RAY_IS_ERR(result)) { ray_release(groups); ray_release(tbl); return result; } + } + + /* For each aggregate expression, compute per group and broadcast */ + for (int64_t d = 0; d + 1 < dict_n; d += 2) { + int64_t kid = dict_elems[d]->i64; + if (kid == from_id || kid == where_id || kid == by_id) continue; + ray_t* agg_expr = dict_elems[d + 1]; + + /* Evaluate the aggregate for each group and broadcast */ + ray_t* grp_items = (ray_t**)ray_data(groups) ? 
groups : NULL; + if (!grp_items) { ray_release(result); ray_release(groups); ray_release(tbl); return ray_error("oom", NULL); } + int64_t ngroups = groups->len / 2; + ray_t** gdata = (ray_t**)ray_data(groups); + + /* We need to evaluate the aggregate per group. + * Build the result column by evaluating the expression on each group's subset. */ + ray_t* out_col = ray_vec_new(RAY_I64, nrows2); /* will be resized to correct type */ + if (RAY_IS_ERR(out_col)) { ray_release(result); ray_release(groups); ray_release(tbl); return out_col; } + + int8_t out_type = RAY_I64; + int first_group = 1; + + for (int64_t gi = 0; gi < ngroups; gi++) { + ray_t* idx_vec = gdata[gi * 2 + 1]; /* index vector for this group */ + int64_t gsize = ray_len(idx_vec); + + /* Build a sub-table for this group */ + ray_t* sub_tbl = ray_table_new((int32_t)ncols); + if (RAY_IS_ERR(sub_tbl)) { ray_release(out_col); ray_release(result); ray_release(groups); ray_release(tbl); return sub_tbl; } + for (int64_t c = 0; c < ncols; c++) { + int64_t cn = ray_table_col_name(tbl, c); + ray_t* full_col = ray_table_get_col_idx(tbl, c); + int8_t ct = full_col->type; + ray_t* sub_col = ray_vec_new(ct, gsize); + if (RAY_IS_ERR(sub_col)) { ray_release(sub_tbl); ray_release(out_col); ray_release(result); ray_release(groups); ray_release(tbl); return sub_col; } + sub_col->len = gsize; + int esz = ray_elem_size(ct); + char* src = (char*)ray_data(full_col); + char* dst = (char*)ray_data(sub_col); + int64_t* idxs = (int64_t*)ray_data(idx_vec); + for (int64_t r = 0; r < gsize; r++) + memcpy(dst + r * esz, src + idxs[r] * esz, esz); + sub_tbl = ray_table_add_col(sub_tbl, cn, sub_col); + ray_release(sub_col); + if (RAY_IS_ERR(sub_tbl)) { ray_release(out_col); ray_release(result); ray_release(groups); ray_release(tbl); return sub_tbl; } + } + + /* Evaluate expression on sub-table via DAG */ + ray_graph_t* ug = ray_graph_new(sub_tbl); + ray_op_t* expr_op = compile_expr_dag(ug, agg_expr); + if (!expr_op) { ray_graph_free(ug); 
ray_release(sub_tbl); ray_release(out_col); ray_release(result); ray_release(groups); ray_release(tbl); return ray_error("domain", NULL); } + expr_op = ray_optimize(ug, expr_op); + ray_t* agg_result = ray_execute(ug, expr_op); + ray_graph_free(ug); + ray_release(sub_tbl); + + if (RAY_IS_ERR(agg_result)) { ray_release(out_col); ray_release(result); ray_release(groups); ray_release(tbl); return agg_result; } + + /* Determine output type from first group */ + if (first_group) { + if (ray_is_atom(agg_result)) out_type = -agg_result->type; + else if (ray_is_vec(agg_result)) out_type = agg_result->type; + ray_release(out_col); + out_col = ray_vec_new(out_type, nrows2); + if (RAY_IS_ERR(out_col)) { ray_release(agg_result); ray_release(result); ray_release(groups); ray_release(tbl); return out_col; } + out_col->len = nrows2; + first_group = 0; + } + + /* Broadcast aggregate value to all rows in this group */ + int64_t* idxs = (int64_t*)ray_data(idx_vec); + if (ray_is_atom(agg_result)) { + for (int64_t r = 0; r < gsize; r++) + store_typed_elem(out_col, idxs[r], agg_result); + } + ray_release(agg_result); + } + + /* Add the new column to the result table */ + result = ray_table_add_col(result, kid, out_col); + ray_release(out_col); + if (RAY_IS_ERR(result)) { ray_release(groups); ray_release(tbl); return result; } + } + + ray_release(groups); + /* Store in-place if needed */ + if (inplace_sym >= 0) { + ray_env_set(inplace_sym, result); + } + ray_release(tbl); + return result; + } + + /* Evaluate WHERE using the DAG to get a boolean mask */ + int64_t nrows = ray_table_nrows(tbl); + uint8_t* mask = NULL; + + if (where_expr) { + /* Try DAG compilation first, fall back to eval-level */ + ray_t* mask_vec = NULL; + ray_graph_t* g = ray_graph_new(tbl); + if (g) { + ray_op_t* pred = compile_expr_dag(g, where_expr); + if (pred) { + pred = ray_optimize(g, pred); + mask_vec = ray_execute(g, pred); + } + ray_graph_free(g); + } + /* Fallback: eval-level predicate evaluation */ + if 
(!mask_vec || RAY_IS_ERR(mask_vec)) { + /* Bind column names to column vectors in env, then eval */ + int64_t ncols2 = ray_table_ncols(tbl); + ray_env_push_scope(); + for (int64_t c = 0; c < ncols2; c++) { + int64_t cn = ray_table_col_name(tbl, c); + ray_t* col = ray_table_get_col_idx(tbl, c); + ray_env_set(cn, col); + } + mask_vec = ray_eval(where_expr); + ray_env_pop_scope(); + } + if (!mask_vec || RAY_IS_ERR(mask_vec)) { ray_release(tbl); return mask_vec ? mask_vec : ray_error("type", NULL); } + if (mask_vec->type != RAY_BOOL || mask_vec->len != nrows) { + ray_release(mask_vec); + ray_release(tbl); + return ray_error("type", NULL); + } + mask = (uint8_t*)ray_data(mask_vec); + /* Keep mask_vec alive until we're done */ + + /* Build a new table with updated columns */ + int64_t ncols = ray_table_ncols(tbl); + DICT_VIEW_DECL(updw); + DICT_VIEW_OPEN(dict, updw); + if (DICT_VIEW_OVERFLOW(updw)) { + ray_release(mask_vec); ray_release(tbl); + return ray_error("domain", "update clause has too many keys"); + } + int64_t dict_n = updw_n; + ray_t** dict_elems = updw; + int64_t from_id = ray_sym_intern("from", 4); + int64_t where_id = ray_sym_intern("where", 5); + + ray_t* result = ray_table_new(ncols); + if (RAY_IS_ERR(result)) { ray_release(mask_vec); ray_release(tbl); return result; } + + for (int64_t c = 0; c < ncols; c++) { + int64_t col_name = ray_table_col_name(tbl, c); + ray_t* orig_col = ray_table_get_col_idx(tbl, c); + + /* Check if this column has an update expression */ + ray_t* update_expr = NULL; + for (int64_t d = 0; d + 1 < dict_n; d += 2) { + int64_t kid = dict_elems[d]->i64; + if (kid == from_id || kid == where_id) continue; + if (kid == col_name) { update_expr = dict_elems[d + 1]; break; } + } + + if (!update_expr) { + /* No update for this column — copy as-is */ + ray_retain(orig_col); + result = ray_table_add_col(result, col_name, orig_col); + ray_release(orig_col); + } else { + /* Evaluate the expression for each row and apply to matching rows */ + 
int8_t ct = orig_col->type; + ray_t* new_col = ray_vec_new(ct, nrows); + if (RAY_IS_ERR(new_col)) { ray_release(result); ray_release(mask_vec); ray_release(tbl); return new_col; } + + /* Evaluate expression via DAG, fallback to eval-level */ + ray_t* expr_vec = NULL; + { + ray_graph_t* ug = ray_graph_new(tbl); + if (ug) { + ray_op_t* expr_op = compile_expr_dag(ug, update_expr); + if (expr_op) { + expr_op = ray_optimize(ug, expr_op); + expr_vec = ray_execute(ug, expr_op); + } + ray_graph_free(ug); + } + } + if (!expr_vec || RAY_IS_ERR(expr_vec)) { + /* Fallback: eval with column bindings */ + int64_t ncols_e = ray_table_ncols(tbl); + ray_env_push_scope(); + for (int64_t c2 = 0; c2 < ncols_e; c2++) { + int64_t cn = ray_table_col_name(tbl, c2); + ray_t* col2 = ray_table_get_col_idx(tbl, c2); + ray_env_set(cn, col2); + } + expr_vec = ray_eval(update_expr); + ray_env_pop_scope(); + } + if (!expr_vec || RAY_IS_ERR(expr_vec)) { ray_release(new_col); ray_release(result); ray_release(mask_vec); ray_release(tbl); return expr_vec ? expr_vec : ray_error("type", NULL); } + + /* WHERE update: expression result replaces ONLY masked rows. + * When type differs (e.g., I64 col, F64 expr from (* col 1.1)), + * keep original column type and cast expr results. + * Only numeric promotions are allowed — STR↔numeric is a type error. */ + int8_t expr_type = (expr_vec->type < 0) ? 
-expr_vec->type : expr_vec->type;
+                if (expr_type != ct && expr_type > 0 && ray_is_vec(expr_vec)) {
+                    /* Only numeric conversions are allowed here: any pairing of
+                     * I64 / I32 / F64 between column and expression. */
+                    int is_numeric_promo = (ct == RAY_I64 || ct == RAY_I32 || ct == RAY_F64) &&
+                        (expr_type == RAY_I64 || expr_type == RAY_I32 || expr_type == RAY_F64);
+                    if (!is_numeric_promo) {
+                        ray_release(expr_vec); ray_release(new_col); ray_release(result); ray_release(mask_vec); ray_release(tbl);
+                        return ray_error("type", NULL);
+                    }
+                    /* Copy original column values first */
+                    int esz = ray_elem_size(ct);
+                    memcpy(ray_data(new_col), ray_data(orig_col), (size_t)(nrows * esz));
+                    new_col->len = nrows;
+                    /* Overlay masked rows with type conversion.  Every pairing
+                     * admitted by is_numeric_promo must be handled: a missing
+                     * arm would leave the masked row at its original value,
+                     * i.e. the update would be silently ignored. */
+                    void* conv_dst = ray_data(new_col);
+                    void* conv_src = ray_data(expr_vec);
+                    for (int64_t r = 0; r < nrows; r++) {
+                        if (!mask[r]) continue;
+                        if (ct == RAY_I64 && expr_type == RAY_F64)
+                            ((int64_t*)conv_dst)[r] = (int64_t)((double*)conv_src)[r];
+                        else if (ct == RAY_I64 && expr_type == RAY_I32)
+                            ((int64_t*)conv_dst)[r] = (int64_t)((int32_t*)conv_src)[r];
+                        else if (ct == RAY_I32 && expr_type == RAY_F64)
+                            ((int32_t*)conv_dst)[r] = (int32_t)((double*)conv_src)[r];
+                        else if (ct == RAY_I32 && expr_type == RAY_I64)
+                            ((int32_t*)conv_dst)[r] = (int32_t)((int64_t*)conv_src)[r];
+                        else if (ct == RAY_F64 && expr_type == RAY_I64)
+                            ((double*)conv_dst)[r] = (double)((int64_t*)conv_src)[r];
+                        else if (ct == RAY_F64 && expr_type == RAY_I32)
+                            ((double*)conv_dst)[r] = (double)((int32_t*)conv_src)[r];
+                    }
+                    /* Null-bit propagation: memcpy above only copies values,
+                     * not the nullmap. Carry over orig_col's nulls for the
+                     * untouched rows, and pull expr_vec's nulls in for the
+                     * masked rows. Without this, casting a null F64 expr
+                     * back to an I64 column silently produces 0. */
+                    for (int64_t r = 0; r < nrows; r++) {
+                        ray_t* src = mask[r] ?
expr_vec : orig_col; + if (ray_vec_is_null(src, r)) + ray_vec_set_null(new_col, r, true); + } + ray_release(expr_vec); + result = ray_table_add_col(result, col_name, new_col); + ray_release(new_col); + if (RAY_IS_ERR(result)) { ray_release(mask_vec); ray_release(tbl); return result; } + continue; + } + + /* Broadcast scalar atom to full column vector if needed */ + if (expr_vec->type < 0) { + /* Type check atom against column type BEFORE broadcast */ + int ok = (expr_vec->type == -ct); + if (!ok && ct == RAY_F64 && expr_vec->type == -RAY_I64) ok = 1; + if (!ok && ct == RAY_LIST && expr_vec->type == -RAY_SYM) ok = 1; + if (!ok && ct == RAY_SYM && expr_vec->type == -RAY_SYM) ok = 1; + if (!ok) { + ray_release(expr_vec); ray_release(new_col); ray_release(result); ray_release(mask_vec); ray_release(tbl); + return ray_error("type", NULL); + } + /* SYM atom to LIST column: build boxed list, merge with mask */ + if (ct == RAY_LIST && expr_vec->type == -RAY_SYM) { + ray_free(new_col); + ray_t* new_list = ray_list_new((int32_t)nrows); + if (RAY_IS_ERR(new_list)) { ray_release(expr_vec); ray_release(result); ray_release(mask_vec); ray_release(tbl); return new_list; } + ray_t** orig_elems = (ray_t**)ray_data(orig_col); + for (int64_t r = 0; r < nrows; r++) { + ray_t* elem = mask[r] ? 
expr_vec : orig_elems[r]; + ray_retain(elem); + new_list = ray_list_append(new_list, elem); + ray_release(elem); + if (RAY_IS_ERR(new_list)) { ray_release(expr_vec); ray_release(result); ray_release(mask_vec); ray_release(tbl); return new_list; } + } + ray_release(expr_vec); + result = ray_table_add_col(result, col_name, new_list); + ray_release(new_list); + if (RAY_IS_ERR(result)) { ray_release(mask_vec); ray_release(tbl); return result; } + continue; + } + ray_t* bcast = ray_vec_new(ct, nrows); + if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(new_col); ray_release(result); ray_release(mask_vec); ray_release(tbl); return bcast; } + if (ct == RAY_STR && expr_vec->type == -RAY_STR) { + const char* sp = ray_str_ptr(expr_vec); + size_t sl = ray_str_len(expr_vec); + for (int64_t r = 0; r < nrows; r++) { + bcast = ray_str_vec_append(bcast, sp, sl); + if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(new_col); ray_release(result); ray_release(mask_vec); ray_release(tbl); return bcast; } + } + } else { + size_t esz = (ct == RAY_BOOL) ? 1 : 8; + uint8_t elem[8] = {0}; + if (ct == RAY_F64 && expr_vec->type == -RAY_I64) { + double promoted = (double)expr_vec->i64; + memcpy(elem, &promoted, 8); + } else { + memcpy(elem, &expr_vec->i64, esz); + } + for (int64_t r = 0; r < nrows; r++) { + bcast = ray_vec_append(bcast, elem); + if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(new_col); ray_release(result); ray_release(mask_vec); ray_release(tbl); return bcast; } + } + } + /* Preserve typed-null markers across broadcast. Without + * this, (update {a: 0N from: t}) silently writes plain + * zeros into the I64 column — the value bits get copied + * but the null bitmap doesn't, so (nil? a) reports false + * on what should be null cells. 
*/ + if (RAY_ATOM_IS_NULL(expr_vec)) { + for (int64_t r = 0; r < nrows; r++) + ray_vec_set_null(bcast, r, true); + } + ray_release(expr_vec); + expr_vec = bcast; + } + + /* Promote I64 vector to F64 if column is F64 */ + if (expr_vec->type == RAY_I64 && ct == RAY_F64) { + int64_t nr = ray_len(expr_vec); + ray_t* promoted = ray_vec_new(RAY_F64, nr); + if (RAY_IS_ERR(promoted)) { ray_release(expr_vec); ray_release(new_col); ray_release(result); ray_release(mask_vec); ray_release(tbl); return promoted; } + int64_t* src_data = (int64_t*)ray_data(expr_vec); + for (int64_t r = 0; r < nr; r++) { + double v = (double)src_data[r]; + promoted = ray_vec_append(promoted, &v); + if (RAY_IS_ERR(promoted)) { ray_release(expr_vec); ray_release(new_col); ray_release(result); ray_release(mask_vec); ray_release(tbl); return promoted; } + } + /* Carry the nullmap across the I64→F64 promotion. */ + for (int64_t r = 0; r < nr; r++) + if (ray_vec_is_null(expr_vec, r)) + ray_vec_set_null(promoted, r, true); + ray_release(expr_vec); + expr_vec = promoted; + } + + /* Type check: expr_vec must match original column type */ + if (expr_vec->type != ct) { + ray_release(expr_vec); ray_release(new_col); ray_release(result); ray_release(mask_vec); ray_release(tbl); + return ray_error("type", NULL); + } + + /* Merge: use expr_vec for matching rows, orig_col for non-matching. + * Null-bit propagation applies to STR/SYM as well — a null in + * either the orig column (unmasked rows) or the expr (masked + * rows) must travel into new_col's nullmap. */ + if (ct == RAY_STR) { + for (int64_t r = 0; r < nrows; r++) { + ray_t* src_vec = mask[r] ? expr_vec : orig_col; + size_t slen = 0; + const char* sp = ray_str_vec_get(src_vec, r, &slen); + new_col = ray_str_vec_append(new_col, sp ? sp : "", sp ? 
slen : 0); + if (RAY_IS_ERR(new_col)) { ray_release(expr_vec); ray_release(result); ray_release(mask_vec); ray_release(tbl); return new_col; } + if (ray_vec_is_null(src_vec, r)) + ray_vec_set_null(new_col, new_col->len - 1, true); + } + } else if (ct == RAY_SYM) { + for (int64_t r = 0; r < nrows; r++) { + ray_t* src_vec = mask[r] ? expr_vec : orig_col; + int64_t sym_val = ray_read_sym(ray_data(src_vec), r, src_vec->type, src_vec->attrs); + new_col = ray_vec_append(new_col, &sym_val); + if (RAY_IS_ERR(new_col)) { ray_release(expr_vec); ray_release(result); ray_release(mask_vec); ray_release(tbl); return new_col; } + if (ray_vec_is_null(src_vec, r)) + ray_vec_set_null(new_col, new_col->len - 1, true); + } + } else { + size_t elem_sz = (ct == RAY_BOOL) ? 1 : 8; + uint8_t* orig_data = (uint8_t*)ray_data(orig_col); + uint8_t* expr_data = (uint8_t*)ray_data(expr_vec); + for (int64_t r = 0; r < nrows; r++) { + ray_t* src_vec = mask[r] ? expr_vec : orig_col; + uint8_t* base = mask[r] ? expr_data : orig_data; + new_col = ray_vec_append(new_col, base + r * elem_sz); + if (RAY_IS_ERR(new_col)) { ray_release(expr_vec); ray_release(result); ray_release(mask_vec); ray_release(tbl); return new_col; } + /* Propagate null bit from whichever side supplied + * the value. Without this, masking in a typed-null + * broadcast would copy zero bytes into the slot but + * leave the destination's nullmap clear → silent + * loss of null marker. 
*/ + if (ray_vec_is_null(src_vec, r)) + ray_vec_set_null(new_col, new_col->len - 1, true); + } + } + result = ray_table_add_col(result, col_name, new_col); + ray_release(new_col); + ray_release(expr_vec); + } + if (RAY_IS_ERR(result)) { ray_release(mask_vec); ray_release(tbl); return result; } + } + + ray_release(mask_vec); + if (inplace_sym >= 0 && result && !RAY_IS_ERR(result)) { + ray_env_set(inplace_sym, result); + } + ray_release(tbl); + return result; + } + + /* No WHERE — update all rows */ + int64_t ncols = ray_table_ncols(tbl); + DICT_VIEW_DECL(upda); + DICT_VIEW_OPEN(dict, upda); + if (DICT_VIEW_OVERFLOW(upda)) { + ray_release(tbl); + return ray_error("domain", "update clause has too many keys"); + } + int64_t dict_n = upda_n; + ray_t** dict_elems = upda; + int64_t from_id = ray_sym_intern("from", 4); + + ray_t* result = ray_table_new(ncols); + if (RAY_IS_ERR(result)) { ray_release(tbl); return result; } + + for (int64_t c = 0; c < ncols; c++) { + int64_t col_name = ray_table_col_name(tbl, c); + ray_t* orig_col = ray_table_get_col_idx(tbl, c); + + ray_t* update_expr = NULL; + for (int64_t d = 0; d + 1 < dict_n; d += 2) { + int64_t kid = dict_elems[d]->i64; + if (kid == from_id) continue; + if (kid == col_name) { update_expr = dict_elems[d + 1]; break; } + } + + if (!update_expr) { + ray_retain(orig_col); + result = ray_table_add_col(result, col_name, orig_col); + ray_release(orig_col); + } else { + ray_t* expr_vec = NULL; + { + ray_graph_t* ug = ray_graph_new(tbl); + if (ug) { + ray_op_t* expr_op = compile_expr_dag(ug, update_expr); + if (expr_op) { + expr_op = ray_optimize(ug, expr_op); + expr_vec = ray_execute(ug, expr_op); + } + ray_graph_free(ug); + } + } + if (!expr_vec || RAY_IS_ERR(expr_vec)) { + /* Fallback: eval with column bindings */ + int64_t ncols_f = ray_table_ncols(tbl); + ray_env_push_scope(); + for (int64_t cf = 0; cf < ncols_f; cf++) { + int64_t cn = ray_table_col_name(tbl, cf); + ray_t* colf = ray_table_get_col_idx(tbl, cf); + 
ray_env_set(cn, colf); + } + expr_vec = ray_eval(update_expr); + ray_env_pop_scope(); + } + if (!expr_vec || RAY_IS_ERR(expr_vec)) { ray_release(result); ray_release(tbl); return expr_vec ? expr_vec : ray_error("type", NULL); } + + /* Broadcast scalar atom to full column vector if needed */ + if (expr_vec->type < 0) { + int64_t nrows = ray_table_nrows(tbl); + int8_t ct = orig_col->type; + /* Type check atom against column type BEFORE broadcast */ + int ok = (expr_vec->type == -ct); + if (!ok && ct == RAY_F64 && expr_vec->type == -RAY_I64) ok = 1; + /* SYM atom → LIST column (LIST of SYM atoms) */ + if (!ok && ct == RAY_LIST && expr_vec->type == -RAY_SYM) ok = 1; + if (!ok) { + ray_release(expr_vec); ray_release(result); ray_release(tbl); + return ray_error("type", NULL); + } + /* SYM atom to LIST column: broadcast as boxed list */ + if (ct == RAY_LIST && expr_vec->type == -RAY_SYM) { + ray_t* bcast = ray_list_new((int32_t)nrows); + if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return bcast; } + for (int64_t r = 0; r < nrows; r++) { + ray_retain(expr_vec); + bcast = ray_list_append(bcast, expr_vec); + ray_release(expr_vec); + if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return bcast; } + } + ray_release(expr_vec); + expr_vec = bcast; + goto no_where_add_col; + } + ray_t* bcast = ray_vec_new(ct, nrows); + if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return bcast; } + if (ct == RAY_STR && expr_vec->type == -RAY_STR) { + const char* sp = ray_str_ptr(expr_vec); + size_t sl = ray_str_len(expr_vec); + for (int64_t r = 0; r < nrows; r++) { + bcast = ray_str_vec_append(bcast, sp, sl); + if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return bcast; } + } + } else { + size_t esz = (ct == RAY_BOOL) ? 
1 : 8; + uint8_t elem[8] = {0}; + if (ct == RAY_F64 && expr_vec->type == -RAY_I64) { + double promoted = (double)expr_vec->i64; + memcpy(elem, &promoted, 8); + } else { + memcpy(elem, &expr_vec->i64, esz); + } + for (int64_t r = 0; r < nrows; r++) { + bcast = ray_vec_append(bcast, elem); + if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return bcast; } + } + } + /* Preserve typed-null markers across broadcast (mirrors the + * WHERE branch fix at the analogous site above). */ + if (RAY_ATOM_IS_NULL(expr_vec)) { + for (int64_t r = 0; r < nrows; r++) + ray_vec_set_null(bcast, r, true); + } + ray_release(expr_vec); + expr_vec = bcast; + } + + /* Promote I64 vector to F64 if column is F64 */ + if (expr_vec->type == RAY_I64 && orig_col->type == RAY_F64) { + int64_t nr = ray_len(expr_vec); + ray_t* promoted = ray_vec_new(RAY_F64, nr); + if (RAY_IS_ERR(promoted)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return promoted; } + int64_t* src_data = (int64_t*)ray_data(expr_vec); + for (int64_t r = 0; r < nr; r++) { + double v = (double)src_data[r]; + promoted = ray_vec_append(promoted, &v); + if (RAY_IS_ERR(promoted)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return promoted; } + } + /* Carry the nullmap across the I64→F64 promotion. */ + for (int64_t r = 0; r < nr; r++) + if (ray_vec_is_null(expr_vec, r)) + ray_vec_set_null(promoted, r, true); + ray_release(expr_vec); + expr_vec = promoted; + } + + /* No-WHERE update: allow type change for same-category types. + * Atoms (type<0) will be broadcast later, check after broadcast. + * For vectors, check now: only numeric promotions or same type. + * Also allow SYM/LIST interop (columns may be stored as LIST). 
*/ + if (expr_vec->type > 0 && expr_vec->type != orig_col->type) { + int is_ok = 0; + /* Numeric promotions */ + if ((orig_col->type == RAY_I64 || orig_col->type == RAY_I32 || orig_col->type == RAY_F64) && + (expr_vec->type == RAY_I64 || expr_vec->type == RAY_I32 || expr_vec->type == RAY_F64)) + is_ok = 1; + /* SYM/LIST interop */ + if ((orig_col->type == RAY_SYM || orig_col->type == RAY_LIST) && + (expr_vec->type == RAY_SYM || expr_vec->type == RAY_LIST)) + is_ok = 1; + if (!is_ok) { + ray_release(expr_vec); ray_release(result); ray_release(tbl); + return ray_error("type", NULL); + } + } + +no_where_add_col: + result = ray_table_add_col(result, col_name, expr_vec); + ray_release(expr_vec); + } + if (RAY_IS_ERR(result)) { ray_release(tbl); return result; } + } + + /* Add NEW columns from dict (columns not already in the table) */ + for (int64_t d = 0; d + 1 < dict_n; d += 2) { + int64_t kid = dict_elems[d]->i64; + if (kid == from_id) continue; + /* Check if this column already exists */ + int exists = 0; + for (int64_t c = 0; c < ncols; c++) { + if (ray_table_col_name(tbl, c) == kid) { exists = 1; break; } + } + if (exists) continue; + + /* New column: evaluate expression and add */ + ray_t* update_expr = dict_elems[d + 1]; + ray_graph_t* ug = ray_graph_new(tbl); + ray_op_t* expr_op = compile_expr_dag(ug, update_expr); + if (!expr_op) { ray_release(result); ray_release(tbl); ray_graph_free(ug); return ray_error("domain", NULL); } + expr_op = ray_optimize(ug, expr_op); + ray_t* expr_vec = ray_execute(ug, expr_op); + ray_graph_free(ug); + if (RAY_IS_ERR(expr_vec)) { ray_release(result); ray_release(tbl); return expr_vec; } + + /* Broadcast scalar to column */ + if (expr_vec->type < 0) { + int64_t nrows = ray_table_nrows(tbl); + int8_t ct = -expr_vec->type; + ray_t* bcast = ray_vec_new(ct, nrows); + if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return bcast; } + size_t esz = ray_elem_size(ct); + uint8_t elem[8] = {0}; + 
memcpy(elem, &expr_vec->i64, esz > 8 ? 8 : esz); + for (int64_t r = 0; r < nrows; r++) { + bcast = ray_vec_append(bcast, elem); + if (RAY_IS_ERR(bcast)) { ray_release(expr_vec); ray_release(result); ray_release(tbl); return bcast; } + } + /* Preserve typed-null markers across broadcast (mirrors the + * existing-column branches above). Without this, + * (update {c: 0N from: t}) would silently materialise a + * brand-new column of plain zeros. */ + if (RAY_ATOM_IS_NULL(expr_vec)) { + for (int64_t r = 0; r < nrows; r++) + ray_vec_set_null(bcast, r, true); + } + ray_release(expr_vec); + expr_vec = bcast; + } + + result = ray_table_add_col(result, kid, expr_vec); + ray_release(expr_vec); + if (RAY_IS_ERR(result)) { ray_release(tbl); return result; } + } + + /* Store in-place if from: 't */ + if (inplace_sym >= 0 && result && !RAY_IS_ERR(result)) { + ray_env_set(inplace_sym, result); + } + ray_release(tbl); + return result; +} + +/* (insert table (list val1 val2 ...)) — append a row to a table */ +ray_t* ray_insert_fn(ray_t** args, int64_t n) { + if (n < 2) return ray_error("domain", NULL); + + /* Special form: detect 'sym (quoted symbol for in-place insert) */ + int64_t inplace_sym = -1; + ray_t* tbl_raw = args[0]; + ray_t* tbl; + + /* Detect calling convention: already-evaluated args (from upsert) vs raw parse tree */ + int already_eval = (tbl_raw && tbl_raw->type == RAY_TABLE); + + if (!already_eval && tbl_raw && tbl_raw->type == -RAY_SYM && !(tbl_raw->attrs & RAY_ATTR_NAME)) { + /* Quoted symbol 'sym (no ATTR_NAME) — in-place insert */ + inplace_sym = tbl_raw->i64; + tbl = ray_env_get(inplace_sym); + if (!tbl || RAY_IS_ERR(tbl)) return ray_error("domain", NULL); + ray_retain(tbl); + } else if (already_eval) { + tbl = tbl_raw; + ray_retain(tbl); + } else { + tbl = ray_eval(tbl_raw); + if (!tbl || RAY_IS_ERR(tbl)) return tbl ? 
tbl : ray_error("type", NULL); + } + + /* ==================================================================== + * Vec/list dispatch — n==2 append, n==3 positional insert. + * Tables with n==2 fall through to the legacy table-row append below. + * ==================================================================== */ + if (tbl->type != RAY_TABLE) { + if (already_eval) { ray_release(tbl); return ray_error("type", NULL); } + if (tbl->attrs & RAY_ATTR_ARENA) { ray_release(tbl); return ray_error("type", NULL); } + + /* Slice → materialise so cow can mutate. Lists never slice. */ + if (tbl->attrs & RAY_ATTR_SLICE) { + if (tbl->type == RAY_LIST) { ray_release(tbl); return ray_error("type", NULL); } + ray_t* empty = ray_vec_new(tbl->type, 0); + if (!empty || RAY_IS_ERR(empty)) { + ray_release(tbl); + return empty ? empty : ray_error("oom", NULL); + } + ray_t* mat = ray_vec_concat(tbl, empty); + ray_release(empty); + ray_release(tbl); + if (!mat || RAY_IS_ERR(mat)) return mat ? mat : ray_error("oom", NULL); + tbl = mat; + } + + bool is_target_list = (tbl->type == RAY_LIST); + bool is_target_vec = ray_is_vec(tbl); + if (!is_target_list && !is_target_vec) { + ray_release(tbl); + return ray_error("type", NULL); + } + if (n != 2 && n != 3) { + ray_release(tbl); + return ray_error("domain", NULL); + } + + ray_t* result = NULL; + int8_t tt = tbl->type; + + if (n == 2) { + /* APPEND */ + ray_t* val = ray_eval(args[1]); + if (!val || RAY_IS_ERR(val)) { + ray_release(tbl); + return val ? val : ray_error("type", NULL); + } + if (is_target_list) { + /* Always one slot — never splice on append. 
*/ + tbl = ray_list_append(tbl, val); + result = tbl; + } else if (val->type == -tt) { + /* Atom of matching type → element append */ + int64_t new_idx = tbl->len; + if (tt == RAY_STR) { + tbl = ray_str_vec_append(tbl, ray_str_ptr(val), ray_str_len(val)); + } else if (tt == RAY_SYM) { + int64_t s = val->i64; + tbl = ray_vec_append(tbl, &s); + } else if (tt == RAY_GUID) { + /* GUID atom's 16-byte payload lives in val->obj; typed-null + * atoms have obj==NULL — write zeros and let the post-call + * RAY_ATOM_IS_NULL check mark the slot. */ + static const uint8_t zero_guid[16] = {0}; + const void* src = val->obj ? ray_data(val->obj) : zero_guid; + tbl = ray_vec_append(tbl, src); + } else { + tbl = ray_vec_append(tbl, &val->u8); + } + if (tbl && !RAY_IS_ERR(tbl) && RAY_ATOM_IS_NULL(val)) + ray_vec_set_null(tbl, new_idx, true); + result = tbl; + } else if (val->type == tt) { + /* Same-type vec → splice at end */ + result = ray_vec_concat(tbl, val); + ray_release(tbl); + } else { + ray_release(tbl); + ray_release(val); + return ray_error("type", NULL); + } + ray_release(val); + } else { + /* n == 3 — POSITIONAL */ + ray_t* idx_arg = ray_eval(args[1]); + if (!idx_arg || RAY_IS_ERR(idx_arg)) { + ray_release(tbl); + return idx_arg ? idx_arg : ray_error("type", NULL); + } + ray_t* val = ray_eval(args[2]); + if (!val || RAY_IS_ERR(val)) { + ray_release(tbl); + ray_release(idx_arg); + return val ? 
val : ray_error("type", NULL); + } + + if (is_target_list) { + if (idx_arg->type == -RAY_I64) { + tbl = ray_list_insert_at(tbl, idx_arg->i64, val); + result = tbl; + } else if (idx_arg->type == RAY_I64) { + if (val->type != RAY_LIST) { + ray_release(tbl); ray_release(idx_arg); ray_release(val); + return ray_error("type", NULL); + } + result = ray_list_insert_many(tbl, idx_arg, val); + ray_release(tbl); + } else { + ray_release(tbl); ray_release(idx_arg); ray_release(val); + return ray_error("type", NULL); + } + } else { + /* vec target */ + if (idx_arg->type == -RAY_I64) { + int64_t i = idx_arg->i64; + if (val->type == -tt) { + if (tt == RAY_STR) { + result = ray_str_vec_insert_at(tbl, i, + ray_str_ptr(val), ray_str_len(val)); + ray_release(tbl); + } else if (tt == RAY_SYM) { + int64_t s = val->i64; + tbl = ray_vec_insert_at(tbl, i, &s); + result = tbl; + } else if (tt == RAY_GUID) { + static const uint8_t zero_guid[16] = {0}; + const void* src = val->obj ? ray_data(val->obj) : zero_guid; + tbl = ray_vec_insert_at(tbl, i, src); + result = tbl; + } else { + tbl = ray_vec_insert_at(tbl, i, &val->u8); + result = tbl; + } + if (result && !RAY_IS_ERR(result) && RAY_ATOM_IS_NULL(val)) + ray_vec_set_null(result, i, true); + } else if (val->type == tt) { + result = ray_vec_insert_vec_at(tbl, i, val); + ray_release(tbl); + } else { + ray_release(tbl); ray_release(idx_arg); ray_release(val); + return ray_error("type", NULL); + } + } else if (idx_arg->type == RAY_I64) { + if (tt == RAY_STR) { + ray_release(tbl); ray_release(idx_arg); ray_release(val); + return ray_error("type", NULL); + } + if (val->type != tt && val->type != -tt) { + ray_release(tbl); ray_release(idx_arg); ray_release(val); + return ray_error("type", NULL); + } + result = ray_vec_insert_many(tbl, idx_arg, val); + ray_release(tbl); + } else { + ray_release(tbl); ray_release(idx_arg); ray_release(val); + return ray_error("type", NULL); + } + } + ray_release(idx_arg); + ray_release(val); + } + + if (inplace_sym 
>= 0 && result && !RAY_IS_ERR(result)) { + ray_env_set(inplace_sym, result); + ray_retain(result); + } + return result; + } + + /* Table target: arity-3 positional row insert is not implemented. */ + if (n != 2) { ray_release(tbl); return ray_error("nyi", NULL); } + + /* Evaluate the row argument (skip if already evaluated) */ + ray_t* row = already_eval ? (ray_retain(args[1]), args[1]) : ray_eval(args[1]); + if (!row || RAY_IS_ERR(row)) { ray_release(tbl); return row ? row : ray_error("type", NULL); } + if (tbl->type != RAY_TABLE) { ray_release(tbl); ray_release(row); return ray_error("type", NULL); } + + int64_t ncols = ray_table_ncols(tbl); + ray_t* row_orig = row; /* keep original eval result for cleanup */ + + if (!is_list(row) && row->type != RAY_TABLE && row->type != RAY_DICT) { ray_release(tbl); ray_release(row); return ray_error("type", NULL); } + + /* Table row: convert to list of column vectors */ + ray_t* tbl_row_list = NULL; + if (row->type == RAY_TABLE) { + int64_t src_ncols = ray_table_ncols(row); + if (src_ncols != ncols) { ray_release(tbl); ray_release(row); return ray_error("domain", NULL); } + tbl_row_list = ray_alloc(ncols * sizeof(ray_t*)); + if (!tbl_row_list) { ray_release(tbl); ray_release(row_orig); return ray_error("oom", NULL); } + tbl_row_list->type = RAY_LIST; + tbl_row_list->len = ncols; + ray_t** trl = (ray_t**)ray_data(tbl_row_list); + for (int64_t c = 0; c < ncols; c++) { + int64_t col_name = ray_table_col_name(tbl, c); + ray_t* src_col = ray_table_get_col(row, col_name); + if (!src_col) src_col = ray_table_get_col_idx(row, c); + if (!src_col) { + tbl_row_list->len = 0; + ray_free(tbl_row_list); + ray_release(tbl); ray_release(row_orig); + return ray_error("domain", NULL); + } + trl[c] = src_col; + ray_retain(src_col); + } + row = tbl_row_list; + } + + /* Dict row: extract values in table column order */ + ray_t* dict_vals = NULL; + if (row->type == RAY_DICT) { + ray_t* dkeys = ray_dict_keys(row); + ray_t* dvals = 
ray_dict_vals(row); + if (!dkeys || dkeys->type != RAY_SYM || !dvals) { + ray_release(tbl); ray_release(row_orig); + return ray_error("type", NULL); + } + int64_t dict_len = dkeys->len; + + dict_vals = ray_alloc(ncols * sizeof(ray_t*)); + if (!dict_vals) { ray_release(tbl); ray_release(row_orig); return ray_error("oom", NULL); } + dict_vals->type = RAY_LIST; + dict_vals->len = ncols; + ray_t** dv = (ray_t**)ray_data(dict_vals); + + for (int64_t c = 0; c < ncols; c++) { + int64_t col_name = ray_table_col_name(tbl, c); + dv[c] = NULL; + for (int64_t d = 0; d < dict_len; d++) { + int64_t dk = ray_read_sym(ray_data(dkeys), d, RAY_SYM, dkeys->attrs); + if (dk != col_name) continue; + if (dvals->type == RAY_LIST) { + dv[c] = ((ray_t**)ray_data(dvals))[d]; + if (dv[c]) ray_retain(dv[c]); + } else { + int alloc = 0; + dv[c] = collection_elem(dvals, d, &alloc); + if (!alloc && dv[c]) ray_retain(dv[c]); + } + break; + } + } + /* Verify all dict keys exist as table columns */ + for (int64_t d = 0; d < dict_len; d++) { + int64_t dk = ray_read_sym(ray_data(dkeys), d, RAY_SYM, dkeys->attrs); + int found_in_tbl = 0; + for (int64_t c = 0; c < ncols; c++) { + if (ray_table_col_name(tbl, c) == dk) { found_in_tbl = 1; break; } + } + if (!found_in_tbl) { + for (int64_t c = 0; c < ncols; c++) if (dv[c]) ray_release(dv[c]); + dict_vals->len = 0; + ray_free(dict_vals); + ray_release(tbl); ray_release(row_orig); + return ray_error("value", NULL); + } + } + row = dict_vals; + } + + if (ray_len(row) != ncols) { + if (dict_vals) { + for (int64_t c = 0; c < ncols; c++) ray_release(((ray_t**)ray_data(dict_vals))[c]); + dict_vals->len = 0; + ray_free(dict_vals); + } + ray_release(tbl); ray_release(row_orig); + return ray_error("domain", NULL); + } + + ray_t** row_elems = (ray_t**)ray_data(row); + int64_t nrows = ray_table_nrows(tbl); + + ray_t* result = ray_table_new(ncols); + if (RAY_IS_ERR(result)) return result; + + for (int64_t c = 0; c < ncols; c++) { + int64_t col_name = 
ray_table_col_name(tbl, c); + ray_t* orig_col = ray_table_get_col_idx(tbl, c); + int8_t ct = orig_col->type; + + ray_t* new_col = ray_vec_new(ct, nrows + 1); + if (RAY_IS_ERR(new_col)) { ray_release(result); return new_col; } + + /* Copy existing data */ + bool src_has_nulls = (orig_col->attrs & RAY_ATTR_HAS_NULLS) != 0; + if (ct == RAY_STR) { + for (int64_t r = 0; r < nrows; r++) { + if (src_has_nulls && ray_vec_is_null(orig_col, r)) { + new_col = ray_str_vec_append(new_col, "", 0); + if (!RAY_IS_ERR(new_col)) + ray_vec_set_null(new_col, new_col->len - 1, true); + } else { + size_t slen = 0; + const char* sp = ray_str_vec_get(orig_col, r, &slen); + new_col = ray_str_vec_append(new_col, sp ? sp : "", sp ? slen : 0); + } + if (RAY_IS_ERR(new_col)) { ray_release(result); return new_col; } + } + } else if (ct == RAY_SYM) { + for (int64_t r = 0; r < nrows; r++) { + int64_t sym_val = ray_read_sym(ray_data(orig_col), r, orig_col->type, orig_col->attrs); + new_col = ray_vec_append(new_col, &sym_val); + if (RAY_IS_ERR(new_col)) { ray_release(result); return new_col; } + if (src_has_nulls && ray_vec_is_null(orig_col, r)) + ray_vec_set_null(new_col, new_col->len - 1, true); + } + } else { + size_t elem_sz = (ct == RAY_BOOL) ? 
1 : 8; + uint8_t* src = (uint8_t*)ray_data(orig_col); + for (int64_t r = 0; r < nrows; r++) { + new_col = ray_vec_append(new_col, src + r * elem_sz); + if (RAY_IS_ERR(new_col)) { ray_release(result); return new_col; } + if (src_has_nulls && ray_vec_is_null(orig_col, r)) + ray_vec_set_null(new_col, new_col->len - 1, true); + } + } + + /* Append new row value(s) — atom for single row, vector for multi-row */ + if (!row_elems[c]) { + /* NULL = null value for this column type */ + ray_t* null_atom = ray_typed_null(-ct); + new_col = append_atom_to_col(new_col, null_atom); + ray_release(null_atom); + } else if (ray_is_atom(row_elems[c])) { + new_col = append_atom_to_col(new_col, row_elems[c]); + } else if (ray_is_vec(row_elems[c]) || row_elems[c]->type == RAY_LIST) { + ray_t* merged = ray_concat_fn(new_col, row_elems[c]); + ray_release(new_col); + new_col = merged; + } else { + new_col = append_atom_to_col(new_col, row_elems[c]); + } + if (RAY_IS_ERR(new_col)) { ray_release(result); return new_col; } + + result = ray_table_add_col(result, col_name, new_col); + ray_release(new_col); + if (RAY_IS_ERR(result)) return result; + } + + /* Cleanup dict_vals, tbl_row_list, and original row */ + if (dict_vals) { + ray_t** dv = (ray_t**)ray_data(dict_vals); + for (int64_t c = 0; c < ncols; c++) if (dv[c]) ray_release(dv[c]); + dict_vals->len = 0; /* prevent ray_free from double-releasing children */ + ray_free(dict_vals); + } + if (tbl_row_list) { + ray_t** trl = (ray_t**)ray_data(tbl_row_list); + for (int64_t c = 0; c < ncols; c++) if (trl[c]) ray_release(trl[c]); + tbl_row_list->len = 0; + ray_free(tbl_row_list); + } + ray_release(tbl); + ray_release(row_orig); + + /* In-place: update the variable in the env */ + if (inplace_sym >= 0 && !RAY_IS_ERR(result)) { + ray_env_set(inplace_sym, result); + ray_retain(result); + return result; + } + return result; +} + +/* (upsert table key_col (list val1 val2 ...)) — update row if key matches, else insert. 
+ * Special form: first arg may be 'sym for in-place, other args are evaluated. */ +ray_t* ray_upsert_fn(ray_t** args, int64_t n) { + if (n < 3) return ray_error("domain", NULL); + + /* Detect calling convention: already-evaluated args (from recursive call) vs raw parse tree */ + int64_t inplace_sym = -1; + ray_t* tbl_raw = args[0]; + int already_eval = (tbl_raw && tbl_raw->type == RAY_TABLE); + ray_t* tbl; + + if (!already_eval && tbl_raw && tbl_raw->type == -RAY_SYM && !(tbl_raw->attrs & RAY_ATTR_NAME)) { + inplace_sym = tbl_raw->i64; + tbl = ray_env_get(inplace_sym); + if (!tbl || RAY_IS_ERR(tbl)) return ray_error("domain", NULL); + ray_retain(tbl); + } else if (already_eval) { + tbl = tbl_raw; + ray_retain(tbl); + } else { + tbl = ray_eval(tbl_raw); + if (!tbl || RAY_IS_ERR(tbl)) return tbl ? tbl : ray_error("type", NULL); + } + + ray_t* key_sym = already_eval ? (ray_retain(args[1]), args[1]) : ray_eval(args[1]); + if (!key_sym || RAY_IS_ERR(key_sym)) { ray_release(tbl); return key_sym ? key_sym : ray_error("type", NULL); } + + ray_t* row = already_eval ? (ray_retain(args[2]), args[2]) : ray_eval(args[2]); + if (!row || RAY_IS_ERR(row)) { ray_release(tbl); ray_release(key_sym); return row ? row : ray_error("type", NULL); } + + if (tbl->type != RAY_TABLE) { ray_release(tbl); ray_release(key_sym); ray_release(row); return ray_error("type", NULL); } + if (!is_list(row) && row->type != RAY_TABLE && row->type != RAY_DICT) { ray_release(tbl); ray_release(key_sym); ray_release(row); return ray_error("type", NULL); } + + int64_t ncols = ray_table_ncols(tbl); + + /* Table row: iterate row-by-row for proper upsert semantics */ + if (row->type == RAY_TABLE) { + int64_t src_nrows = ray_table_nrows(row); + int64_t src_ncols = ray_table_ncols(row); + + /* Zero-row payload → upsert is a no-op regardless of payload + * schema. Skip all schema-strictness here: rejecting an empty + * partial payload (e.g. 
for missing key columns) regresses the + * pre-existing "empty input = do nothing" behavior. No data + * flows, so neither silent-drop nor null-key crashes are + * possible below. */ + if (src_nrows == 0) { + ray_release(key_sym); ray_release(row); + return tbl; + } + + /* Schema-strictness (table payload is a PARTIAL view — columns + * in target but not in source are intentionally null-filled). + * We only need to reject: + * (a) a source column whose name isn't in the target (extra + * → silent drop of user data); + * (b) a source column name that appears more than once in the + * source (ambiguous); + * (c) a source column name whose target column appears more + * than once in `tbl` (name-keyed gather can't tell which + * target slot the value belongs to → silent duplication). + * Duplicate target columns whose names don't appear in `row` + * are harmless — they get null-filled like any other missing + * column. */ + for (int64_t sc = 0; sc < src_ncols; sc++) { + int64_t scn = ray_table_col_name(row, sc); + int64_t tbl_matches = 0, src_matches = 0; + for (int64_t i = 0; i < ncols; i++) if (ray_table_col_name(tbl, i) == scn) tbl_matches++; + for (int64_t i = 0; i < src_ncols; i++) if (ray_table_col_name(row, i) == scn) src_matches++; + if (tbl_matches != 1 || src_matches != 1) { + ray_release(tbl); ray_release(key_sym); ray_release(row); + return ray_error("value", NULL); + } + } + + /* Partial updates may null-fill ordinary columns, but the key + * column(s) MUST be present — otherwise the recursive upsert + * reads a NULL from row_elems[key_col] and segfaults. Resolve + * key names from key_sym and require each to appear in row. 
*/ + int64_t key_names[16]; + int64_t n_key = 0; + if (key_sym->type == -RAY_SYM) { + key_names[n_key++] = key_sym->i64; + } else if (key_sym->type == -RAY_I64) { + int64_t k = key_sym->i64; + if (k <= 0 || k > ncols || k > 16) { + ray_release(tbl); ray_release(key_sym); ray_release(row); + return ray_error("domain", NULL); + } + for (int64_t i = 0; i < k; i++) + key_names[n_key++] = ray_table_col_name(tbl, i); + } else { + ray_release(tbl); ray_release(key_sym); ray_release(row); + return ray_error("type", NULL); + } + for (int64_t k = 0; k < n_key; k++) { + int found = 0; + for (int64_t i = 0; i < src_ncols; i++) + if (ray_table_col_name(row, i) == key_names[k]) { found = 1; break; } + if (!found) { + ray_release(tbl); ray_release(key_sym); ray_release(row); + return ray_error("value", NULL); + } + } + + /* Gather source columns in target order (now guaranteed 1-to-1). */ + ray_t* src_cols[64]; + for (int64_t c = 0; c < ncols && c < 64; c++) { + int64_t cn = ray_table_col_name(tbl, c); + src_cols[c] = ray_table_get_col(row, cn); + } + ray_t* cur_tbl = tbl; + ray_retain(cur_tbl); + for (int64_t r = 0; r < src_nrows; r++) { + ray_t* single = ray_alloc(ncols * sizeof(ray_t*)); + if (!single) { ray_release(cur_tbl); ray_release(tbl); ray_release(key_sym); ray_release(row); return ray_error("oom", NULL); } + single->type = RAY_LIST; + single->len = ncols; + ray_t** sr = (ray_t**)ray_data(single); + for (int64_t c = 0; c < ncols; c++) { + int alloc = 0; + sr[c] = src_cols[c] ? 
collection_elem(src_cols[c], r, &alloc) : NULL; + if (!alloc && sr[c]) ray_retain(sr[c]); + } + ray_t* upsert_args[3] = { cur_tbl, key_sym, single }; + ray_t* new_tbl = ray_upsert_fn(upsert_args, 3); + for (int64_t c = 0; c < ncols; c++) if (sr[c]) ray_release(sr[c]); + single->len = 0; + ray_free(single); + ray_release(cur_tbl); + if (RAY_IS_ERR(new_tbl)) { ray_release(tbl); ray_release(key_sym); ray_release(row); return new_tbl; } + cur_tbl = new_tbl; + } + ray_release(tbl); + ray_release(key_sym); + ray_release(row); + if (inplace_sym >= 0 && !RAY_IS_ERR(cur_tbl)) { + ray_env_set(inplace_sym, cur_tbl); + ray_retain(cur_tbl); + } + return cur_tbl; + } + + /* Dict row: extract values in column order to create a plain list */ + ray_t* dict_row_list = NULL; + if (row->type == RAY_DICT) { + ray_t* dkeys = ray_dict_keys(row); + ray_t* dvals = ray_dict_vals(row); + if (!dkeys || dkeys->type != RAY_SYM || !dvals) { + ray_release(tbl); ray_release(key_sym); ray_release(row); + return ray_error("type", NULL); + } + int64_t n_pairs = dkeys->len; + + /* Schema-strictness: every column name must appear exactly once + * on each side. Mirrors the table-payload path. 
*/ + if (n_pairs != ncols) { + ray_release(tbl); ray_release(key_sym); ray_release(row); + return ray_error("value", NULL); + } + for (int64_t c = 0; c < ncols; c++) { + int64_t cn = ray_table_col_name(tbl, c); + int64_t tbl_matches = 0, dict_matches = 0; + for (int64_t i = 0; i < ncols; i++) + if (ray_table_col_name(tbl, i) == cn) tbl_matches++; + for (int64_t d = 0; d < n_pairs; d++) { + int64_t dk = ray_read_sym(ray_data(dkeys), d, RAY_SYM, dkeys->attrs); + if (dk == cn) dict_matches++; + } + if (tbl_matches != 1 || dict_matches != 1) { + ray_release(tbl); ray_release(key_sym); ray_release(row); + return ray_error("value", NULL); + } + } + + dict_row_list = ray_alloc(ncols * sizeof(ray_t*)); + if (!dict_row_list) { ray_release(tbl); ray_release(key_sym); ray_release(row); return ray_error("oom", NULL); } + dict_row_list->type = RAY_LIST; + dict_row_list->len = ncols; + ray_t** drl = (ray_t**)ray_data(dict_row_list); + for (int64_t c = 0; c < ncols; c++) { + int64_t col_name = ray_table_col_name(tbl, c); + drl[c] = NULL; + for (int64_t d = 0; d < n_pairs; d++) { + int64_t dk = ray_read_sym(ray_data(dkeys), d, RAY_SYM, dkeys->attrs); + if (dk != col_name) continue; + if (dvals->type == RAY_LIST) { + drl[c] = ((ray_t**)ray_data(dvals))[d]; + if (drl[c]) ray_retain(drl[c]); + } else { + int alloc = 0; + drl[c] = collection_elem(dvals, d, &alloc); + if (!alloc && drl[c]) ray_retain(drl[c]); + } + break; + } + } + ray_release(row); + row = dict_row_list; + } + + if (ray_len(row) != ncols) { ray_release(tbl); ray_release(key_sym); ray_release(row); return ray_error("domain", NULL); } + + ray_t** row_elems = (ray_t**)ray_data(row); + int64_t nrows = ray_table_nrows(tbl); + + /* Determine key columns — integer N means "first N columns are keys" */ + int64_t n_key_cols = 1; + int64_t key_col_indices[16]; + if (key_sym->type == -RAY_SYM) { + key_col_indices[0] = -1; + for (int64_t c = 0; c < ncols; c++) { + if (ray_table_col_name(tbl, c) == key_sym->i64) { + 
key_col_indices[0] = c; + break; + } + } + if (key_col_indices[0] < 0) { ray_release(tbl); ray_release(key_sym); ray_release(row); return ray_error("domain", NULL); } + } else if (key_sym->type == -RAY_I64) { + n_key_cols = key_sym->i64; + if (n_key_cols <= 0 || n_key_cols > ncols || n_key_cols > 16) { ray_release(tbl); ray_release(key_sym); ray_release(row); return ray_error("domain", NULL); } + for (int64_t k = 0; k < n_key_cols; k++) key_col_indices[k] = k; + } else { + ray_release(tbl); ray_release(key_sym); ray_release(row); + return ray_error("type", NULL); + } + + /* Multi-row upsert: if row values are vectors, iterate row-by-row */ + ray_t* key_elem = row_elems[key_col_indices[0]]; + if (ray_is_vec(key_elem) || key_elem->type == RAY_LIST) { + int64_t new_nrows = ray_len(key_elem); + ray_t* cur_tbl = tbl; + ray_retain(cur_tbl); + for (int64_t r = 0; r < new_nrows; r++) { + /* Build single-row list from multi-row columns */ + ray_t* single_row = ray_alloc(ncols * sizeof(ray_t*)); + if (!single_row) { ray_release(cur_tbl); ray_release(tbl); ray_release(key_sym); ray_release(row); return ray_error("oom", NULL); } + single_row->type = RAY_LIST; + single_row->len = ncols; + ray_t** sr = (ray_t**)ray_data(single_row); + for (int64_t c = 0; c < ncols; c++) { + int alloc = 0; + sr[c] = collection_elem(row_elems[c], r, &alloc); + if (!alloc && sr[c]) ray_retain(sr[c]); + } + /* Upsert single row into current table */ + ray_t* upsert_args[3] = { cur_tbl, key_sym, single_row }; + ray_t* new_tbl = ray_upsert_fn(upsert_args, 3); + /* Clean up single_row */ + for (int64_t c = 0; c < ncols; c++) if (sr[c]) ray_release(sr[c]); + single_row->len = 0; + ray_free(single_row); + ray_release(cur_tbl); + if (RAY_IS_ERR(new_tbl)) { ray_release(tbl); ray_release(key_sym); ray_release(row); return new_tbl; } + cur_tbl = new_tbl; + } + ray_release(tbl); + ray_release(key_sym); + ray_release(row); + if (inplace_sym >= 0 && !RAY_IS_ERR(cur_tbl)) { + ray_env_set(inplace_sym, cur_tbl); + 
ray_retain(cur_tbl); + } + return cur_tbl; + } + + /* Type-check key columns before searching */ + for (int64_t k = 0; k < n_key_cols; k++) { + int64_t kci = key_col_indices[k]; + ray_t* key_col = ray_table_get_col_idx(tbl, kci); + ray_t* key_atom = row_elems[kci]; + int8_t kt = key_col->type; + if (kt == RAY_STR && key_atom->type != -RAY_STR) { + ray_release(tbl); ray_release(key_sym); ray_release(row); + return ray_error("type", NULL); + } + if (kt == RAY_SYM && key_atom->type != -RAY_SYM) { + ray_release(tbl); ray_release(key_sym); ray_release(row); + return ray_error("type", NULL); + } + } + + /* Find the row to update by composite key match */ + int64_t match_row = -1; + for (int64_t r = 0; r < nrows; r++) { + int match = 1; + for (int64_t k = 0; k < n_key_cols && match; k++) { + int64_t kci = key_col_indices[k]; + ray_t* key_col = ray_table_get_col_idx(tbl, kci); + ray_t* key_atom = row_elems[kci]; + int8_t kt = key_col->type; + if (kt == RAY_F64) { + double needle = (key_atom->type == -RAY_F64) ? key_atom->f64 : (double)key_atom->i64; + if (((double*)ray_data(key_col))[r] != needle) match = 0; + } else if (kt == RAY_SYM) { + if (ray_read_sym(ray_data(key_col), r, key_col->type, key_col->attrs) != key_atom->i64) match = 0; + } else if (kt == RAY_STR) { + const char* ns = ray_str_ptr(key_atom); + size_t nl = ray_str_len(key_atom); + size_t rl = 0; + const char* rs = ray_str_vec_get(key_col, r, &rl); + if (rl != nl || (nl > 0 && (!rs || !ns || memcmp(rs, ns, nl) != 0))) match = 0; + } else { + int64_t needle = elem_as_i64(key_atom); + int64_t existing = (kt == RAY_I64 || kt == RAY_TIMESTAMP) ? + ((int64_t*)ray_data(key_col))[r] : + (kt == RAY_I32 || kt == RAY_DATE || kt == RAY_TIME) ? + (int64_t)((int32_t*)ray_data(key_col))[r] : + (kt == RAY_BOOL) ? 
(int64_t)((uint8_t*)ray_data(key_col))[r] : + ((int64_t*)ray_data(key_col))[r]; + if (existing != needle) match = 0; + } + } + if (match) { match_row = r; break; } + } + + if (match_row < 0) { + /* Key not found — insert: pass pre-evaluated args */ + ray_t* insert_args[2] = { tbl, row }; + ray_t* result = ray_insert_fn(insert_args, 2); + ray_release(tbl); + ray_release(key_sym); + ray_release(row); + if (inplace_sym >= 0 && !RAY_IS_ERR(result)) { + ray_env_set(inplace_sym, result); + ray_retain(result); + } + return result; + } + + /* Key found — update that row */ + ray_t* result = ray_table_new(ncols); + if (RAY_IS_ERR(result)) { ray_release(tbl); ray_release(key_sym); ray_release(row); return result; } + + for (int64_t c = 0; c < ncols; c++) { + int64_t col_name = ray_table_col_name(tbl, c); + ray_t* orig_col = ray_table_get_col_idx(tbl, c); + int8_t ct = orig_col->type; + + ray_t* new_col = ray_vec_new(ct, nrows); + if (RAY_IS_ERR(new_col)) { ray_release(result); ray_release(tbl); ray_release(key_sym); ray_release(row); return new_col; } + + /* If row_elems[c] is NULL (missing column), keep original values */ + int has_new_val = (row_elems[c] != NULL); + + if (ct == RAY_STR) { + for (int64_t r = 0; r < nrows; r++) { + if (r == match_row && has_new_val) { + new_col = append_atom_to_col(new_col, row_elems[c]); + } else { + size_t slen = 0; + const char* sp = ray_str_vec_get(orig_col, r, &slen); + new_col = ray_str_vec_append(new_col, sp ? sp : "", sp ? 
slen : 0); + } + if (RAY_IS_ERR(new_col)) { ray_release(result); ray_release(tbl); ray_release(key_sym); ray_release(row); return new_col; } + } + } else if (ct == RAY_SYM) { + for (int64_t r = 0; r < nrows; r++) { + if (r == match_row && has_new_val) { + new_col = append_atom_to_col(new_col, row_elems[c]); + } else { + int64_t sym_val = ray_read_sym(ray_data(orig_col), r, orig_col->type, orig_col->attrs); + new_col = ray_vec_append(new_col, &sym_val); + } + if (RAY_IS_ERR(new_col)) { ray_release(result); ray_release(tbl); ray_release(key_sym); ray_release(row); return new_col; } + } + } else { + size_t elem_sz = (ct == RAY_BOOL) ? 1 : 8; + uint8_t* src = (uint8_t*)ray_data(orig_col); + for (int64_t r = 0; r < nrows; r++) { + if (r == match_row && has_new_val) { + new_col = append_atom_to_col(new_col, row_elems[c]); + } else { + new_col = ray_vec_append(new_col, src + r * elem_sz); + } + if (RAY_IS_ERR(new_col)) { ray_release(result); ray_release(tbl); ray_release(key_sym); ray_release(row); return new_col; } + } + } + + result = ray_table_add_col(result, col_name, new_col); + ray_release(new_col); + if (RAY_IS_ERR(result)) { ray_release(tbl); ray_release(key_sym); ray_release(row); return result; } + } + + ray_release(tbl); + ray_release(key_sym); + ray_release(row); + + if (inplace_sym >= 0 && !RAY_IS_ERR(result)) { + ray_env_set(inplace_sym, result); + ray_retain(result); + } + return result; +} + +/* ══════════════════════════════════════════ + * Join operations + * ══════════════════════════════════════════ */ + +/* Shared implementation for left-join (join_type=1) and inner-join (join_type=0). 
+ * (left-join t1 t2 [key ...]) / (inner-join t1 t2 [key ...]) */ +static ray_t* join_impl(ray_t** args, int64_t n, uint8_t join_type) { + if (n < 3) return ray_error("domain", NULL); + + ray_t* left_tbl = args[0]; + ray_t* right_tbl = args[1]; + ray_t* keys = args[2]; + + /* Detect alternative calling convention: (join [keys] t1 t2) */ + if (left_tbl->type != RAY_TABLE && args[1]->type == RAY_TABLE && args[2]->type == RAY_TABLE) { + keys = args[0]; + left_tbl = args[1]; + right_tbl = args[2]; + } + + if (left_tbl->type != RAY_TABLE || right_tbl->type != RAY_TABLE) + return ray_error("type", NULL); + ray_t* _bxk = NULL; + keys = unbox_vec_arg(keys, &_bxk); + if (RAY_IS_ERR(keys)) return keys; + if (!is_list(keys)) + { if (_bxk) ray_release(_bxk); return ray_error("type", NULL); } + + int64_t nk = ray_len(keys); + if (nk == 0 || nk > 16) { if (_bxk) ray_release(_bxk); return ray_error("domain", NULL); } + ray_t** key_elems = (ray_t**)ray_data(keys); + + ray_graph_t* g = ray_graph_new(left_tbl); + if (!g) { if (_bxk) ray_release(_bxk); return ray_error("oom", NULL); } + + ray_op_t* left_node = ray_const_table(g, left_tbl); + ray_op_t* right_node = ray_const_table(g, right_tbl); + + ray_op_t* lk[16], *rk[16]; + for (int64_t i = 0; i < nk; i++) { + if (key_elems[i]->type != -RAY_SYM) { + ray_graph_free(g); if (_bxk) ray_release(_bxk); + return ray_error("type", NULL); + } + ray_t* name_str = ray_sym_str(key_elems[i]->i64); + if (!name_str) { ray_graph_free(g); if (_bxk) ray_release(_bxk); return ray_error("domain", NULL); } + lk[i] = ray_scan(g, ray_str_ptr(name_str)); + rk[i] = ray_scan(g, ray_str_ptr(name_str)); + if (!lk[i] || !rk[i]) { ray_graph_free(g); if (_bxk) ray_release(_bxk); return ray_error("domain", NULL); } + } + + if (_bxk) ray_release(_bxk); + + ray_op_t* jn = ray_join(g, left_node, lk, right_node, rk, + (uint8_t)nk, join_type); + if (!jn) { ray_graph_free(g); return ray_error("oom", NULL); } + + jn = ray_optimize(g, jn); + ray_t* result = 
ray_execute(g, jn); + ray_graph_free(g); + return result; +} + +ray_t* ray_left_join_fn(ray_t** args, int64_t n) { return join_impl(args, n, 1); } +ray_t* ray_inner_join_fn(ray_t** args, int64_t n) { return join_impl(args, n, 0); } + +/* (antijoin left right [keys]) + * Anti-semi-join: keep rows from left that have NO match in right on keys. */ +static ray_t* antijoin_impl(ray_t** args, int64_t n) { + if (n < 3) return ray_error("domain", NULL); + + ray_t* left_tbl = args[0]; + ray_t* right_tbl = args[1]; + ray_t* keys = args[2]; + + /* Detect alternative calling convention: (antijoin [keys] t1 t2) */ + if (left_tbl->type != RAY_TABLE && args[1]->type == RAY_TABLE && args[2]->type == RAY_TABLE) { + keys = args[0]; + left_tbl = args[1]; + right_tbl = args[2]; + } + + if (left_tbl->type != RAY_TABLE || right_tbl->type != RAY_TABLE) + return ray_error("type", NULL); + ray_t* _bxk = NULL; + keys = unbox_vec_arg(keys, &_bxk); + if (RAY_IS_ERR(keys)) return keys; + if (!is_list(keys)) + { if (_bxk) ray_release(_bxk); return ray_error("type", NULL); } + + int64_t nk = ray_len(keys); + if (nk == 0 || nk > 16) { if (_bxk) ray_release(_bxk); return ray_error("domain", NULL); } + ray_t** key_elems = (ray_t**)ray_data(keys); + + ray_graph_t* g = ray_graph_new(left_tbl); + if (!g) { if (_bxk) ray_release(_bxk); return ray_error("oom", NULL); } + + ray_op_t* left_node = ray_const_table(g, left_tbl); + ray_op_t* right_node = ray_const_table(g, right_tbl); + + ray_op_t* lk[16], *rk[16]; + for (int64_t i = 0; i < nk; i++) { + if (key_elems[i]->type != -RAY_SYM) { + ray_graph_free(g); if (_bxk) ray_release(_bxk); + return ray_error("type", NULL); + } + ray_t* name_str = ray_sym_str(key_elems[i]->i64); + if (!name_str) { ray_graph_free(g); if (_bxk) ray_release(_bxk); return ray_error("domain", NULL); } + lk[i] = ray_scan(g, ray_str_ptr(name_str)); + rk[i] = ray_scan(g, ray_str_ptr(name_str)); + if (!lk[i] || !rk[i]) { ray_graph_free(g); if (_bxk) ray_release(_bxk); return 
ray_error("domain", NULL); } + } + + if (_bxk) ray_release(_bxk); + + ray_op_t* jn = ray_antijoin(g, left_node, lk, right_node, rk, (uint8_t)nk); + if (!jn) { ray_graph_free(g); return ray_error("oom", NULL); } + + jn = ray_optimize(g, jn); + ray_t* result = ray_execute(g, jn); + ray_graph_free(g); + return result; +} + +ray_t* ray_anti_join_fn(ray_t** args, int64_t n) { return antijoin_impl(args, n); } + +/* ------------------------------------------------------------------------ */ +/* window-join parallel worker */ +/* ------------------------------------------------------------------------ */ + +#define WJ_MAX_AGG 16 + +typedef struct { + int64_t cnt; + int64_t sum_i; + double sum_f; + int64_t sum_sq_i; + double sum_sq_f; + int64_t extreme_i; + double extreme_f; + int64_t prod_i; + double prod_f; +} wj_acc_t; + +typedef struct { + int64_t left_nrows; + int64_t right_nrows; + int64_t n_eq; + int64_t n_agg; + + /* Left-row metadata — pre-extracted to int64 so workers can read + * without touching any ray_t objects (no locking, no allocation). 
*/ + const int64_t* lo_arr; + const int64_t* hi_arr; + const int64_t* left_eq_arr[WJ_MAX_AGG]; + + /* Right-side sort order and time column (sorted rank -> original idx) */ + const int64_t* right_sort; + const int64_t* rt_time_i; + + /* Right equality columns (raw), kept for binary-search compares */ + const void* eq_data[WJ_MAX_AGG]; + int8_t eq_type[WJ_MAX_AGG]; + uint8_t eq_attrs[WJ_MAX_AGG]; + + /* Per-agg metadata and preloaded sorted source vectors */ + uint8_t agg_raw[WJ_MAX_AGG]; + uint16_t agg_ops[WJ_MAX_AGG]; + int8_t agg_result_types[WJ_MAX_AGG]; + int agg_is_float[WJ_MAX_AGG]; + const int64_t* sorted_i[WJ_MAX_AGG]; + const double* sorted_f[WJ_MAX_AGG]; + const uint8_t* sorted_nn[WJ_MAX_AGG]; + + /* Per-agg result output — writers index by lr directly */ + void* result_data[WJ_MAX_AGG]; + uint8_t* result_null[WJ_MAX_AGG]; /* 1 byte per row: 1 = null */ +} wj_scan_ctx_t; + +static void wj_scan_fn(void* ctx_, uint32_t worker_id, int64_t start, int64_t end) { + (void)worker_id; + wj_scan_ctx_t* c = (wj_scan_ctx_t*)ctx_; + wj_acc_t acc[WJ_MAX_AGG]; + int64_t n_eq = c->n_eq; + int64_t n_agg = c->n_agg; + int64_t rn = c->right_nrows; + const int64_t* right_sort = c->right_sort; + const int64_t* rt_time_i = c->rt_time_i; + + for (int64_t lr = start; lr < end; lr++) { + int64_t lo = c->lo_arr[lr]; + int64_t hi = c->hi_arr[lr]; + + int64_t target_eq[WJ_MAX_AGG]; + for (int64_t e = 0; e < n_eq; e++) + target_eq[e] = c->left_eq_arr[e][lr]; + + /* lower_bound: first rank with (eq, time) >= (target_eq, lo) */ + int64_t lb = 0, lb_hi = rn; + while (lb < lb_hi) { + int64_t m = (lb + lb_hi) >> 1; + int64_t ri = right_sort[m]; + int cmp = 0; + for (int64_t e = 0; e < n_eq && cmp == 0; e++) { + int64_t rv = read_col_i64(c->eq_data[e], ri, c->eq_type[e], c->eq_attrs[e]); + if (rv < target_eq[e]) cmp = -1; + else if (rv > target_eq[e]) cmp = 1; + } + if (cmp == 0 && rt_time_i[ri] < lo) cmp = -1; + if (cmp < 0) lb = m + 1; else lb_hi = m; + } + int64_t ub = lb, ub_hi = rn; + 
while (ub < ub_hi) { + int64_t m = (ub + ub_hi) >> 1; + int64_t ri = right_sort[m]; + int cmp = 0; + for (int64_t e = 0; e < n_eq && cmp == 0; e++) { + int64_t rv = read_col_i64(c->eq_data[e], ri, c->eq_type[e], c->eq_attrs[e]); + if (rv < target_eq[e]) cmp = -1; + else if (rv > target_eq[e]) cmp = 1; + } + if (cmp == 0 && rt_time_i[ri] <= hi) cmp = -1; + if (cmp < 0) ub = m + 1; else ub_hi = m; + } + + memset(acc, 0, sizeof(acc)); + for (int64_t a = 0; a < n_agg; a++) { + if (c->agg_ops[a] == OP_PROD) { acc[a].prod_i = 1; acc[a].prod_f = 1.0; } + } + + /* Per-agg tight scan (hoisted switch, sequential SIMD-friendly read) */ + for (int64_t a = 0; a < n_agg; a++) { + if (c->agg_raw[a]) continue; + wj_acc_t* A = &acc[a]; + uint16_t op = c->agg_ops[a]; + if (op == OP_COUNT) { A->cnt += (ub - lb); continue; } + + const uint8_t* nn = c->sorted_nn[a]; + if (c->agg_is_float[a]) { + const double* ss = c->sorted_f[a]; + switch (op) { + case OP_SUM: case OP_AVG: { + double sum = 0; int64_t cnt = 0; + if (nn) { for (int64_t k = lb; k < ub; k++) if (nn[k]) { sum += ss[k]; cnt++; } } + else { for (int64_t k = lb; k < ub; k++) sum += ss[k]; cnt = ub - lb; } + A->sum_f = sum; A->cnt = cnt; break; + } + case OP_VAR: case OP_VAR_POP: + case OP_STDDEV: case OP_STDDEV_POP: { + double sum = 0, sum2 = 0; int64_t cnt = 0; + if (nn) { + for (int64_t k = lb; k < ub; k++) + if (nn[k]) { double v = ss[k]; sum += v; sum2 += v * v; cnt++; } + } else { + for (int64_t k = lb; k < ub; k++) { double v = ss[k]; sum += v; sum2 += v * v; } + cnt = ub - lb; + } + A->sum_f = sum; A->sum_sq_f = sum2; A->cnt = cnt; break; + } + case OP_PROD: { + double p = 1.0; int64_t cnt = 0; + if (nn) { for (int64_t k = lb; k < ub; k++) if (nn[k]) { p *= ss[k]; cnt++; } } + else { for (int64_t k = lb; k < ub; k++) p *= ss[k]; cnt = ub - lb; } + A->prod_f = p; A->cnt = cnt; break; + } + case OP_MIN: { + int64_t k = lb; + if (nn) { + double best = 0; int64_t cnt = 0; + for (; k < ub; k++) if (nn[k]) { best = ss[k]; cnt 
= 1; k++; break; } + for (; k < ub; k++) if (nn[k]) { double v = ss[k]; if (v < best) best = v; cnt++; } + A->extreme_f = best; A->cnt = cnt; + } else if (k < ub) { + double best = ss[k++]; + for (; k < ub; k++) { double v = ss[k]; if (v < best) best = v; } + A->extreme_f = best; A->cnt = ub - lb; + } + break; + } + case OP_MAX: { + int64_t k = lb; + if (nn) { + double best = 0; int64_t cnt = 0; + for (; k < ub; k++) if (nn[k]) { best = ss[k]; cnt = 1; k++; break; } + for (; k < ub; k++) if (nn[k]) { double v = ss[k]; if (v > best) best = v; cnt++; } + A->extreme_f = best; A->cnt = cnt; + } else if (k < ub) { + double best = ss[k++]; + for (; k < ub; k++) { double v = ss[k]; if (v > best) best = v; } + A->extreme_f = best; A->cnt = ub - lb; + } + break; + } + case OP_FIRST: { + if (nn) { + int64_t cnt = 0; + for (int64_t k = lb; k < ub; k++) if (nn[k]) { + if (cnt == 0) A->extreme_f = ss[k]; + cnt++; + } + A->cnt = cnt; + } else if (lb < ub) { + A->extreme_f = ss[lb]; A->cnt = ub - lb; + } + break; + } + case OP_LAST: { + if (nn) { + int64_t cnt = 0, last_k = -1; + for (int64_t k = lb; k < ub; k++) if (nn[k]) { last_k = k; cnt++; } + if (last_k >= 0) A->extreme_f = ss[last_k]; + A->cnt = cnt; + } else if (lb < ub) { + A->extreme_f = ss[ub - 1]; A->cnt = ub - lb; + } + break; + } + default: break; + } + } else { + const int64_t* ss = c->sorted_i[a]; + switch (op) { + case OP_SUM: case OP_AVG: { + int64_t sum = 0; int64_t cnt = 0; + if (nn) { for (int64_t k = lb; k < ub; k++) if (nn[k]) { sum += ss[k]; cnt++; } } + else { for (int64_t k = lb; k < ub; k++) sum += ss[k]; cnt = ub - lb; } + A->sum_i = sum; A->cnt = cnt; break; + } + case OP_VAR: case OP_VAR_POP: + case OP_STDDEV: case OP_STDDEV_POP: { + int64_t sum = 0, sum2 = 0; int64_t cnt = 0; + if (nn) { + for (int64_t k = lb; k < ub; k++) + if (nn[k]) { int64_t v = ss[k]; sum += v; sum2 += v * v; cnt++; } + } else { + for (int64_t k = lb; k < ub; k++) { int64_t v = ss[k]; sum += v; sum2 += v * v; } + cnt = ub - lb; 
+ } + A->sum_i = sum; A->sum_sq_i = sum2; A->cnt = cnt; break; + } + case OP_PROD: { + int64_t p = 1; int64_t cnt = 0; + if (nn) { for (int64_t k = lb; k < ub; k++) if (nn[k]) { p *= ss[k]; cnt++; } } + else { for (int64_t k = lb; k < ub; k++) p *= ss[k]; cnt = ub - lb; } + A->prod_i = p; A->cnt = cnt; break; + } + case OP_MIN: { + int64_t k = lb; + if (nn) { + int64_t best = 0, cnt = 0; + for (; k < ub; k++) if (nn[k]) { best = ss[k]; cnt = 1; k++; break; } + for (; k < ub; k++) if (nn[k]) { int64_t v = ss[k]; if (v < best) best = v; cnt++; } + A->extreme_i = best; A->cnt = cnt; + } else if (k < ub) { + int64_t best = ss[k++]; + for (; k < ub; k++) { int64_t v = ss[k]; if (v < best) best = v; } + A->extreme_i = best; A->cnt = ub - lb; + } + break; + } + case OP_MAX: { + int64_t k = lb; + if (nn) { + int64_t best = 0, cnt = 0; + for (; k < ub; k++) if (nn[k]) { best = ss[k]; cnt = 1; k++; break; } + for (; k < ub; k++) if (nn[k]) { int64_t v = ss[k]; if (v > best) best = v; cnt++; } + A->extreme_i = best; A->cnt = cnt; + } else if (k < ub) { + int64_t best = ss[k++]; + for (; k < ub; k++) { int64_t v = ss[k]; if (v > best) best = v; } + A->extreme_i = best; A->cnt = ub - lb; + } + break; + } + case OP_FIRST: { + if (nn) { + int64_t cnt = 0; + for (int64_t k = lb; k < ub; k++) if (nn[k]) { + if (cnt == 0) A->extreme_i = ss[k]; + cnt++; + } + A->cnt = cnt; + } else if (lb < ub) { + A->extreme_i = ss[lb]; A->cnt = ub - lb; + } + break; + } + case OP_LAST: { + if (nn) { + int64_t cnt = 0, last_k = -1; + for (int64_t k = lb; k < ub; k++) if (nn[k]) { last_k = k; cnt++; } + if (last_k >= 0) A->extreme_i = ss[last_k]; + A->cnt = cnt; + } else if (lb < ub) { + A->extreme_i = ss[ub - 1]; A->cnt = ub - lb; + } + break; + } + default: break; + } + } + } + + /* Finalize → indexed write at slot lr */ + for (int64_t a = 0; a < n_agg; a++) { + wj_acc_t* A = &acc[a]; + int8_t rty = c->agg_result_types[a]; + bool null_out = false; + int64_t out_i = 0; + double out_f = 0.0; + + if 
(c->agg_raw[a]) { + null_out = true; + } else { + switch (c->agg_ops[a]) { + case OP_COUNT: out_i = A->cnt; break; + case OP_SUM: + if (c->agg_is_float[a]) out_f = A->sum_f; else out_i = A->sum_i; + break; + case OP_PROD: + if (A->cnt == 0) null_out = true; + else if (c->agg_is_float[a]) out_f = A->prod_f; else out_i = A->prod_i; + break; + case OP_MIN: case OP_MAX: case OP_FIRST: case OP_LAST: + if (A->cnt == 0) null_out = true; + else if (c->agg_is_float[a]) out_f = A->extreme_f; else out_i = A->extreme_i; + break; + case OP_AVG: + if (A->cnt == 0) null_out = true; + else out_f = c->agg_is_float[a] + ? A->sum_f / (double)A->cnt + : (double)A->sum_i / (double)A->cnt; + break; + case OP_VAR: case OP_VAR_POP: + case OP_STDDEV: case OP_STDDEV_POP: { + bool sample = (c->agg_ops[a] == OP_VAR || c->agg_ops[a] == OP_STDDEV); + bool insuf = sample ? (A->cnt <= 1) : (A->cnt <= 0); + if (insuf) { null_out = true; break; } + double mean, var_pop; + if (c->agg_is_float[a]) { + mean = A->sum_f / (double)A->cnt; + var_pop = A->sum_sq_f / (double)A->cnt - mean * mean; + } else { + mean = (double)A->sum_i / (double)A->cnt; + var_pop = (double)A->sum_sq_i / (double)A->cnt - mean * mean; + } + if (var_pop < 0) var_pop = 0; + if (c->agg_ops[a] == OP_VAR_POP) out_f = var_pop; + else if (c->agg_ops[a] == OP_VAR) out_f = var_pop * A->cnt / (A->cnt - 1); + else if (c->agg_ops[a] == OP_STDDEV_POP) out_f = sqrt(var_pop); + else out_f = sqrt(var_pop * A->cnt / (A->cnt - 1)); + break; + } + default: null_out = true; break; + } + } + + c->result_null[a][lr] = null_out ? 
1 : 0; + if (null_out) continue; + + void* rd = c->result_data[a]; + if (rty == RAY_F64) ((double*)rd)[lr] = out_f; + else if (rty == RAY_F32) ((float*)rd)[lr] = (float)out_f; + else if (rty == RAY_I64 || rty == RAY_TIMESTAMP) + ((int64_t*)rd)[lr] = out_i; + else if (rty == RAY_I32 || rty == RAY_DATE || rty == RAY_TIME) + ((int32_t*)rd)[lr] = (int32_t)out_i; + else if (rty == RAY_I16) ((int16_t*)rd)[lr] = (int16_t)out_i; + else ((uint8_t*)rd)[lr] = (uint8_t)out_i; + } + } +} + +/* (window-join t1 t2 [eq-keys] time-col) + * ASOF join: for each left row, find closest right row with time <= left.time + * within the same equality partition. */ +ray_t* ray_window_join_fn(ray_t** args, int64_t n) { + if (n < 4) return ray_error("domain", NULL); + + /* Special form: evaluate first 4 args, keep agg dict (args[4]) unevaluated */ + ray_t* eargs[5]; + for (int i = 0; i < 4 && i < (int)n; i++) { + eargs[i] = ray_eval(args[i]); + if (!eargs[i] || RAY_IS_ERR(eargs[i])) { + for (int j = 0; j < i; j++) ray_release(eargs[j]); + return eargs[i] ? eargs[i] : ray_error("type", NULL); + } + } + eargs[4] = (n >= 5) ? args[4] : NULL; /* agg dict stays unevaluated */ + + /* Rayforce calling convention: + * (window-join [eq+time keys] intervals left right {agg}) */ + if (n >= 5 && ray_is_vec(eargs[0]) && eargs[0]->type == RAY_SYM && + eargs[2]->type == RAY_TABLE && eargs[3]->type == RAY_TABLE) { + /* Rayforce convention: implement at eval level. + * See file-scope wj_scan_fn / wj_scan_ctx_t for the parallel worker. 
*/ + ray_t* keys_vec = eargs[0]; /* [Sym Time] — equality + time keys */ + ray_t* intervals = eargs[1]; /* list of [lo hi] time windows */ + ray_t* left_tbl = eargs[2]; /* trades */ + ray_t* right_tbl = eargs[3]; /* quotes */ + ray_t* agg_dict = eargs[4]; /* unevaluated dict */ + + int64_t nkeys = ray_len(keys_vec); + if (nkeys < 2) return ray_error("domain", NULL); + int64_t* key_ids = (int64_t*)ray_data(keys_vec); + + /* Last key is the time key, rest are equality keys */ + int64_t time_key = key_ids[nkeys - 1]; + int64_t n_eq = nkeys - 1; + + int64_t left_nrows = ray_table_nrows(left_tbl); + int64_t right_nrows = ray_table_nrows(right_tbl); + + /* Get left time column */ + ray_t* left_time = ray_table_get_col(left_tbl, time_key); + ray_t* right_time = ray_table_get_col(right_tbl, time_key); + if (!left_time || !right_time) return ray_error("domain", NULL); + + /* Get equality columns */ + ray_t* left_eq[16], *right_eq[16]; + for (int64_t e = 0; e < n_eq && e < 16; e++) { + left_eq[e] = ray_table_get_col(left_tbl, key_ids[e]); + right_eq[e] = ray_table_get_col(right_tbl, key_ids[e]); + if (!left_eq[e] || !right_eq[e]) return ray_error("domain", NULL); + } + + /* Parse every (name, (op src)) pair from the agg dict. The dict's + * physical layout is [keys (SYM vec), vals (LIST)] — read keys[i] + * via ray_read_sym and pair it with vals[i] from the LIST. + * WJ_MAX_AGG is defined at file scope (for wj_scan_ctx_t). */ + int64_t agg_names[WJ_MAX_AGG]; + uint16_t agg_ops[WJ_MAX_AGG]; + int64_t agg_src_ids[WJ_MAX_AGG]; + ray_t* agg_src_vecs[WJ_MAX_AGG] = {0}; + int8_t agg_types[WJ_MAX_AGG]; + int agg_is_float[WJ_MAX_AGG]; + ray_t* agg_result_vecs[WJ_MAX_AGG] = {0}; + int agg_raw[WJ_MAX_AGG] = {0}; /* {name: Col} bare-column form — legacy placeholder */ + int64_t n_agg = 0; + + if (agg_dict && agg_dict->type == RAY_DICT) { + ray_t* dkeys = ray_dict_keys(agg_dict); + ray_t* dvals = ray_dict_vals(agg_dict); + int64_t adn = (dkeys && dkeys->type == RAY_SYM) ? 
dkeys->len : 0; + ray_t** lvals = (dvals && dvals->type == RAY_LIST) ? (ray_t**)ray_data(dvals) : NULL; + for (int64_t di = 0; di < adn && n_agg < WJ_MAX_AGG; di++) { + int64_t kname_id = ray_read_sym(ray_data(dkeys), di, RAY_SYM, dkeys->attrs); + ray_t* expr = lvals ? lvals[di] : NULL; + if (!expr) continue; + /* (op col) aggregation form */ + if (expr->type == RAY_LIST && expr->len >= 2) { + ray_t** ae = (ray_t**)ray_data(expr); + if (!(ae[0]->type == -RAY_SYM && (ae[0]->attrs & RAY_ATTR_NAME))) continue; + if (!(ae[1]->type == -RAY_SYM && (ae[1]->attrs & RAY_ATTR_NAME))) continue; + agg_names[n_agg] = kname_id; + agg_ops[n_agg] = resolve_agg_opcode(ae[0]->i64); + agg_src_ids[n_agg] = ae[1]->i64; + agg_raw[n_agg] = 0; + n_agg++; + continue; + } + /* Bare column reference — legacy map-group form, emitted as null column */ + if (expr->type == -RAY_SYM && (expr->attrs & RAY_ATTR_NAME)) { + agg_names[n_agg] = kname_id; + agg_ops[n_agg] = OP_MIN; + agg_src_ids[n_agg] = expr->i64; + agg_raw[n_agg] = 1; + n_agg++; + continue; + } + } + } + + /* Resolve sources, pick result types, allocate result vectors. + * Raw bare-column form ({name: Col}) is a legacy placeholder — it + * accepts any column type (numeric or not) and always produces a + * nullable i64 column filled with nulls. All true aggregation ops + * require a numeric source column. 
*/ + int8_t agg_result_types[WJ_MAX_AGG]; + for (int64_t a = 0; a < n_agg; a++) { + if (agg_raw[a]) { + agg_src_vecs[a] = NULL; + agg_types[a] = RAY_I64; + agg_is_float[a] = 0; + agg_result_types[a] = RAY_I64; + agg_result_vecs[a] = ray_vec_new(RAY_I64, left_nrows); + if (RAY_IS_ERR(agg_result_vecs[a])) { + ray_t* err = agg_result_vecs[a]; + for (int64_t b = 0; b < a; b++) ray_release(agg_result_vecs[b]); + for (int i = 0; i < 4; i++) ray_release(eargs[i]); + return err; + } + continue; + } + if (agg_ops[a] == 0) { + for (int64_t b = 0; b < a; b++) ray_release(agg_result_vecs[b]); + for (int i = 0; i < 4; i++) ray_release(eargs[i]); + return ray_error("domain", NULL); + } + ray_t* src = ray_table_get_col(right_tbl, agg_src_ids[a]); + if (!src) { + for (int64_t b = 0; b < a; b++) ray_release(agg_result_vecs[b]); + for (int i = 0; i < 4; i++) ray_release(eargs[i]); + return ray_error("domain", NULL); + } + int8_t t = src->type; + /* COUNT never reads source values — accept any column type. Every + * other aggregation reads v_i/v_f and requires a numeric source. */ + if (agg_ops[a] != OP_COUNT) { + switch (t) { + case RAY_I64: case RAY_I32: case RAY_I16: case RAY_U8: + case RAY_F64: case RAY_F32: case RAY_BOOL: + case RAY_DATE: case RAY_TIME: case RAY_TIMESTAMP: + break; + default: + for (int64_t b = 0; b < a; b++) ray_release(agg_result_vecs[b]); + for (int i = 0; i < 4; i++) ray_release(eargs[i]); + return ray_error("type", NULL); + } + } + agg_src_vecs[a] = src; + agg_types[a] = t; + agg_is_float[a] = (t == RAY_F64 || t == RAY_F32); + + int8_t rt; + switch (agg_ops[a]) { + case OP_COUNT: rt = RAY_I64; break; + case OP_AVG: + case OP_VAR: case OP_VAR_POP: + case OP_STDDEV: case OP_STDDEV_POP: rt = RAY_F64; break; + case OP_SUM: case OP_PROD: + rt = agg_is_float[a] ? 
RAY_F64 : RAY_I64; break; + default: /* MIN/MAX/FIRST/LAST */ rt = t; break; + } + agg_result_types[a] = rt; + agg_result_vecs[a] = ray_vec_new(rt, left_nrows); + if (RAY_IS_ERR(agg_result_vecs[a])) { + ray_t* err = agg_result_vecs[a]; + for (int64_t b = 0; b < a; b++) ray_release(agg_result_vecs[b]); + for (int i = 0; i < 4; i++) ray_release(eargs[i]); + return err; + } + } + + /* wj_acc_t is defined at file scope now (used by wj_scan_fn). */ + + /* Sort right table by (eq_keys..., time) once so each left row only + * scans the quote rows whose (eq,time) fall inside its window. + * Per-row cost drops from O(right_nrows) to O(log right_nrows + window). */ + ray_t* rs_hdr = NULL, *rt_hdr = NULL, *tmp_hdr = NULL; + int64_t* right_sort = NULL; + int64_t* rt_time_i = NULL; + int64_t* tmp_sort = NULL; + const void* eq_data[16]; + int8_t eq_type[16]; + uint8_t eq_attrs[16]; + for (int64_t e = 0; e < n_eq; e++) { + eq_data[e] = ray_data(right_eq[e]); + eq_type[e] = right_eq[e]->type; + eq_attrs[e] = right_eq[e]->attrs; + } + if (right_nrows > 0) { + right_sort = (int64_t*)scratch_alloc(&rs_hdr, (size_t)right_nrows * sizeof(int64_t)); + rt_time_i = (int64_t*)scratch_alloc(&rt_hdr, (size_t)right_nrows * sizeof(int64_t)); + tmp_sort = (int64_t*)scratch_alloc(&tmp_hdr, (size_t)right_nrows * sizeof(int64_t)); + if (!right_sort || !rt_time_i || !tmp_sort) { + if (rs_hdr) scratch_free(rs_hdr); + if (rt_hdr) scratch_free(rt_hdr); + if (tmp_hdr) scratch_free(tmp_hdr); + for (int64_t a = 0; a < n_agg; a++) ray_release(agg_result_vecs[a]); + for (int i = 0; i < 4; i++) ray_release(eargs[i]); + return ray_error("oom", NULL); + } + /* Cache time column access so the sort compare avoids reloading them */ + int8_t rt_type = right_time->type; + uint8_t rt_attrs = right_time->attrs; + const void* rt_data = ray_data(right_time); + for (int64_t rr = 0; rr < right_nrows; rr++) { + right_sort[rr] = rr; + rt_time_i[rr] = read_col_i64(rt_data, rr, rt_type, rt_attrs); + } + /* Bottom-up merge 
sort on index array */ + for (int64_t width = 1; width < right_nrows; width *= 2) { + for (int64_t lo = 0; lo < right_nrows; lo += 2 * width) { + int64_t mid = lo + width; + int64_t hi = lo + 2 * width; + if (mid > right_nrows) mid = right_nrows; + if (hi > right_nrows) hi = right_nrows; + int64_t a = lo, b = mid, t = lo; + while (a < mid && b < hi) { + int64_t ai = right_sort[a], bi = right_sort[b]; + int cmp = 0; + for (int64_t e = 0; e < n_eq && cmp == 0; e++) { + int64_t va = read_col_i64(eq_data[e], ai, eq_type[e], eq_attrs[e]); + int64_t vb = read_col_i64(eq_data[e], bi, eq_type[e], eq_attrs[e]); + if (va < vb) cmp = -1; + else if (va > vb) cmp = 1; + } + if (cmp == 0) { + if (rt_time_i[ai] < rt_time_i[bi]) cmp = -1; + else if (rt_time_i[ai] > rt_time_i[bi]) cmp = 1; + } + tmp_sort[t++] = (cmp <= 0) ? right_sort[a++] : right_sort[b++]; + } + while (a < mid) tmp_sort[t++] = right_sort[a++]; + while (b < hi) tmp_sort[t++] = right_sort[b++]; + for (int64_t c = lo; c < hi; c++) right_sort[c] = tmp_sort[c]; + } + } + scratch_free(tmp_hdr); + tmp_hdr = NULL; + tmp_sort = NULL; + } + + /* Preload one sorted source column per aggregation. + * After sorting right_sort, the hot loop wants *sequential* access + * (SIMD + prefetch friendly) — not an indirect gather through + * right_sort[k]. We materialize sorted_src_i[a][k] = value at + * right_sort[k] once, then every left row's window scans are a + * plain array walk. + * + * COUNT / raw form carry no source; nothing to preload. PROD and + * ops on null-containing columns still go through the slow scan + * (see below), so the preload is gated on the easy numeric cases. 
*/ + int64_t* sorted_i[WJ_MAX_AGG] = {0}; + double* sorted_f[WJ_MAX_AGG] = {0}; + uint8_t* sorted_nn[WJ_MAX_AGG] = {0}; /* 0 = null, 1 = value present */ + ray_t* sorted_i_hdr[WJ_MAX_AGG] = {0}; + ray_t* sorted_f_hdr[WJ_MAX_AGG] = {0}; + ray_t* sorted_nn_hdr[WJ_MAX_AGG] = {0}; + for (int64_t a = 0; a < n_agg; a++) { + if (agg_raw[a] || agg_ops[a] == OP_COUNT) continue; + ray_t* src = agg_src_vecs[a]; + if (!src || right_nrows == 0) continue; + bool has_nulls = (src->attrs & RAY_ATTR_HAS_NULLS) != 0; + if (has_nulls) { + sorted_nn[a] = (uint8_t*)scratch_alloc(&sorted_nn_hdr[a], + (size_t)right_nrows); + if (!sorted_nn[a]) { goto wj_preload_oom; } + } + if (agg_is_float[a]) { + sorted_f[a] = (double*)scratch_alloc(&sorted_f_hdr[a], + (size_t)right_nrows * sizeof(double)); + if (!sorted_f[a]) { goto wj_preload_oom; } + int8_t t = agg_types[a]; + const void* sd = ray_data(src); + for (int64_t k = 0; k < right_nrows; k++) { + int64_t rr = right_sort[k]; + double v = (t == RAY_F32) + ? (double)((const float*)sd)[rr] + : ((const double*)sd)[rr]; + sorted_f[a][k] = v; + if (has_nulls) sorted_nn[a][k] = ray_vec_is_null(src, rr) ? 0 : 1; + } + } else { + sorted_i[a] = (int64_t*)scratch_alloc(&sorted_i_hdr[a], + (size_t)right_nrows * sizeof(int64_t)); + if (!sorted_i[a]) { goto wj_preload_oom; } + const void* sd = ray_data(src); + int8_t t = agg_types[a]; + uint8_t at = src->attrs; + for (int64_t k = 0; k < right_nrows; k++) { + int64_t rr = right_sort[k]; + sorted_i[a][k] = read_col_i64(sd, rr, t, at); + if (has_nulls) sorted_nn[a][k] = ray_vec_is_null(src, rr) ? 
0 : 1; + } + } + } + + #define WJ_CLEANUP_TEMP() do { \ + if (rs_hdr) scratch_free(rs_hdr); \ + if (rt_hdr) scratch_free(rt_hdr); \ + if (tmp_hdr) scratch_free(tmp_hdr); \ + for (int64_t _a = 0; _a < n_agg; _a++) { \ + if (sorted_i_hdr[_a]) scratch_free(sorted_i_hdr[_a]); \ + if (sorted_f_hdr[_a]) scratch_free(sorted_f_hdr[_a]); \ + if (sorted_nn_hdr[_a]) scratch_free(sorted_nn_hdr[_a]); \ + } \ + } while (0) + + if (0) { + wj_preload_oom: + WJ_CLEANUP_TEMP(); + for (int64_t a = 0; a < n_agg; a++) ray_release(agg_result_vecs[a]); + for (int i = 0; i < 4; i++) ray_release(eargs[i]); + return ray_error("oom", NULL); + } + + /* Pre-extract left-row metadata (interval endpoints + eq-key tuples) + * into flat int64 arrays. This hoists all ray_t allocation and the + * width-aware reads out of the hot loop so the parallel worker can + * process rows without touching any ref-counted objects. */ + ray_t* lo_hdr = NULL, *hi_hdr = NULL; + int64_t* lo_arr = (int64_t*)scratch_alloc(&lo_hdr, (size_t)left_nrows * sizeof(int64_t)); + int64_t* hi_arr = (int64_t*)scratch_alloc(&hi_hdr, (size_t)left_nrows * sizeof(int64_t)); + if ((!lo_arr || !hi_arr) && left_nrows > 0) { + if (lo_hdr) scratch_free(lo_hdr); + if (hi_hdr) scratch_free(hi_hdr); + WJ_CLEANUP_TEMP(); + for (int64_t a = 0; a < n_agg; a++) ray_release(agg_result_vecs[a]); + for (int i = 0; i < 4; i++) ray_release(eargs[i]); + return ray_error("oom", NULL); + } + for (int64_t lr = 0; lr < left_nrows; lr++) { + int alloc_iv = 0; + ray_t* iv = collection_elem(intervals, lr, &alloc_iv); + if (!iv || RAY_IS_ERR(iv) || ray_len(iv) < 2) { + if (alloc_iv && iv) ray_release(iv); + if (lo_hdr) scratch_free(lo_hdr); + if (hi_hdr) scratch_free(hi_hdr); + WJ_CLEANUP_TEMP(); + for (int64_t a = 0; a < n_agg; a++) ray_release(agg_result_vecs[a]); + for (int i = 0; i < 4; i++) ray_release(eargs[i]); + return ray_error("domain", NULL); + } + int alloc_lo = 0, alloc_hi = 0; + ray_t* lo_atom = collection_elem(iv, 0, &alloc_lo); + ray_t* 
hi_atom = collection_elem(iv, 1, &alloc_hi); + lo_arr[lr] = as_i64(lo_atom); + hi_arr[lr] = as_i64(hi_atom); + if (alloc_lo) ray_release(lo_atom); + if (alloc_hi) ray_release(hi_atom); + if (alloc_iv) ray_release(iv); + } + + ray_t* left_eq_hdr[WJ_MAX_AGG] = {0}; + int64_t* left_eq_arr[WJ_MAX_AGG] = {0}; + for (int64_t e = 0; e < n_eq; e++) { + left_eq_arr[e] = (int64_t*)scratch_alloc(&left_eq_hdr[e], + (size_t)left_nrows * sizeof(int64_t)); + if (!left_eq_arr[e] && left_nrows > 0) { + if (lo_hdr) scratch_free(lo_hdr); + if (hi_hdr) scratch_free(hi_hdr); + for (int64_t f = 0; f < e; f++) + if (left_eq_hdr[f]) scratch_free(left_eq_hdr[f]); + WJ_CLEANUP_TEMP(); + for (int64_t a = 0; a < n_agg; a++) ray_release(agg_result_vecs[a]); + for (int i = 0; i < 4; i++) ray_release(eargs[i]); + return ray_error("oom", NULL); + } + const void* sd = ray_data(left_eq[e]); + int8_t t = left_eq[e]->type; + uint8_t at = left_eq[e]->attrs; + for (int64_t lr = 0; lr < left_nrows; lr++) + left_eq_arr[e][lr] = read_col_i64(sd, lr, t, at); + } + + /* Pre-size each result vector and allocate a 1-byte-per-row null + * staging array — writers index by lr without touching the nullmap. */ + ray_t* null_stage_hdr[WJ_MAX_AGG] = {0}; + uint8_t* null_stage[WJ_MAX_AGG] = {0}; + for (int64_t a = 0; a < n_agg; a++) { + agg_result_vecs[a]->len = left_nrows; + null_stage[a] = (uint8_t*)scratch_alloc(&null_stage_hdr[a], (size_t)left_nrows); + if (!null_stage[a] && left_nrows > 0) { + if (lo_hdr) scratch_free(lo_hdr); + if (hi_hdr) scratch_free(hi_hdr); + for (int64_t f = 0; f < n_eq; f++) if (left_eq_hdr[f]) scratch_free(left_eq_hdr[f]); + for (int64_t b = 0; b < a; b++) if (null_stage_hdr[b]) scratch_free(null_stage_hdr[b]); + WJ_CLEANUP_TEMP(); + for (int64_t b = 0; b < n_agg; b++) ray_release(agg_result_vecs[b]); + for (int i = 0; i < 4; i++) ray_release(eargs[i]); + return ray_error("oom", NULL); + } + memset(null_stage[a], 0, (size_t)left_nrows); + } + + /* Build the scan context and dispatch. 
*/ + wj_scan_ctx_t wctx; + memset(&wctx, 0, sizeof(wctx)); + wctx.left_nrows = left_nrows; + wctx.right_nrows = right_nrows; + wctx.n_eq = n_eq; + wctx.n_agg = n_agg; + wctx.lo_arr = lo_arr; + wctx.hi_arr = hi_arr; + wctx.right_sort = right_sort; + wctx.rt_time_i = rt_time_i; + for (int64_t e = 0; e < n_eq; e++) { + wctx.left_eq_arr[e] = left_eq_arr[e]; + wctx.eq_data[e] = eq_data[e]; + wctx.eq_type[e] = eq_type[e]; + wctx.eq_attrs[e] = eq_attrs[e]; + } + for (int64_t a = 0; a < n_agg; a++) { + wctx.agg_raw[a] = (uint8_t)agg_raw[a]; + wctx.agg_ops[a] = agg_ops[a]; + wctx.agg_result_types[a] = agg_result_types[a]; + wctx.agg_is_float[a] = agg_is_float[a]; + wctx.sorted_i[a] = sorted_i[a]; + wctx.sorted_f[a] = sorted_f[a]; + wctx.sorted_nn[a] = sorted_nn[a]; + wctx.result_data[a] = ray_data(agg_result_vecs[a]); + wctx.result_null[a] = null_stage[a]; + } + + ray_pool_t* pool = ray_pool_get(); + if (pool && left_nrows >= 2048) { + ray_pool_dispatch(pool, wj_scan_fn, &wctx, left_nrows); + } else { + wj_scan_fn(&wctx, 0, 0, left_nrows); + } + + /* Apply staged null flags to each result vec's null bitmap sequentially. 
*/ + for (int64_t a = 0; a < n_agg; a++) { + ray_t* rv = agg_result_vecs[a]; + const uint8_t* stage = null_stage[a]; + for (int64_t lr = 0; lr < left_nrows; lr++) + if (stage[lr]) ray_vec_set_null(rv, lr, true); + } + + /* Free pre-extract scratch */ + if (lo_hdr) scratch_free(lo_hdr); + if (hi_hdr) scratch_free(hi_hdr); + for (int64_t e = 0; e < n_eq; e++) + if (left_eq_hdr[e]) scratch_free(left_eq_hdr[e]); + for (int64_t a = 0; a < n_agg; a++) + if (null_stage_hdr[a]) scratch_free(null_stage_hdr[a]); + + + WJ_CLEANUP_TEMP(); + #undef WJ_CLEANUP_TEMP + + /* Build result table: left columns + every aggregation column */ + int64_t ncols = ray_table_ncols(left_tbl); + ray_t* result = ray_table_new(ncols + n_agg); + for (int64_t c = 0; c < ncols; c++) { + int64_t cn = ray_table_col_name(left_tbl, c); + ray_t* cv = ray_table_get_col_idx(left_tbl, c); + ray_retain(cv); + result = ray_table_add_col(result, cn, cv); + ray_release(cv); + } + for (int64_t a = 0; a < n_agg; a++) { + result = ray_table_add_col(result, agg_names[a], agg_result_vecs[a]); + ray_release(agg_result_vecs[a]); + } + for (int i = 0; i < 4; i++) ray_release(eargs[i]); + return result; + #undef WJ_MAX_AGG + } + + ray_t* left_tbl = eargs[0]; + ray_t* right_tbl = eargs[1]; + ray_t* eq_keys = eargs[2]; + ray_t* time_sym = eargs[3]; + + if (left_tbl->type != RAY_TABLE || right_tbl->type != RAY_TABLE) + return ray_error("type", NULL); + if (time_sym->type != -RAY_SYM) + return ray_error("type", NULL); + + uint8_t n_eq = 0; + ray_t** eq_elems = NULL; + ray_t* _bxeq = NULL; + eq_keys = unbox_vec_arg(eq_keys, &_bxeq); + if (is_list(eq_keys)) { + n_eq = (uint8_t)ray_len(eq_keys); + eq_elems = (ray_t**)ray_data(eq_keys); + } + + ray_graph_t* g = ray_graph_new(left_tbl); + if (!g) return ray_error("oom", NULL); + + ray_op_t* left_node = ray_const_table(g, left_tbl); + ray_op_t* right_node = ray_const_table(g, right_tbl); + + ray_t* tname = ray_sym_str(time_sym->i64); + if (!tname) { ray_graph_free(g); return 
ray_error("domain", NULL); } + ray_op_t* time_op = ray_scan(g, ray_str_ptr(tname)); + if (!time_op) { ray_graph_free(g); return ray_error("domain", NULL); } + + ray_op_t* eq_ops[16]; + for (uint8_t i = 0; i < n_eq; i++) { + if (eq_elems[i]->type != -RAY_SYM) { + ray_graph_free(g); + return ray_error("type", NULL); + } + ray_t* nm = ray_sym_str(eq_elems[i]->i64); + if (!nm) { ray_graph_free(g); return ray_error("domain", NULL); } + eq_ops[i] = ray_scan(g, ray_str_ptr(nm)); + if (!eq_ops[i]) { ray_graph_free(g); return ray_error("domain", NULL); } + } + + if (_bxeq) ray_release(_bxeq); + + ray_op_t* jn = ray_asof_join(g, left_node, right_node, + time_op, eq_ops, n_eq, 1); + if (!jn) { ray_graph_free(g); return ray_error("oom", NULL); } + + jn = ray_optimize(g, jn); + ray_t* result = ray_execute(g, jn); + ray_graph_free(g); + return result; +} + +/* (asof-join [key1 key2 ... timeKey] leftTable rightTable) + * Last key is the time/asof column, rest are equality keys. */ +ray_t* ray_asof_join_fn(ray_t** args, int64_t n) { + if (n < 3) return ray_error("arity", NULL); + ray_t* keys_vec = args[0]; + ray_t* left_tbl = args[1]; + ray_t* right_tbl = args[2]; + + if (left_tbl->type != RAY_TABLE || right_tbl->type != RAY_TABLE) + return ray_error("type", NULL); + + /* Keys vector must be a SYM vector with at least 2 elements (eq + time) */ + ray_t* _bxk = NULL; + keys_vec = unbox_vec_arg(keys_vec, &_bxk); + if (!is_list(keys_vec) || ray_len(keys_vec) < 2) { + if (_bxk) ray_release(_bxk); + return ray_error("domain", NULL); + } + ray_t** kelems = (ray_t**)ray_data(keys_vec); + int64_t nkeys = ray_len(keys_vec); + + /* Last key is the time column */ + ray_t* time_sym = kelems[nkeys - 1]; + if (time_sym->type != -RAY_SYM) { + if (_bxk) ray_release(_bxk); + return ray_error("type", NULL); + } + + /* Remaining keys are equality keys */ + uint8_t n_eq = (uint8_t)(nkeys - 1); + ray_t** eq_syms = kelems; /* first n_eq elements */ + + ray_graph_t* g = ray_graph_new(left_tbl); + if (!g) 
{ if (_bxk) ray_release(_bxk); return ray_error("oom", NULL); } + + ray_op_t* left_node = ray_const_table(g, left_tbl); + ray_op_t* right_node = ray_const_table(g, right_tbl); + + ray_t* tname = ray_sym_str(time_sym->i64); + if (!tname) { ray_graph_free(g); if (_bxk) ray_release(_bxk); return ray_error("domain", NULL); } + ray_op_t* time_op = ray_scan(g, ray_str_ptr(tname)); + if (!time_op) { ray_graph_free(g); if (_bxk) ray_release(_bxk); return ray_error("domain", NULL); } + + ray_op_t* eq_ops[16]; + for (uint8_t i = 0; i < n_eq; i++) { + if (eq_syms[i]->type != -RAY_SYM) { + ray_graph_free(g); if (_bxk) ray_release(_bxk); + return ray_error("type", NULL); + } + ray_t* nm = ray_sym_str(eq_syms[i]->i64); + if (!nm) { ray_graph_free(g); if (_bxk) ray_release(_bxk); return ray_error("domain", NULL); } + eq_ops[i] = ray_scan(g, ray_str_ptr(nm)); + if (!eq_ops[i]) { ray_graph_free(g); if (_bxk) ray_release(_bxk); return ray_error("domain", NULL); } + } + + if (_bxk) ray_release(_bxk); + + ray_op_t* jn = ray_asof_join(g, left_node, right_node, + time_op, eq_ops, n_eq, 1); + if (!jn) { ray_graph_free(g); return ray_error("oom", NULL); } + + jn = ray_optimize(g, jn); + ray_t* result = ray_execute(g, jn); + ray_graph_free(g); + return result; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/rerank.c b/crates/rayforce-sys/vendor/rayforce/src/ops/rerank.c new file mode 100644 index 0000000..a35b94b --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/rerank.c @@ -0,0 +1,546 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * Rerank executors: combine a filtered source table with a top-K + * nearest-neighbour step (index-backed ANN or brute-force KNN) in + * one DAG op. Used by `select ... where

nearest (ann|knn ...) take k`. + */ + +#include "ops/internal.h" +#include "ops/rowsel.h" +#include "mem/sys.h" +#include "store/hnsw.h" +#include +#include + +/* ========================================================================== + * Helpers + * ========================================================================== */ + +/* Element access into a numeric ray_t vector (F32 / F64 / I32 / I64) → double. */ +static double rr_at_f64(ray_t* v, int64_t i) { + void* d = ray_data(v); + switch (v->type) { + case RAY_F32: return (double)((float*)d)[i]; + case RAY_F64: return ((double*)d)[i]; + case RAY_I32: return (double)((int32_t*)d)[i]; + case RAY_I64: return (double)((int64_t*)d)[i]; + default: return 0.0; + } +} + +static bool rr_is_numeric(ray_t* v) { + if (!v || !ray_is_vec(v)) return false; + return v->type == RAY_F32 || v->type == RAY_F64 + || v->type == RAY_I32 || v->type == RAY_I64; +} + +/* Distance metrics — mirror row_score in src/ops/embedding.c. */ +typedef enum { RR_COS_DIST, RR_IP_NEG, RR_L2_DIST } rr_metric_t; + +static rr_metric_t rr_metric_from_hnsw(int32_t m) { + switch ((ray_hnsw_metric_t)m) { + case RAY_HNSW_L2: return RR_L2_DIST; + case RAY_HNSW_IP: return RR_IP_NEG; + case RAY_HNSW_COSINE: + default: return RR_COS_DIST; + } +} + +static double rr_row_dist(rr_metric_t m, ray_t* row, + const double* q, double q_norm, int32_t dim) { + double acc = 0.0, r_norm_sq = 0.0; + if (m == RR_L2_DIST) { + for (int32_t j = 0; j < dim; j++) { + double d = rr_at_f64(row, j) - q[j]; + acc += d * d; + } + return sqrt(acc); + } + for (int32_t j = 0; j < dim; j++) { + double a = rr_at_f64(row, j); + acc += a * q[j]; + if (m == RR_COS_DIST) r_norm_sq += a * a; + } + if (m == RR_IP_NEG) return -acc; + double denom = q_norm * sqrt(r_norm_sq); + double sim = (denom > 0.0) ? acc / denom : 0.0; + return 1.0 - sim; +} + +/* Build an empty-rows clone of the source schema plus a trailing _dist + * column (F64, len=0). 
/* Build an empty-rows clone of the source schema plus a trailing _dist
 * column (F64, len=0).  Used for both the "source is empty" and "filter
 * rejected everything" cases so callers always get the same table shape:
 * the source columns followed by `_dist`.
 *
 * PARTED source columns are represented by their base element type.
 * Source columns for which ray_table_get_col_idx returns NULL are skipped.
 *
 * Returns NULL when a column allocation fails or an intermediate
 * ray_table_add_col yields an error value.  The result of the final
 * ray_table_add_col (for `_dist`) is returned without inspection, so
 * callers should be prepared for whatever that call reports. */
static ray_t* empty_result_with_dist(ray_t* src) {
    int64_t ncols = ray_table_ncols(src);
    ray_t* out = ray_table_new(ncols + 1);
    if (!out || RAY_IS_ERR(out)) return NULL;
    for (int64_t c = 0; c < ncols; c++) {
        ray_t* sc = ray_table_get_col_idx(src, c);
        if (!sc) continue;
        /* Strip the PARTED wrapper so the empty column has a plain type. */
        int8_t ct = RAY_IS_PARTED(sc->type)
                  ? (int8_t)RAY_PARTED_BASETYPE(sc->type) : sc->type;
        ray_t* nc = (ct == RAY_LIST) ? ray_list_new(0) : ray_vec_new(ct, 0);
        if (!nc || RAY_IS_ERR(nc)) { ray_release(out); return NULL; }
        nc->len = 0;
        out = ray_table_add_col(out, ray_table_col_name(src, c), nc);
        /* Drop our local reference to the column after handing it over. */
        ray_release(nc);
        if (RAY_IS_ERR(out)) return NULL;
    }
    ray_t* dv = ray_vec_new(RAY_F64, 0);
    if (!dv || RAY_IS_ERR(dv)) { ray_release(out); return NULL; }
    dv->len = 0;
    out = ray_table_add_col(out, sym_intern_safe("_dist", 5), dv);
    ray_release(dv);
    return out;
}

/* Gather k rows from `tbl` at dense `rowids[]`, appending a `_dist` F64
 * column with the parallel distances.  Caller owns the returned table.
 *
 *   tbl     source table
 *   rowids  k row indices into `tbl`, in output order
 *   dists   k distances, parallel to `rowids`
 *   k       number of rows to emit
 *
 * Returns:
 *   - the new table on success;
 *   - NULL on allocation failure or when adding a column fails;
 *   - an "nyi" error value (non-NULL) if any source column is PARTED.
 *
 * NOTE(review): rowids[] is used to index the source columns without a
 * range check in this function — callers must guarantee every id is a
 * valid row of `tbl`. */
static ray_t* gather_rows_with_dist(ray_t* tbl,
                                    const int64_t* rowids, const double* dists,
                                    int64_t k) {
    int64_t ncols = ray_table_ncols(tbl);
    ray_t* result = ray_table_new(ncols + 1);
    if (!result || RAY_IS_ERR(result)) return NULL;

    for (int64_t c = 0; c < ncols; c++) {
        ray_t* src_col = ray_table_get_col_idx(tbl, c);
        if (!src_col) { ray_release(result); return NULL; }

        /* PARTED columns carry ray_t** segment pointers in their data
         * region, not raw element bytes — the byte-wise gather below
         * would read pointer values as column data.  Reject with a clear
         * error rather than produce garbage; PARTED support is future work. */
        if (RAY_IS_PARTED(src_col->type)) {
            ray_release(result);
            return ray_error("nyi",
                             "nearest: PARTED columns not supported in result projection");
        }

        int8_t ct = src_col->type;

        /* Allocate the destination column with the right shape.  col_vec_new
         * handles SYM width preservation; LIST uses its own constructor. */
        ray_t* new_col = (ct == RAY_LIST) ? ray_list_new(k) : col_vec_new(src_col, k);
        if (!new_col || RAY_IS_ERR(new_col)) { ray_release(result); return NULL; }
        new_col->len = k;

        if (ct == RAY_LIST) {
            /* LIST cells are object pointers: copy the pointer and take a
             * reference for the new column. */
            ray_t** d = (ray_t**)ray_data(new_col);
            ray_t** s = (ray_t**)ray_data(src_col);
            for (int64_t i = 0; i < k; i++) {
                d[i] = s[rowids[i]];
                if (d[i]) ray_retain(d[i]);
            }
        } else {
            /* All fixed-width types (including SYM at any width, RAY_STR's
             * 16-byte inline cells, DATE/TIME/TIMESTAMP, GUID) go through
             * byte-wise memcpy driven by the column's element size.
             * Mirrors sel_compact's gather convention. */
            uint8_t esz = col_esz(src_col);
            char* dst = (char*)ray_data(new_col);
            const char* src = (const char*)ray_data(src_col);
            for (int64_t i = 0; i < k; i++)
                memcpy(dst + i * esz, src + rowids[i] * esz, esz);

            /* RAY_STR: share the source pool (inline bytes reference
             * pooled long-string data). */
            if (ct == RAY_STR) col_propagate_str_pool(new_col, src_col);

            /* RAY_SYM: propagate the per-vector sym_dict so narrow-width
             * local indices resolve against the same dictionary.  For
             * sliced SYM columns the sym_dict lives on the slice_parent
             * (the slice's own union slot holds slice_parent/offset).
             * Guards against the inline-nullmap aliasing mirror sort.c:3307. */
            if (ct == RAY_SYM) {
                const ray_t* dict_owner = (src_col->attrs & RAY_ATTR_SLICE)
                                        ? src_col->slice_parent : src_col;
                if (dict_owner &&
                    (!(dict_owner->attrs & RAY_ATTR_HAS_NULLS) ||
                     (dict_owner->attrs & RAY_ATTR_NULLMAP_EXT)) &&
                    dict_owner->sym_dict) {
                    ray_retain(dict_owner->sym_dict);
                    new_col->sym_dict = dict_owner->sym_dict;
                }
            }

            /* Null bitmap: the shared col_propagate_nulls_gather only
             * inspects src's own attrs — for a sliced src it misses
             * HAS_NULLS on the parent.  Mirror sort.c:3315's slice-aware
             * check so sliced source columns don't lose their nulls. */
            bool src_has_nulls =
                (src_col->attrs & RAY_ATTR_HAS_NULLS) ||
                ((src_col->attrs & RAY_ATTR_SLICE) && src_col->slice_parent &&
                 (src_col->slice_parent->attrs & RAY_ATTR_HAS_NULLS));
            if (src_has_nulls) {
                for (int64_t r = 0; r < k; r++) {
                    if (ray_vec_is_null(src_col, rowids[r]))
                        ray_vec_set_null(new_col, r, true);
                }
            }
        }

        /* Remember the accumulator: ray_table_add_col may hand back a
         * different pointer (or an error), and we may need to release
         * the old one ourselves. */
        ray_t* prev = result;
        result = ray_table_add_col(result, ray_table_col_name(tbl, c), new_col);
        ray_release(new_col);
        if (!result || RAY_IS_ERR(result)) {
            /* ray_table_add_col's error paths don't release the input
             * table when they fail mid-way (cow may have returned the
             * same pointer).  Release our prior accumulator to avoid
             * leaking the partially-built table and its retained cols. */
            if (prev && !RAY_IS_ERR(prev) && prev != result) ray_release(prev);
            return NULL;
        }
    }

    /* Append _dist column */
    ray_t* dist_vec = ray_vec_new(RAY_F64, k);
    if (!dist_vec || RAY_IS_ERR(dist_vec)) { ray_release(result); return NULL; }
    dist_vec->len = k;
    double* dd = (double*)ray_data(dist_vec);
    for (int64_t i = 0; i < k; i++) dd[i] = dists[i];
    ray_t* prev = result;
    result = ray_table_add_col(result, sym_intern_safe("_dist", 5), dist_vec);
    ray_release(dist_vec);
    if (!result || RAY_IS_ERR(result)) {
        /* Same accumulator clean-up as in the column loop above. */
        if (prev && !RAY_IS_ERR(prev) && prev != result) ray_release(prev);
        return NULL;
    }
    return result;
}
+ * + * Returns: + * - NULL pointer AND *count = nrows — no filter: identity scan, all rows accepted. + * - NULL pointer AND *count = 0 — filter rejected every row. + * - NULL pointer AND *count = -1 — ALLOCATION FAILURE: caller must propagate OOM. + * - non-NULL pointer AND *count > 0 — explicit rowid list to walk. + * + * `g->selection` is always cleared before returning when this helper has + * observed it — success or failure — so downstream ops don't double-filter. */ +static int64_t* accepted_rowids(ray_graph_t* g, int64_t nrows, int64_t* count) { + if (!g->selection) { *count = nrows; return NULL; } + + int64_t n_accepted = ray_rowsel_meta(g->selection)->total_pass; + + /* Consume the selection up front so all exit paths leave g->selection + * clean regardless of downstream allocation outcomes. */ + ray_t* sel = g->selection; + g->selection = NULL; + + if (n_accepted == 0) { + ray_release(sel); + *count = 0; + return NULL; + } + + ray_t* idx_blk = ray_rowsel_to_indices(sel); + if (!idx_blk) { + ray_release(sel); + *count = -1; /* OOM */ + return NULL; + } + + int64_t* dense = (int64_t*)ray_sys_alloc((size_t)n_accepted * sizeof(int64_t)); + if (!dense) { + ray_release(idx_blk); + ray_release(sel); + *count = -1; /* OOM */ + return NULL; + } + memcpy(dense, ray_data(idx_blk), (size_t)n_accepted * sizeof(int64_t)); + ray_release(idx_blk); + ray_release(sel); + *count = n_accepted; + return dense; +} + +/* Max-heap top-K by distance (lower=closer). Mirrors the heap in + * src/ops/embedding.c:ray_knn_fn. 
/* Max-heap top-K by distance (lower = closer).  The root always holds the
 * FARTHEST of the candidates kept so far, so a new candidate only has to
 * beat the root to get in. */
typedef struct { double d; int64_t id; } rr_ent_t;

/* Strict "a is farther than b".
 *
 * NaN distances (e.g. from NaN/Inf components in a stored vector) are
 * ordered as farther than every real distance and equal to each other.
 * Plain `<` / `>` are always false for NaN, which would let a NaN entry
 * sit in the heap forever and push real neighbours out of the top-K. */
static bool rr_dist_gt(double a, double b) {
    const bool a_nan = (a != a);
    const bool b_nan = (b != b);
    if (a_nan || b_nan) return a_nan && !b_nan;
    return a > b;
}

/* Offer candidate (d, id) to a bounded max-heap.
 *
 *   heap  storage for at least `k` entries
 *   k     capacity (maximum number of neighbours kept); k <= 0 is a no-op
 *   size  in/out: number of entries currently in the heap
 *   d     candidate distance
 *   id    candidate row id
 *
 * While the heap is not full the candidate is always added; afterwards it
 * replaces the root only if it is strictly closer than the root. */
static void rr_heap_insert(rr_ent_t* heap, int64_t k, int64_t* size,
                           double d, int64_t id) {
    if (k <= 0) return;  /* nothing may be stored; never touch heap[0] */

    if (*size < k) {
        /* Append, then sift up while the child is farther than its parent. */
        int64_t j = (*size)++;
        heap[j] = (rr_ent_t){ d, id };
        while (j > 0) {
            int64_t p = (j - 1) / 2;
            if (!rr_dist_gt(heap[j].d, heap[p].d)) break;
            rr_ent_t t = heap[p]; heap[p] = heap[j]; heap[j] = t;
            j = p;
        }
    } else if (rr_dist_gt(heap[0].d, d)) {
        /* Replace the current farthest entry, then sift down. */
        heap[0] = (rr_ent_t){ d, id };
        int64_t j = 0;
        for (;;) {
            int64_t l = 2 * j + 1, r = 2 * j + 2, best = j;
            if (l < *size && rr_dist_gt(heap[l].d, heap[best].d)) best = l;
            if (r < *size && rr_dist_gt(heap[r].d, heap[best].d)) best = r;
            if (best == j) break;
            rr_ent_t t = heap[j]; heap[j] = heap[best]; heap[best] = t;
            j = best;
        }
    }
}

/* Sort the first `size` heap entries ascending by distance (closest
 * first, NaN last).  Insertion sort — size is small (at most k), and the
 * sort is stable. */
static void rr_heap_sort(rr_ent_t* heap, int64_t size) {
    for (int64_t i = 1; i < size; i++) {
        rr_ent_t key = heap[i];
        int64_t j = i - 1;
        while (j >= 0 && rr_dist_gt(heap[j].d, key.d)) {
            heap[j + 1] = heap[j];
            j--;
        }
        heap[j + 1] = key;
    }
}
/* Predicate context — membership bitmap over the index's row space.
 * Bit (id % 8) of member[id / 8] is set iff node `id` is accepted;
 * n_nodes bounds the valid id range. */
typedef struct {
    const uint8_t* member;
    int64_t n_nodes;
} rr_member_ctx_t;

/* Filter callback handed to ray_hnsw_search_filter: true iff `node_id`
 * is inside [0, n_nodes) and its bit is set in the membership bitmap.
 * `ctx` must point at an rr_member_ctx_t. */
static bool rr_member_accept(int64_t node_id, void* ctx) {
    const rr_member_ctx_t* c = (const rr_member_ctx_t*)ctx;
    if (node_id < 0 || node_id >= c->n_nodes) return false;
    return (c->member[node_id / 8] >> (node_id % 8)) & 1;
}

/* Execute an index-backed (HNSW) nearest-neighbour rerank over `src`.
 *
 *   g    graph; g->selection, when set, is the upstream filter and is
 *        consumed here
 *   op   the rerank op; its parameters are read from the ext record
 *        found via find_ext(g, op->id)
 *   src  source table
 *
 * Returns a table of the matching source rows plus a `_dist` column, or
 * an error value:
 *   "nyi"    no ext record for the op
 *   "type"   src is NULL or not a table
 *   "schema" missing index / query, or non-positive dim / k
 *   "length" query dimension differs from the index dimension
 *   "oom"    allocation failure, or the search itself reported failure
 *
 * NOTE(review): on the unfiltered path the ids returned by
 * ray_hnsw_search are passed to gather_rows_with_dist unchecked — this
 * presumably relies on the index having been built over exactly the rows
 * of `src`; confirm against the index builder. */
ray_t* exec_ann_rerank(ray_graph_t* g, ray_op_t* op, ray_t* src) {
    ray_op_ext_t* ext = find_ext(g, op->id);
    if (!ext) return ray_error("nyi", NULL);
    if (!src || src->type != RAY_TABLE) return ray_error("type", NULL);

    ray_hnsw_t* idx = (ray_hnsw_t*)ext->rerank.hnsw_idx;
    const float* query = ext->rerank.query_vec;
    int32_t dim = ext->rerank.dim;
    int64_t k = ext->rerank.k;
    int32_t ef = ext->rerank.ef_search;
    if (!idx || !query || dim <= 0 || k <= 0) return ray_error("schema", NULL);
    if (dim != idx->dim) return ray_error("length", NULL);

    int64_t src_rows = ray_table_nrows(src);

    /* Special-case empty source: return a well-shaped empty result. */
    if (src_rows == 0) {
        if (g->selection) { ray_release(g->selection); g->selection = NULL; }
        ray_t* r = empty_result_with_dist(src);
        return r ? r : ray_error("oom", NULL);
    }

    /* accepted == NULL with a positive count means "no filter";
     * a negative count means the helper ran out of memory. */
    int64_t accepted_count = 0;
    int64_t* accepted = accepted_rowids(g, src_rows, &accepted_count);
    if (accepted_count < 0) return ray_error("oom", NULL);
    if (accepted_count == 0) {
        ray_t* r = empty_result_with_dist(src);
        return r ? r : ray_error("oom", NULL);
    }

    int64_t n_nodes = idx->n_nodes;
    /* The beam width must be at least k. */
    int32_t ef_search = ef;
    if ((int64_t)ef_search < k) ef_search = (int32_t)k;

    int64_t* out_ids = (int64_t*)ray_sys_alloc((size_t)k * sizeof(int64_t));
    double* out_ds = (double*)ray_sys_alloc((size_t)k * sizeof(double));
    if (!out_ids || !out_ds) {
        if (out_ids) ray_sys_free(out_ids);
        if (out_ds) ray_sys_free(out_ds);
        if (accepted) ray_sys_free(accepted);
        return ray_error("oom", NULL);
    }

    int64_t n_found;
    if (!accepted) {
        /* No filter — plain search with no per-candidate callback. */
        n_found = ray_hnsw_search(idx, query, dim, k, ef_search, out_ids, out_ds);
    } else {
        /* Build membership bitmap over the index's row space and hand it
         * to the filtered iterative scan as a predicate callback. */
        size_t bm_size = ((size_t)n_nodes + 7) / 8;
        uint8_t* member = (uint8_t*)ray_sys_alloc(bm_size);
        if (!member) {
            ray_sys_free(out_ids); ray_sys_free(out_ds); ray_sys_free(accepted);
            return ray_error("oom", NULL);
        }
        memset(member, 0, bm_size);
        for (int64_t i = 0; i < accepted_count; i++) {
            int64_t rid = accepted[i];
            /* Row ids outside the index's node range are ignored. */
            if (rid >= 0 && rid < n_nodes) member[rid / 8] |= (uint8_t)(1u << (rid % 8));
        }
        /* The dense list is no longer needed once the bitmap exists. */
        ray_sys_free(accepted);
        accepted = NULL;

        rr_member_ctx_t cb_ctx = { .member = member, .n_nodes = n_nodes };
        n_found = ray_hnsw_search_filter(idx, query, dim, k, ef_search,
                                         rr_member_accept, &cb_ctx,
                                         out_ids, out_ds);
        ray_sys_free(member);
    }
    if (accepted) ray_sys_free(accepted);

    /* ray_hnsw_search / _filter return -1 on internal OOM — surface it as
     * an error rather than silently returning a zero-row table. */
    if (n_found < 0) {
        ray_sys_free(out_ids);
        ray_sys_free(out_ds);
        return ray_error("oom", NULL);
    }

    ray_t* result = gather_rows_with_dist(src, out_ids, out_ds, n_found);
    ray_sys_free(out_ids);
    ray_sys_free(out_ds);
    if (!result) return ray_error("oom", NULL);
    return result;
}

/* ==========================================================================
 * exec_knn_rerank — brute force over a filtered column
 * ========================================================================== */

/* Execute a brute-force nearest-neighbour rerank over `src`.
 *
 * Scores every accepted row of the LIST column named by the op's
 * col_sym against the query vector, keeps the closest min(k, accepted)
 * rows in a bounded max-heap, and returns them closest-first with a
 * trailing `_dist` column.
 *
 *   g    graph; g->selection, when set, is the upstream filter
 *   op   the rerank op; parameters come from find_ext(g, op->id)
 *   src  source table
 *
 * Error values:
 *   "nyi"    no ext record for the op
 *   "type"   src is not a table, the column is not a LIST, or a scored
 *            row is not a numeric vector of length dim
 *   "schema" bad col_sym / query / dim / k
 *   "name"   the column does not exist in src
 *   "oom"    allocation failure
 *
 * NOTE(review): the "name" / "type" column checks return before
 * accepted_rowids runs, so on those paths g->selection is left set. */
ray_t* exec_knn_rerank(ray_graph_t* g, ray_op_t* op, ray_t* src) {
    ray_op_ext_t* ext = find_ext(g, op->id);
    if (!ext) return ray_error("nyi", NULL);
    if (!src || src->type != RAY_TABLE) return ray_error("type", NULL);

    int64_t col_sym = ext->rerank.col_sym;
    const float* query = ext->rerank.query_vec;
    int32_t dim = ext->rerank.dim;
    int64_t k = ext->rerank.k;
    rr_metric_t metric = rr_metric_from_hnsw(ext->rerank.metric);
    if (col_sym <= 0 || !query || dim <= 0 || k <= 0) return ray_error("schema", NULL);

    /* Special-case empty source: return a well-shaped empty result rather
     * than falling into the top-K code with k_eff=0. */
    int64_t src_rows = ray_table_nrows(src);
    if (src_rows == 0) {
        /* Consume any dangling selection to keep downstream ops clean. */
        if (g->selection) { ray_release(g->selection); g->selection = NULL; }
        ray_t* r = empty_result_with_dist(src);
        return r ? r : ray_error("oom", NULL);
    }

    /* We walk the ORIGINAL source table and skip non-accepted rows via
     * an accepted-rowid list.  Avoids sel_compact, which currently
     * doesn't correctly materialise RAY_LIST columns. */
    ray_t* col = ray_table_get_col(src, col_sym);
    if (!col) return ray_error("name", NULL);
    if (col->type != RAY_LIST) return ray_error("type", NULL);

    int64_t nrows = col->len;

    int64_t accepted_count = 0;
    int64_t* accepted = accepted_rowids(g, nrows, &accepted_count);
    if (accepted_count < 0) return ray_error("oom", NULL);
    if (accepted_count == 0) {
        ray_t* r = empty_result_with_dist(src);
        return r ? r : ray_error("oom", NULL);
    }

    /* Convert query float* → double[] + norm. */
    double* q_buf = (double*)ray_sys_alloc((size_t)dim * sizeof(double));
    if (!q_buf) { if (accepted) ray_sys_free(accepted); return ray_error("oom", NULL); }
    double q_norm_sq = 0.0;
    for (int32_t j = 0; j < dim; j++) {
        q_buf[j] = (double)query[j];
        q_norm_sq += q_buf[j] * q_buf[j];
    }
    double q_norm = sqrt(q_norm_sq);

    /* Never keep more neighbours than there are accepted rows. */
    int64_t k_eff = k;
    if (k_eff > accepted_count) k_eff = accepted_count;

    rr_ent_t* heap = (rr_ent_t*)ray_sys_alloc((size_t)k_eff * sizeof(rr_ent_t));
    if (!heap) {
        ray_sys_free(q_buf); if (accepted) ray_sys_free(accepted);
        return ray_error("oom", NULL);
    }
    int64_t heap_size = 0;

    /* Walk accepted rows — identity scan if no filter, dense rowid list otherwise. */
    if (accepted) {
        for (int64_t ai = 0; ai < accepted_count; ai++) {
            int64_t i = accepted[ai];
            /* Rowids outside the column are skipped, not treated as errors. */
            if (i < 0 || i >= nrows) continue;
            ray_t* row = ray_list_get(col, i);
            if (!rr_is_numeric(row) || row->len != dim) {
                ray_sys_free(heap); ray_sys_free(q_buf); ray_sys_free(accepted);
                return ray_error("type", NULL);
            }
            double d = rr_row_dist(metric, row, q_buf, q_norm, dim);
            rr_heap_insert(heap, k_eff, &heap_size, d, i);
        }
    } else {
        for (int64_t i = 0; i < nrows; i++) {
            ray_t* row = ray_list_get(col, i);
            if (!rr_is_numeric(row) || row->len != dim) {
                ray_sys_free(heap); ray_sys_free(q_buf);
                return ray_error("type", NULL);
            }
            double d = rr_row_dist(metric, row, q_buf, q_norm, dim);
            rr_heap_insert(heap, k_eff, &heap_size, d, i);
        }
    }
    ray_sys_free(q_buf);
    if (accepted) ray_sys_free(accepted);

    /* Heap order -> ascending distance. */
    rr_heap_sort(heap, heap_size);

    /* Split the heap entries into the parallel arrays the gather expects. */
    int64_t* out_ids = (int64_t*)ray_sys_alloc((size_t)heap_size * sizeof(int64_t));
    double* out_ds = (double*)ray_sys_alloc((size_t)heap_size * sizeof(double));
    /* A NULL result only counts as failure when there is something to store. */
    if ((!out_ids || !out_ds) && heap_size > 0) {
        if (out_ids) ray_sys_free(out_ids);
        if (out_ds) ray_sys_free(out_ds);
        ray_sys_free(heap);
        return ray_error("oom", NULL);
    }
    for (int64_t i = 0; i < heap_size; i++) {
        out_ids[i] = heap[i].id;
        out_ds[i] = heap[i].d;
    }
    ray_sys_free(heap);

    ray_t* result = gather_rows_with_dist(src, out_ids, out_ds, heap_size);
    if (out_ids) ray_sys_free(out_ids);
    if (out_ds) ray_sys_free(out_ds);
    if (!result) return ray_error("oom", NULL);
    return result;
}
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * ray_rowsel — implementation. See src/ops/rowsel.h for the data + * layout and lifetime contract. + */ + +#include "ops/rowsel.h" +#include "ops/ops.h" +#include "core/pool.h" + +#include +#include +#include + +/* ────────────────────────────────────────────────────────────────── + * Allocation helpers + * ────────────────────────────────────────────────────────────────── */ + +ray_t* ray_rowsel_new(int64_t nrows, int64_t total_pass, int64_t idx_count) { + if (nrows < 0 || total_pass < 0 || total_pass > nrows || + idx_count < 0 || idx_count > total_pass) return NULL; + + size_t payload = ray_rowsel_payload_bytes(nrows, idx_count); + ray_t* block = ray_alloc(payload); + if (!block) return NULL; + + /* ray_alloc zeroes the 32-byte header but NOT the data area. + * Initialize the inline meta header explicitly; arrays are filled + * by the producer after this call. 
*/ + ray_rowsel_t* m = ray_rowsel_meta(block); + m->total_pass = total_pass; + m->nrows = nrows; + m->n_segs = (uint32_t)((nrows + RAY_MORSEL_ELEMS - 1) / RAY_MORSEL_ELEMS); + if (nrows <= 0) m->n_segs = 0; + m->_pad = 0; + + return block; +} + +void ray_rowsel_release(ray_t* block) { + if (block) ray_release(block); +} + +/* ────────────────────────────────────────────────────────────────── + * Producer — parallel two-pass build from a RAY_BOOL pred vec + * ────────────────────────────────────────────────────────────────── */ + +/* Pass 1 worker context. Each worker owns a disjoint segment range + * [start, end) and writes per-segment popcounts into popcount[]. */ +typedef struct { + const uint8_t* pred_data; + int64_t nrows; + uint32_t* popcount; /* one entry per segment */ +} rowsel_pass1_ctx_t; + +static void rowsel_pass1_fn(void* vctx, uint32_t worker_id, + int64_t start_seg, int64_t end_seg) { + (void)worker_id; + rowsel_pass1_ctx_t* c = (rowsel_pass1_ctx_t*)vctx; + const uint8_t* pred = c->pred_data; + int64_t nrows = c->nrows; + uint32_t* popcount = c->popcount; + + for (int64_t seg = start_seg; seg < end_seg; seg++) { + int64_t base = seg * RAY_MORSEL_ELEMS; + int64_t end = base + RAY_MORSEL_ELEMS; + if (end > nrows) end = nrows; + uint32_t n = 0; + for (int64_t r = base; r < end; r++) + n += pred[r] != 0; + popcount[seg] = n; + } +} + +/* Pass 2 worker context. Each worker owns a disjoint segment range + * and writes morsel-local indices into the (already-sized) idx[] + * array. Workers never overlap because each segment's slice + * idx[seg_offsets[seg] .. seg_offsets[seg+1]) is exclusive. 
/* Pass 2 worker context.  Each worker owns a disjoint segment range
 * and writes morsel-local indices into the (already-sized) idx[]
 * array.  Workers never overlap because each segment's slice
 * idx[seg_offsets[seg] .. seg_offsets[seg+1]) is exclusive. */
typedef struct {
    const uint8_t* pred_data;    /* predicate bytes, one per source row */
    int64_t nrows;               /* number of source rows */
    const uint8_t* seg_flags;    /* per-segment NONE / ALL / MIX */
    const uint32_t* seg_offsets; /* prefix sum into idx[] */
    uint16_t* idx;               /* output: morsel-local row indices */
} rowsel_pass2_ctx_t;

/* Pass 2 worker: for every MIX segment in [start_seg, end_seg) write the
 * segment-relative index of each passing row into that segment's slice
 * of idx[].  NONE and ALL segments store no indices and are skipped. */
static void rowsel_pass2_fn(void* vctx, uint32_t worker_id,
                            int64_t start_seg, int64_t end_seg) {
    (void)worker_id;
    rowsel_pass2_ctx_t* c = (rowsel_pass2_ctx_t*)vctx;
    const uint8_t* pred = c->pred_data;
    int64_t nrows = c->nrows;

    for (int64_t seg = start_seg; seg < end_seg; seg++) {
        if (c->seg_flags[seg] != RAY_SEL_MIX) continue; /* NONE / ALL: nothing to write */
        int64_t base = seg * RAY_MORSEL_ELEMS;
        int64_t end = base + RAY_MORSEL_ELEMS;
        if (end > nrows) end = nrows;
        uint16_t* out = c->idx + c->seg_offsets[seg];
        uint32_t out_n = 0;
        for (int64_t r = base; r < end; r++) {
            if (pred[r])
                out[out_n++] = (uint16_t)(r - base);
        }
        /* sanity: out_n must equal seg_offsets[seg+1] - seg_offsets[seg] */
    }
}

/* Build a row selection from a RAY_BOOL predicate vector.
 *
 * Returns:
 *   - a new selection block when some but not all rows pass, or when the
 *     predicate is empty (an empty selection);
 *   - NULL when EVERY row passes (the "no selection" convention);
 *   - NULL also when `pred` is NULL / not RAY_BOOL or an allocation fails.
 *
 * NOTE(review): the NULL return is therefore ambiguous — a caller cannot
 * tell "all rows pass" from "allocation failed" by the return value alone. */
ray_t* ray_rowsel_from_pred(ray_t* pred) {
    if (!pred || pred->type != RAY_BOOL) return NULL;
    int64_t nrows = pred->len;
    if (nrows == 0) {
        /* Empty source — empty selection. */
        return ray_rowsel_new(0, 0, 0);
    }

    const uint8_t* pred_data = (const uint8_t*)ray_data(pred);
    uint32_t n_segs = (uint32_t)((nrows + RAY_MORSEL_ELEMS - 1) / RAY_MORSEL_ELEMS);

    /* Temporary popcount[seg] buffer.  ray_alloc returns a ray_t*
     * whose data area is the byte buffer we need. */
    ray_t* pop_block = ray_alloc((size_t)n_segs * sizeof(uint32_t));
    if (!pop_block) return NULL;
    uint32_t* popcount = (uint32_t*)ray_data(pop_block);

    /* Pass 1 — parallel popcount per segment. */
    rowsel_pass1_ctx_t p1 = {
        .pred_data = pred_data,
        .nrows = nrows,
        .popcount = popcount,
    };
    /* Run on the worker pool only for inputs at or above the parallel
     * threshold; otherwise process all segments inline on this thread. */
    ray_pool_t* pool = ray_pool_get();
    if (pool && nrows >= RAY_PARALLEL_THRESHOLD)
        ray_pool_dispatch(pool, rowsel_pass1_fn, &p1, (int64_t)n_segs);
    else
        rowsel_pass1_fn(&p1, 0, 0, (int64_t)n_segs);

    /* Single sweep: classify each segment and accumulate both
     * total_pass (ALL + MIX rows, for meta) and idx_count (MIX rows
     * only, for sizing idx[]).  Walking popcount[] sequentially —
     * n_segs is at most ~10K for a 10M-row table, trivial. */
    int64_t total_pass = 0;
    int64_t idx_count = 0;
    for (uint32_t s = 0; s < n_segs; s++) {
        int64_t seg_start = (int64_t)s * RAY_MORSEL_ELEMS;
        int64_t seg_end = seg_start + RAY_MORSEL_ELEMS;
        if (seg_end > nrows) seg_end = nrows;
        int64_t seg_len = seg_end - seg_start;
        uint32_t pc = popcount[s];
        total_pass += pc;
        if (pc != 0 && (int64_t)pc != seg_len)
            idx_count += pc;
    }

    if (total_pass == nrows) {
        /* All rows pass — convention is "no selection". */
        ray_release(pop_block);
        return NULL;
    }

    /* Allocate the result block sized for the MIX-contributed
     * indices only.  ALL and NONE segments add nothing to idx[]. */
    ray_t* block = ray_rowsel_new(nrows, total_pass, idx_count);
    if (!block) {
        ray_release(pop_block);
        return NULL;
    }

    /* Fill seg_flags + seg_offsets in a second sequential walk over
     * popcount[].  cum accumulates MIX-contributed indices to build
     * the prefix sum into idx[]. */
    uint8_t* seg_flags = ray_rowsel_flags(block);
    uint32_t* seg_offsets = ray_rowsel_offsets(block);
    uint32_t cum = 0;
    for (uint32_t s = 0; s < n_segs; s++) {
        seg_offsets[s] = cum;
        int64_t seg_start = (int64_t)s * RAY_MORSEL_ELEMS;
        int64_t seg_end = seg_start + RAY_MORSEL_ELEMS;
        if (seg_end > nrows) seg_end = nrows;
        int64_t seg_len = seg_end - seg_start;
        uint32_t pc = popcount[s];
        if (pc == 0) {
            seg_flags[s] = RAY_SEL_NONE;
        } else if ((int64_t)pc == seg_len) {
            seg_flags[s] = RAY_SEL_ALL;
            /* ALL contributes nothing to idx[]; cum unchanged. */
        } else {
            seg_flags[s] = RAY_SEL_MIX;
            cum += pc;
        }
    }
    /* Sentinel entry so slice length is always offsets[s+1] - offsets[s]. */
    seg_offsets[n_segs] = cum;

    /* Pass 2 — parallel index write into idx[]. */
    if (cum > 0) {
        rowsel_pass2_ctx_t p2 = {
            .pred_data = pred_data,
            .nrows = nrows,
            .seg_flags = seg_flags,
            .seg_offsets = seg_offsets,
            .idx = ray_rowsel_idx(block),
        };
        if (pool && nrows >= RAY_PARALLEL_THRESHOLD)
            ray_pool_dispatch(pool, rowsel_pass2_fn, &p2, (int64_t)n_segs);
        else
            rowsel_pass2_fn(&p2, 0, 0, (int64_t)n_segs);
    }

    ray_release(pop_block);
    return block;
}
*/ +typedef struct { + const uint8_t* flags; + const uint32_t* offsets; + const uint16_t* idx; + const uint32_t* flat_offsets; /* per-segment offset into out[] */ + int64_t* out; + int64_t nrows; +} rowsel_to_idx_ctx_t; + +static void rowsel_to_idx_fn(void* vctx, uint32_t worker_id, + int64_t start_seg, int64_t end_seg) { + (void)worker_id; + rowsel_to_idx_ctx_t* c = (rowsel_to_idx_ctx_t*)vctx; + int64_t nrows = c->nrows; + for (int64_t seg = start_seg; seg < end_seg; seg++) { + uint8_t f = c->flags[seg]; + if (f == RAY_SEL_NONE) continue; + int64_t base = seg * RAY_MORSEL_ELEMS; + int64_t end = base + RAY_MORSEL_ELEMS; + if (end > nrows) end = nrows; + int64_t j = c->flat_offsets[seg]; + if (f == RAY_SEL_ALL) { + for (int64_t r = base; r < end; r++) c->out[j++] = r; + } else { + const uint16_t* slice = c->idx + c->offsets[seg]; + uint32_t n = c->offsets[seg + 1] - c->offsets[seg]; + for (uint32_t i = 0; i < n; i++) c->out[j++] = base + slice[i]; + } + } +} + +ray_t* ray_rowsel_to_indices(ray_t* sel) { + if (!sel) return NULL; + ray_rowsel_t* m = ray_rowsel_meta(sel); + const uint8_t* flags = ray_rowsel_flags(sel); + const uint32_t* offsets = ray_rowsel_offsets(sel); + const uint16_t* idx = ray_rowsel_idx(sel); + int64_t nrows = m->nrows; + int64_t total_pass = m->total_pass; + uint32_t n_segs = m->n_segs; + + ray_t* block = ray_alloc((size_t)total_pass * sizeof(int64_t)); + if (!block) return NULL; + int64_t* out = (int64_t*)ray_data(block); + + if (total_pass == 0 || n_segs == 0) return block; + + /* Build per-segment flat offsets into out[]. Sequential prefix + * sum over n_segs entries — cheap (n_segs ≈ nrows/1024). 
*/ + ray_t* fo_block = ray_alloc((size_t)n_segs * sizeof(uint32_t)); + if (!fo_block) { ray_release(block); return NULL; } + uint32_t* flat_offsets = (uint32_t*)ray_data(fo_block); + uint32_t cum = 0; + for (uint32_t s = 0; s < n_segs; s++) { + flat_offsets[s] = cum; + uint8_t f = flags[s]; + if (f == RAY_SEL_NONE) continue; + if (f == RAY_SEL_ALL) { + int64_t base = (int64_t)s * RAY_MORSEL_ELEMS; + int64_t end = base + RAY_MORSEL_ELEMS; + if (end > nrows) end = nrows; + cum += (uint32_t)(end - base); + } else { + cum += offsets[s + 1] - offsets[s]; + } + } + + /* Parallel write: each worker fills its own segment range into + * out[] using flat_offsets to find the start of each segment. + * Slices are non-overlapping by construction. */ + rowsel_to_idx_ctx_t ctx = { + .flags = flags, + .offsets = offsets, + .idx = idx, + .flat_offsets = flat_offsets, + .out = out, + .nrows = nrows, + }; + ray_pool_t* pool = ray_pool_get(); + if (pool && nrows >= RAY_PARALLEL_THRESHOLD) + ray_pool_dispatch(pool, rowsel_to_idx_fn, &ctx, (int64_t)n_segs); + else + rowsel_to_idx_fn(&ctx, 0, 0, (int64_t)n_segs); + + ray_release(fo_block); + return block; +} + +/* ────────────────────────────────────────────────────────────────── + * Refine — chained filter + * ────────────────────────────────────────────────────────────────── */ + +/* refine: walk `existing`'s surviving rows, test pred at each, emit a + * new selection. Sequential — chained filters are typically applied + * to already-shrunk row sets where parallelism doesn't pay back the + * dispatch overhead. Phase 2 will revisit if measurement says + * otherwise. 
*/ +ray_t* ray_rowsel_refine(ray_t* existing, ray_t* pred) { + if (!existing) return ray_rowsel_from_pred(pred); + if (!pred || pred->type != RAY_BOOL) return NULL; + + ray_rowsel_t* em = ray_rowsel_meta(existing); + int64_t nrows = em->nrows; + if (pred->len != nrows) return NULL; + + const uint8_t* pred_data = (const uint8_t*)ray_data(pred); + const uint8_t* e_flags = ray_rowsel_flags(existing); + const uint32_t* e_offsets = ray_rowsel_offsets(existing); + const uint16_t* e_idx = ray_rowsel_idx(existing); + uint32_t n_segs = em->n_segs; + + /* Pass 1 — count survivors per segment. */ + ray_t* pop_block = ray_alloc((size_t)n_segs * sizeof(uint32_t)); + if (!pop_block) return NULL; + uint32_t* popcount = (uint32_t*)ray_data(pop_block); + memset(popcount, 0, (size_t)n_segs * sizeof(uint32_t)); + + int64_t total_pass = 0; + int64_t idx_count = 0; + for (uint32_t s = 0; s < n_segs; s++) { + uint8_t f = e_flags[s]; + if (f == RAY_SEL_NONE) continue; + int64_t base = (int64_t)s * RAY_MORSEL_ELEMS; + int64_t end = base + RAY_MORSEL_ELEMS; + if (end > nrows) end = nrows; + int64_t seg_len = end - base; + uint32_t n = 0; + if (f == RAY_SEL_ALL) { + for (int64_t r = base; r < end; r++) + n += pred_data[r] != 0; + } else { /* MIX */ + const uint16_t* src = e_idx + e_offsets[s]; + uint32_t src_n = e_offsets[s + 1] - e_offsets[s]; + for (uint32_t i = 0; i < src_n; i++) { + int64_t r = base + src[i]; + n += pred_data[r] != 0; + } + } + popcount[s] = n; + total_pass += n; + /* This segment will be MIX in the output (and contribute to + * idx[]) iff some-but-not-all of its rows pass. */ + if (n != 0 && (int64_t)n != seg_len) + idx_count += n; + } + + if (total_pass == nrows) { + /* Refinement somehow ended up matching every source row. + * Should be impossible unless `existing` was already + * effectively all-pass and pred is all-true — but handle it. 
*/ + ray_release(pop_block); + return NULL; + } + + ray_t* block = ray_rowsel_new(nrows, total_pass, idx_count); + if (!block) { + ray_release(pop_block); + return NULL; + } + uint8_t* seg_flags = ray_rowsel_flags(block); + uint32_t* seg_offsets = ray_rowsel_offsets(block); + uint16_t* idx_out = ray_rowsel_idx(block); + + uint32_t cum = 0; + for (uint32_t s = 0; s < n_segs; s++) { + seg_offsets[s] = cum; + int64_t base = (int64_t)s * RAY_MORSEL_ELEMS; + int64_t end = base + RAY_MORSEL_ELEMS; + if (end > nrows) end = nrows; + int64_t seg_len = end - base; + uint32_t pc = popcount[s]; + if (pc == 0) { + seg_flags[s] = RAY_SEL_NONE; + continue; + } + if ((int64_t)pc == seg_len) { + seg_flags[s] = RAY_SEL_ALL; + continue; + } + seg_flags[s] = RAY_SEL_MIX; + + /* Pass 2 (inlined, sequential) — write the surviving + * morsel-local indices for this segment. */ + uint16_t* dst = idx_out + cum; + uint32_t dn = 0; + uint8_t f = e_flags[s]; + if (f == RAY_SEL_ALL) { + for (int64_t r = base; r < end; r++) + if (pred_data[r]) + dst[dn++] = (uint16_t)(r - base); + } else { /* MIX in existing */ + const uint16_t* src = e_idx + e_offsets[s]; + uint32_t src_n = e_offsets[s + 1] - e_offsets[s]; + for (uint32_t i = 0; i < src_n; i++) { + int64_t r = base + src[i]; + if (pred_data[r]) + dst[dn++] = (uint16_t)(r - base); + } + } + cum += pc; + } + seg_offsets[n_segs] = cum; + + ray_release(pop_block); + return block; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/rowsel.h b/crates/rayforce-sys/vendor/rayforce/src/ops/rowsel.h new file mode 100644 index 0000000..c28e593 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/rowsel.h @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * ray_rowsel — morsel-local row-filter selection. + * + * Replacement for the bitmap (RAY_SEL) form of g->selection used by + * OP_FILTER on table inputs. Stores the surviving rows of a filter + * as morsel-local uint16 indices instead of a per-row bitmap, so the + * downstream group / sort / agg hot loops iterate only the live rows + * with no per-row bitmap test. + * + * Layout — single ray_alloc block, contiguous payload at ray_data(): + * + * ray_rowsel_t meta (24 bytes; at ray_data(block)) + * uint8_t seg_flags[] (n_segs, padded to 8-byte boundary) + * uint32_t seg_offsets[] (n_segs + 1, prefix sum into idx[]) + * uint16_t idx[] (total_pass entries; only MIX + * segments contribute) + * + * Per-segment flag values are the same NONE / ALL / MIX constants the + * existing RAY_SEL bitmap uses (src/ops/ops.h): + * - NONE: no rows in this morsel pass — consumer skips wholesale. 
+ * - ALL: every row in this morsel passes — seg_offsets[seg+1] + * equals seg_offsets[seg], no indices stored, consumer + * iterates [seg_start, seg_end) densely. + * - MIX: partial pass — idx[seg_offsets[seg] .. seg_offsets[seg+1]) + * holds the morsel-local positions (0..1023) of passing + * rows in segment order. + * + * Lifetime: single-owner. Producer (ray_rowsel_from_pred / refine) + * returns a fresh ray_t* with rc=1. Consumer calls ray_rowsel_release + * to free. No COW semantics — selection data is never shared and + * never serialized. + * + * The block is allocated via ray_alloc and uses no specific type tag + * (zeroed by ray_alloc); nothing in the runtime dispatches on it. + * The accessors below are the only valid way to read its contents. + * + * Note: this is unrelated to the existing RAY_SEL type tag used by + * src/ops/join.c and src/ops/traverse.c as a generic key-bit set. + * Those continue to use ray_sel_* unchanged. + */ + +#ifndef RAY_ROWSEL_H +#define RAY_ROWSEL_H + +#include "rayforce.h" +#include "ops/ops.h" /* RAY_SEL_NONE/ALL/MIX, RAY_MORSEL_ELEMS */ + +#include + +/* RAY_MORSEL_ELEMS must fit in uint16_t for morsel-local indices. */ +_Static_assert(RAY_MORSEL_ELEMS <= 65536, + "morsel size exceeds uint16_t index range"); + +/* Inline header at ray_data(block). Pointer fields are NOT stored + * here — they are reconstructed from this header's n_segs / total_pass + * via the accessor inlines below. The payload arrays live immediately + * after this struct in the same allocation. */ +typedef struct { + int64_t total_pass; /* number of passing rows */ + int64_t nrows; /* source row count this selection covers */ + uint32_t n_segs; /* ceil(nrows / RAY_MORSEL_ELEMS) */ + uint32_t _pad; +} ray_rowsel_t; + +/* Round n up to a multiple of 8 so the next array starts aligned. 
*/ +static inline size_t ray_rowsel_pad8(size_t n) { + return (n + 7u) & ~(size_t)7u; +} + +static inline ray_rowsel_t* ray_rowsel_meta(ray_t* block) { + return (ray_rowsel_t*)ray_data(block); +} + +static inline uint8_t* ray_rowsel_flags(ray_t* block) { + return (uint8_t*)ray_data(block) + sizeof(ray_rowsel_t); +} + +static inline uint32_t* ray_rowsel_offsets(ray_t* block) { + ray_rowsel_t* m = ray_rowsel_meta(block); + return (uint32_t*)(ray_rowsel_flags(block) + ray_rowsel_pad8(m->n_segs)); +} + +static inline uint16_t* ray_rowsel_idx(ray_t* block) { + ray_rowsel_t* m = ray_rowsel_meta(block); + return (uint16_t*)(ray_rowsel_offsets(block) + (m->n_segs + 1)); +} + +/* Compute the total bytes needed for the inline payload. + * `idx_count` is the number of uint16_t entries the idx[] array + * needs to hold — this is the sum of popcounts over MIX segments + * only, NOT the total passing-row count. ALL segments contribute + * zero to idx[]. */ +static inline size_t ray_rowsel_payload_bytes(int64_t nrows, int64_t idx_count) { + uint32_t n_segs = (uint32_t)((nrows + RAY_MORSEL_ELEMS - 1) / RAY_MORSEL_ELEMS); + if (nrows <= 0) n_segs = 0; + return sizeof(ray_rowsel_t) + + ray_rowsel_pad8(n_segs) + + (size_t)(n_segs + 1) * sizeof(uint32_t) + + (size_t)idx_count * sizeof(uint16_t); +} + +/* Allocate a rowsel block. + * + * `nrows` — source row count this selection covers. + * `total_pass` — number of passing rows (ALL + MIX). Stored in + * meta; consumers read it for sizing decisions. + * `idx_count` — number of uint16_t slots the idx[] array needs. + * Equal to the sum of popcounts over segments + * tagged MIX in the final layout. ALL and NONE + * segments contribute zero. + * + * Header fields are populated; arrays are uninitialized. Caller + * fills seg_flags, seg_offsets, and idx, then hands the block off + * (g->selection, etc.) or releases via ray_rowsel_release. + * Returns NULL on OOM. 
*/ +ray_t* ray_rowsel_new(int64_t nrows, int64_t total_pass, int64_t idx_count); + +/* Release a rowsel block. Equivalent to ray_release / ray_free of + * the underlying allocation — exposed under its own name for clarity + * at call sites. */ +void ray_rowsel_release(ray_t* block); + +/* Build a rowsel from a RAY_BOOL predicate vector. + * + * pred must be a flat RAY_BOOL vec (byte-per-row). Returns: + * - NULL if all rows pass (the all-pass convention is "no + * selection", same as g->selection == NULL). + * - A fresh rowsel block (rc=1) otherwise, including the + * none-pass case (zero-length idx, all flags NONE). + * + * The build runs in two parallel passes when nrows is large enough + * to benefit (>= RAY_PARALLEL_THRESHOLD): pass 1 computes per-segment + * popcount + flag, an inline prefix sum fills seg_offsets, pass 2 + * writes the morsel-local indices into the global idx[] (each worker + * writes its own non-overlapping slice). Smaller pred vecs run the + * same logic single-threaded. */ +ray_t* ray_rowsel_from_pred(ray_t* pred); + +/* Flatten a rowsel into a dense int64 array of global row indices, + * sorted ascending. Length of the array is `meta->total_pass`. + * + * Returned block is a ray_t* byte buffer whose ray_data() points to + * an `int64_t[total_pass]`. Consumer gets a raw pointer via + * ray_data() and releases the block when done via ray_release. + * Returns NULL on OOM. + * + * Used by exec_group and similar consumers that can't cheaply walk + * the morsel-local rowsel inline (yet) — they dispatch workers over + * [0, total_pass) using the flattened indices directly. */ +ray_t* ray_rowsel_to_indices(ray_t* sel); + +/* Refine an existing rowsel by AND-ing it with a fresh predicate vec. + * + * Used by chained OP_FILTER on a table input that already has a + * g->selection. Walks `existing`'s surviving rows, tests pred at each, + * emits a new rowsel containing only the positions that pass both. 
+ * Returns NULL if the result is all-pass (impossible here unless + * existing was already all-pass), or a fresh block otherwise. + * + * Does not consume `existing` — caller is responsible for releasing + * the old selection after replacing it. */ +ray_t* ray_rowsel_refine(ray_t* existing, ray_t* pred); + +#endif /* RAY_ROWSEL_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/sort.c b/crates/rayforce-sys/vendor/rayforce/src/ops/sort.c new file mode 100644 index 0000000..4b0b502 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/sort.c @@ -0,0 +1,3682 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ops/internal.h" +#include "lang/internal.h" +#include "ops/ops.h" +#include "mem/sys.h" + +/* -------------------------------------------------------------------------- + * Sort comparator: compare two row indices across all sort keys. 
+ * Returns negative if a < b, positive if a > b, 0 if equal. + * -------------------------------------------------------------------------- */ +/* sort_cmp_ctx_t defined in exec_internal.h */ + +int sort_cmp(const sort_cmp_ctx_t* ctx, int64_t a, int64_t b) { + for (uint8_t k = 0; k < ctx->n_sort; k++) { + ray_t* col = ctx->vecs[k]; + if (!col) continue; + int cmp = 0; + int null_cmp = 0; + int desc = ctx->desc ? ctx->desc[k] : 0; + int nf = ctx->nulls_first ? ctx->nulls_first[k] : desc; + + /* Check null bitmap for both elements */ + int a_null = ray_vec_is_null(col, a); + int b_null = ray_vec_is_null(col, b); + if (a_null || b_null) { + null_cmp = 1; + if (a_null && b_null) cmp = 0; + else if (a_null) cmp = nf ? -1 : 1; + else cmp = nf ? 1 : -1; + } else if (col->type == RAY_F64) { + double va = ((double*)ray_data(col))[a]; + double vb = ((double*)ray_data(col))[b]; + if (va < vb) cmp = -1; + else if (va > vb) cmp = 1; + } else if (col->type == RAY_I64 || col->type == RAY_TIMESTAMP) { + int64_t va = ((int64_t*)ray_data(col))[a]; + int64_t vb = ((int64_t*)ray_data(col))[b]; + if (va < vb) cmp = -1; + else if (va > vb) cmp = 1; + } else if (col->type == RAY_I32) { + int32_t va = ((int32_t*)ray_data(col))[a]; + int32_t vb = ((int32_t*)ray_data(col))[b]; + if (va < vb) cmp = -1; + else if (va > vb) cmp = 1; + } else if (RAY_IS_SYM(col->type)) { + int64_t va = ray_read_sym(ray_data(col), a, col->type, col->attrs); + int64_t vb = ray_read_sym(ray_data(col), b, col->type, col->attrs); + ray_t* sa = ray_sym_str(va); + ray_t* sb = ray_sym_str(vb); + if (sa && sb) cmp = ray_str_cmp(sa, sb); + } else if (col->type == RAY_I16) { + int16_t va = ((int16_t*)ray_data(col))[a]; + int16_t vb = ((int16_t*)ray_data(col))[b]; + if (va < vb) cmp = -1; + else if (va > vb) cmp = 1; + } else if (col->type == RAY_BOOL || col->type == RAY_U8) { + uint8_t va = ((uint8_t*)ray_data(col))[a]; + uint8_t vb = ((uint8_t*)ray_data(col))[b]; + if (va < vb) cmp = -1; + else if (va > vb) cmp = 1; + } 
else if (col->type == RAY_DATE || col->type == RAY_TIME) { + int32_t va = ((int32_t*)ray_data(col))[a]; + int32_t vb = ((int32_t*)ray_data(col))[b]; + if (va < vb) cmp = -1; + else if (va > vb) cmp = 1; + } else if (col->type == RAY_GUID) { + const uint8_t* base = (const uint8_t*)ray_data(col); + cmp = memcmp(base + a * 16, base + b * 16, 16); + } else if (col->type == RAY_STR) { + const ray_str_t* elems; + const char* pool; + str_resolve(col, &elems, &pool); + cmp = ray_str_t_cmp(&elems[a], pool, &elems[b], pool); + } + + if (desc && !null_cmp) cmp = -cmp; + if (cmp != 0) return cmp; + } + return 0; +} + +/* -------------------------------------------------------------------------- + * Small-array sort: introsort on (key, idx) pairs. + * + * For arrays ≤ RADIX_SORT_THRESHOLD, a single-pass encode + comparison sort + * beats multi-pass radix sort. Uses quicksort with median-of-3 pivot and + * heapsort fallback (introsort) to guarantee O(n log n) worst case. + * -------------------------------------------------------------------------- */ + +/* RADIX_SORT_THRESHOLD, SMALL_POOL_THRESHOLD defined in exec_internal.h */ + +static void key_sift_down(uint64_t* keys, int64_t* idx, int64_t n, int64_t i) { + for (;;) { + int64_t largest = i, l = 2*i+1, r = 2*i+2; + if (l < n && keys[l] > keys[largest]) largest = l; + if (r < n && keys[r] > keys[largest]) largest = r; + if (largest == i) return; + uint64_t tk = keys[i]; keys[i] = keys[largest]; keys[largest] = tk; + int64_t ti = idx[i]; idx[i] = idx[largest]; idx[largest] = ti; + i = largest; + } +} + +static void key_heapsort(uint64_t* keys, int64_t* idx, int64_t n) { + for (int64_t i = n/2 - 1; i >= 0; i--) + key_sift_down(keys, idx, n, i); + for (int64_t i = n - 1; i > 0; i--) { + uint64_t tk = keys[0]; keys[0] = keys[i]; keys[i] = tk; + int64_t ti = idx[0]; idx[0] = idx[i]; idx[i] = ti; + key_sift_down(keys, idx, i, 0); + } +} + +static void key_insertion_sort(uint64_t* keys, int64_t* idx, int64_t n) { + for (int64_t i = 
1; i < n; i++) { + uint64_t kk = keys[i]; + int64_t ii = idx[i]; + int64_t j = i - 1; + while (j >= 0 && keys[j] > kk) { + keys[j+1] = keys[j]; + idx[j+1] = idx[j]; + j--; + } + keys[j+1] = kk; + idx[j+1] = ii; + } +} + +static void key_introsort_impl(uint64_t* keys, int64_t* idx, + int64_t n, int depth) { + while (n > 32) { + if (depth == 0) { + key_heapsort(keys, idx, n); + return; + } + depth--; + + /* Median-of-3 pivot */ + int64_t mid = n / 2; + uint64_t a = keys[0], b = keys[mid], c = keys[n-1]; + int64_t pi; + if (a < b) pi = (b < c) ? mid : (a < c ? n-1 : 0); + else pi = (a < c) ? 0 : (b < c ? n-1 : mid); + + /* Move pivot to end */ + uint64_t pk = keys[pi]; keys[pi] = keys[n-1]; keys[n-1] = pk; + int64_t pv = idx[pi]; idx[pi] = idx[n-1]; idx[n-1] = pv; + + /* Partition */ + int64_t lo = 0; + for (int64_t i = 0; i < n - 1; i++) { + if (keys[i] < pk) { + uint64_t tk = keys[i]; keys[i] = keys[lo]; keys[lo] = tk; + int64_t ti = idx[i]; idx[i] = idx[lo]; idx[lo] = ti; + lo++; + } + } + keys[n-1] = keys[lo]; keys[lo] = pk; + idx[n-1] = idx[lo]; idx[lo] = pv; + + /* Recurse on smaller partition, iterate on larger */ + if (lo < n - 1 - lo) { + key_introsort_impl(keys, idx, lo, depth); + keys += lo + 1; idx += lo + 1; n -= lo + 1; + } else { + key_introsort_impl(keys + lo + 1, idx + lo + 1, n - lo - 1, depth); + n = lo; + } + } + key_insertion_sort(keys, idx, n); +} + +/* Sort (key, idx) pairs in-place by key. O(n log n) guaranteed. */ +void key_introsort(uint64_t* keys, int64_t* idx, int64_t n) { + if (n <= 1) return; + int depth = 0; + for (int64_t nn = n; nn > 1; nn >>= 1) depth++; + depth *= 2; + key_introsort_impl(keys, idx, n, depth); +} + +/* -------------------------------------------------------------------------- + * Adaptive pre-sort detection. + * + * Scans encoded keys to detect already-sorted and nearly-sorted data. + * Returns a sortedness metric: fraction of out-of-order pairs [0.0, 1.0]. 
+ * 0.0 = perfectly sorted → skip sort entirely + * small = nearly sorted → prefer comparison-based sort (adaptive mergesort) + * large = random → use radix sort + * -------------------------------------------------------------------------- */ + +typedef struct { + const uint64_t* keys; + int64_t* pw_unsorted; /* per-worker out-of-order count */ +} sortedness_ctx_t; + +/* Each worker counts out-of-order pairs in [start, end). + * Also checks the boundary: keys[start-1] vs keys[start] (for start > 0). */ +static void sortedness_fn(void* arg, uint32_t wid, int64_t start, int64_t end) { + sortedness_ctx_t* c = (sortedness_ctx_t*)arg; + const uint64_t* keys = c->keys; + int64_t unsorted = 0; + for (int64_t i = start + 1; i < end; i++) { + if (keys[i] < keys[i - 1]) unsorted++; + } + c->pw_unsorted[wid] += unsorted; +} + +/* Detect sortedness of encoded keys. Returns fraction of out-of-order pairs. + * If the result is 0.0, data is already sorted and sort can be skipped. + * If < threshold (e.g. 0.05), comparison sort is faster than radix. */ +double detect_sortedness(ray_pool_t* pool, const uint64_t* keys, int64_t n) { + if (n <= 1) return 0.0; + + int64_t total_unsorted; + if (pool && n > SMALL_POOL_THRESHOLD) { + uint32_t nw = ray_pool_total_workers(pool); + int64_t pw[nw]; + memset(pw, 0, (size_t)nw * sizeof(int64_t)); + sortedness_ctx_t ctx = { .keys = keys, .pw_unsorted = pw }; + ray_pool_dispatch(pool, sortedness_fn, &ctx, n); + + total_unsorted = 0; + for (uint32_t t = 0; t < nw; t++) + total_unsorted += pw[t]; + + /* Check cross-task boundaries (each task starts at a TASK_GRAIN + * boundary; the sortedness_fn only checks within [start+1, end) + * so boundaries between adjacent tasks are missed). 
*/ + int64_t grain = RAY_DISPATCH_MORSELS * RAY_MORSEL_ELEMS; + for (int64_t b = grain; b < n; b += grain) { + if (keys[b] < keys[b - 1]) + total_unsorted++; + } + } else { + total_unsorted = 0; + for (int64_t i = 1; i < n; i++) { + if (keys[i] < keys[i - 1]) total_unsorted++; + } + } + + return (double)total_unsorted / (double)(n - 1); +} + +/* Threshold: if fewer than 5% of pairs are out of order, data is + * "nearly sorted" and adaptive comparison sort beats radix. */ +/* NEARLY_SORTED_FRAC, radix_key_bytes defined in exec_internal.h */ + +/* Scan encoded keys to compute actual significant byte count from data range. + * Eliminates histogram passes for bytes that are uniform across all keys. */ +typedef struct { + const uint64_t* keys; + uint64_t* pw_or; /* per-worker XOR-diff accumulator */ +} key_range_ctx_t; + +static void key_range_fn(void* arg, uint32_t wid, int64_t start, int64_t end) { + key_range_ctx_t* c = (key_range_ctx_t*)arg; + const uint64_t* keys = c->keys; + uint64_t local_or = c->pw_or[wid]; + uint64_t first = keys[start]; + for (int64_t i = start; i < end; i++) + local_or |= keys[i] ^ first; + c->pw_or[wid] = local_or; +} + +uint8_t compute_key_nbytes(ray_pool_t* pool, const uint64_t* keys, + int64_t n, uint8_t type_max) { + if (n <= 1) return 1; + uint64_t diff; + if (pool && n > SMALL_POOL_THRESHOLD) { + uint32_t nw = ray_pool_total_workers(pool); + uint64_t pw_or[nw]; + memset(pw_or, 0, nw * sizeof(uint64_t)); + key_range_ctx_t ctx = { .keys = keys, .pw_or = pw_or }; + ray_pool_dispatch(pool, key_range_fn, &ctx, n); + diff = 0; + for (uint32_t w = 0; w < nw; w++) diff |= pw_or[w]; + /* Also XOR the first element from different worker ranges to + * catch cross-worker differences (workers' "first" may differ) */ + uint64_t first = keys[0]; + int64_t chunk = (n + nw - 1) / nw; + for (uint32_t w = 1; w < nw; w++) { + int64_t wstart = (int64_t)w * chunk; + if (wstart < n) diff |= keys[wstart] ^ first; + } + } else { + diff = 0; + uint64_t first = 
keys[0]; + for (int64_t i = 1; i < n; i++) + diff |= keys[i] ^ first; + } + uint8_t nb = 0; + while (diff) { nb++; diff >>= 8; } + if (nb < 1) nb = 1; + return nb < type_max ? nb : type_max; +} + +/* -------------------------------------------------------------------------- + * Parallel LSB radix sort (8-bit digits, 256 buckets) + * + * Used for single-key sorts on I64/F64/I32/SYM/TIMESTAMP columns, + * and composite-key sorts where all keys are integer types with total + * bit width <= 64. + * + * Three phases per byte: + * 1. Parallel histogram — each task counts byte occurrences in its range + * 2. Sequential prefix-sum — compute per-task scatter offsets + * 3. Parallel scatter — write elements to sorted positions + * + * Byte-skip: after histogram, if all elements share the same byte value, + * skip that pass entirely. Critical for small-range integers where most + * upper bytes are identical. + * -------------------------------------------------------------------------- */ + +/* radix_pass_ctx_t defined in exec_internal.h */ + +/* Phase 1: histogram — each task counts byte values in its fixed range */ +static void radix_hist_fn(void* arg, uint32_t wid, int64_t start, int64_t end) { + (void)wid; (void)end; + radix_pass_ctx_t* c = (radix_pass_ctx_t*)arg; + int64_t task = start; /* dispatch_n: [task, task+1) */ + + /* Zero histogram slice BEFORE early return — empty tasks must still + * clear their slice so the prefix-sum sees zeros, not garbage. */ + uint32_t* h = c->hist + task * 256; + memset(h, 0, 256 * sizeof(uint32_t)); + + int64_t chunk = (c->n + c->n_tasks - 1) / c->n_tasks; + int64_t lo = task * chunk; + int64_t hi = lo + chunk; + if (hi > c->n) hi = c->n; + if (lo >= hi) return; + + const uint64_t* keys = c->keys; + uint8_t shift = c->shift; + for (int64_t i = lo; i < hi; i++) + h[(keys[i] >> shift) & 0xFF]++; +} + +/* Phase 3: scatter with software write-combining (SWC). 
+ * Buffers entries per bucket before flushing, converting random writes + * into sequential bursts that are friendlier to the cache hierarchy. */ +#define SWC_N 8 /* entries per bucket buffer; 8*8=64B per bucket = 32KB total */ +static void radix_scatter_fn(void* arg, uint32_t wid, int64_t start, int64_t end) { + (void)wid; (void)end; + radix_pass_ctx_t* c = (radix_pass_ctx_t*)arg; + int64_t task = start; + + int64_t chunk = (c->n + c->n_tasks - 1) / c->n_tasks; + int64_t lo = task * chunk; + int64_t hi = lo + chunk; + if (hi > c->n) hi = c->n; + if (lo >= hi) return; + + int64_t* off = c->offsets + task * 256; + const uint64_t* k_in = c->keys; + const int64_t* i_in = c->idx; + uint64_t* k_out = c->keys_out; + int64_t* i_out = c->idx_out; + uint8_t shift = c->shift; + + /* SWC buffers: separate key/idx arrays to match output layout */ + uint64_t kbuf[256][SWC_N]; + int64_t ibuf[256][SWC_N]; + uint8_t bcnt[256]; + memset(bcnt, 0, 256); + + for (int64_t i = lo; i < hi; i++) { + uint8_t byte = (k_in[i] >> shift) & 0xFF; + uint8_t bp = bcnt[byte]; + kbuf[byte][bp] = k_in[i]; + ibuf[byte][bp] = i_in[i]; + if (++bp == SWC_N) { + int64_t pos = off[byte]; + memcpy(&k_out[pos], kbuf[byte], SWC_N * sizeof(uint64_t)); + memcpy(&i_out[pos], ibuf[byte], SWC_N * sizeof(int64_t)); + off[byte] = pos + SWC_N; + bp = 0; + } + bcnt[byte] = bp; + } + + /* Flush remaining entries */ + for (int b = 0; b < 256; b++) { + int64_t pos = off[b]; + for (uint8_t j = 0; j < bcnt[b]; j++) { + k_out[pos + j] = kbuf[b][j]; + i_out[pos + j] = ibuf[b][j]; + } + off[b] = pos + bcnt[b]; + } +} +#undef SWC_N + +/* Run radix sort on pre-encoded uint64_t keys + int64_t indices. + * n_bytes limits the number of byte passes (1..8) based on key width. + * Returns pointer to the final sorted index array (either `indices` or + * `idx_tmp`). Caller must keep both alive until done reading indices + * (the result may point into idx_tmp if an odd number of passes executed). 
+ * If sorted_keys_out is non-NULL, stores the pointer to the final sorted + * keys buffer (either `keys` or `keys_tmp`). + * Returns NULL on failure. */ +int64_t* radix_sort_run(ray_pool_t* pool, + uint64_t* keys, int64_t* indices, + uint64_t* keys_tmp, int64_t* idx_tmp, + int64_t n, uint8_t n_bytes, + uint64_t** sorted_keys_out) { + uint32_t n_tasks = pool ? ray_pool_total_workers(pool) : 1; + if (n_tasks < 1) n_tasks = 1; + + ray_t *hist_hdr = NULL, *off_hdr = NULL; + uint32_t* hist = (uint32_t*)scratch_alloc(&hist_hdr, + (size_t)n_tasks * 256 * sizeof(uint32_t)); + int64_t* offsets = (int64_t*)scratch_alloc(&off_hdr, + (size_t)n_tasks * 256 * sizeof(int64_t)); + if (!hist || !offsets) { + scratch_free(hist_hdr); scratch_free(off_hdr); + return NULL; + } + + uint64_t* src_k = keys, *dst_k = keys_tmp; + int64_t* src_i = indices, *dst_i = idx_tmp; + + for (uint8_t bp = 0; bp < n_bytes; bp++) { + uint8_t shift = bp * 8; + + radix_pass_ctx_t ctx = { + .keys = src_k, .idx = src_i, + .keys_out = dst_k, .idx_out = dst_i, + .n = n, .shift = shift, .n_tasks = n_tasks, + .hist = hist, .offsets = offsets, + }; + + /* Phase 1: parallel histogram */ + if (pool && n_tasks > 1) + ray_pool_dispatch_n(pool, radix_hist_fn, &ctx, n_tasks); + else + radix_hist_fn(&ctx, 0, 0, 1); + + /* Check uniformity via global histogram */ + bool uniform = false; + for (int b = 0; b < 256; b++) { + uint32_t total = 0; + for (uint32_t t = 0; t < n_tasks; t++) + total += hist[t * 256 + b]; + if (total == (uint32_t)n) { uniform = true; break; } + } + if (uniform) continue; /* all same byte — skip this pass */ + + /* Phase 2: prefix sum → per-task scatter offsets */ + int64_t running = 0; + for (int b = 0; b < 256; b++) { + for (uint32_t t = 0; t < n_tasks; t++) { + offsets[t * 256 + b] = running; + running += hist[t * 256 + b]; + } + } + + /* Phase 3: parallel scatter */ + if (pool && n_tasks > 1) + ray_pool_dispatch_n(pool, radix_scatter_fn, &ctx, n_tasks); + else + radix_scatter_fn(&ctx, 0, 0, 
1); + + /* Swap double-buffer pointers */ + uint64_t* tk = src_k; src_k = dst_k; dst_k = tk; + int64_t* ti = src_i; src_i = dst_i; dst_i = ti; + } + + scratch_free(hist_hdr); + scratch_free(off_hdr); + if (sorted_keys_out) *sorted_keys_out = src_k; + return src_i; /* pointer to final sorted indices */ +} + +/* ============================================================================ + * Packed radix sort — key+index in a single uint64_t + * + * When key_nbytes * 8 + index_bits ≤ 64, we pack the encoded key and the + * row index into one uint64_t: + * packed[i] = encoded_key[i] | ((uint64_t)i << idx_shift) + * + * Radix sort then moves ONE 8-byte value per element per pass instead of + * TWO 8-byte values (key + index). This halves all memory traffic: + * - SWC buffer: 16KB instead of 32KB (fits better in L1) + * - Scatter writes: 8B instead of 16B per element + * - Total traffic per pass: n×8B instead of n×16B + * + * After sorting, indices are extracted: idx = packed >> idx_shift + * ============================================================================ */ + +/* Packed scatter: single-array SWC scatter, no separate index array. 
*/ +#define PSWC_N 8 +static void packed_scatter_fn(void* arg, uint32_t wid, int64_t start, int64_t end) { + (void)wid; (void)end; + radix_pass_ctx_t* c = (radix_pass_ctx_t*)arg; + int64_t task = start; + + int64_t chunk = (c->n + c->n_tasks - 1) / c->n_tasks; + int64_t lo = task * chunk; + int64_t hi = lo + chunk; + if (hi > c->n) hi = c->n; + if (lo >= hi) return; + + int64_t* off = c->offsets + task * 256; + const uint64_t* in = c->keys; + uint64_t* out = c->keys_out; + uint8_t shift = c->shift; + + /* Single SWC buffer: 256 × 8 × 8B = 16KB — fits in L1 */ + uint64_t buf[256][PSWC_N]; + uint8_t bcnt[256]; + memset(bcnt, 0, 256); + + for (int64_t i = lo; i < hi; i++) { + uint8_t byte = (in[i] >> shift) & 0xFF; + uint8_t bp = bcnt[byte]; + buf[byte][bp] = in[i]; + if (++bp == PSWC_N) { + int64_t pos = off[byte]; + memcpy(&out[pos], buf[byte], PSWC_N * sizeof(uint64_t)); + off[byte] = pos + PSWC_N; + bp = 0; + } + bcnt[byte] = bp; + } + + /* Flush remaining entries */ + for (int b = 0; b < 256; b++) { + int64_t pos = off[b]; + for (uint8_t j = 0; j < bcnt[b]; j++) + out[pos + j] = buf[b][j]; + off[b] = pos + bcnt[b]; + } +} +#undef PSWC_N + +/* Packed radix sort: sorts an array of packed (key|index) uint64_t values. + * Sorts by bytes lo_byte to hi_byte-1 (the key bytes). + * Returns pointer to final sorted array (data or tmp). */ +uint64_t* packed_radix_sort_run(ray_pool_t* pool, + uint64_t* data, uint64_t* tmp, + int64_t n, uint8_t n_bytes) { + uint32_t n_tasks = pool ? 
ray_pool_total_workers(pool) : 1; + if (n_tasks < 1) n_tasks = 1; + + ray_t *hist_hdr = NULL, *off_hdr = NULL; + uint32_t* hist = (uint32_t*)scratch_alloc(&hist_hdr, + (size_t)n_tasks * 256 * sizeof(uint32_t)); + int64_t* offsets = (int64_t*)scratch_alloc(&off_hdr, + (size_t)n_tasks * 256 * sizeof(int64_t)); + if (!hist || !offsets) { + scratch_free(hist_hdr); scratch_free(off_hdr); + return NULL; + } + + uint64_t* src = data, *dst = tmp; + + for (uint8_t bp = 0; bp < n_bytes; bp++) { + uint8_t shift = bp * 8; + + /* Reuse radix_pass_ctx_t — only .keys and .keys_out are used + * by radix_hist_fn and packed_scatter_fn. */ + radix_pass_ctx_t ctx = { + .keys = src, .keys_out = dst, + .n = n, .shift = shift, .n_tasks = n_tasks, + .hist = hist, .offsets = offsets, + }; + + /* Phase 1: parallel histogram (reuses existing radix_hist_fn) */ + if (pool && n_tasks > 1) + ray_pool_dispatch_n(pool, radix_hist_fn, &ctx, n_tasks); + else + radix_hist_fn(&ctx, 0, 0, 1); + + /* Check uniformity */ + bool uniform = false; + for (int b = 0; b < 256; b++) { + uint32_t total = 0; + for (uint32_t t = 0; t < n_tasks; t++) + total += hist[t * 256 + b]; + if (total == (uint32_t)n) { uniform = true; break; } + } + if (uniform) continue; + + /* Phase 2: prefix sum */ + int64_t running = 0; + for (int b = 0; b < 256; b++) { + for (uint32_t t = 0; t < n_tasks; t++) { + offsets[t * 256 + b] = running; + running += hist[t * 256 + b]; + } + } + + /* Phase 3: packed scatter (half the traffic of dual-array scatter) */ + if (pool && n_tasks > 1) + ray_pool_dispatch_n(pool, packed_scatter_fn, &ctx, n_tasks); + else + packed_scatter_fn(&ctx, 0, 0, 1); + + uint64_t* t2 = src; src = dst; dst = t2; + } + + scratch_free(hist_hdr); + scratch_free(off_hdr); + return src; +} + +/* Fused pack + sortedness detection for packed radix sort. 
+ * Packs keys[i] |= (i << key_bits) in-place while counting: + * - forward inversions (keys[i] < keys[i-1]) → unsorted + * - reverse inversions (keys[i] > keys[i-1]) → not_reverse + * If unsorted==0: already sorted. If not_reverse==0: reverse-sorted. */ +typedef struct { + uint64_t* keys; + uint8_t key_bits; + uint64_t key_mask; /* mask for significant key bytes */ + int64_t* pw_unsorted; /* count of forward inversions */ + int64_t* pw_not_reverse; /* count of strict ascending pairs */ +} packed_detect_ctx_t; + +static void packed_detect_fn(void* arg, uint32_t wid, + int64_t start, int64_t end) { + packed_detect_ctx_t* c = (packed_detect_ctx_t*)arg; + uint64_t* k = c->keys; + uint8_t kb = c->key_bits; + uint64_t km = c->key_mask; + int64_t unsorted = 0, not_rev = 0; + uint64_t prev = (start > 0) ? (k[start - 1] & km) : 0; + for (int64_t i = start; i < end; i++) { + uint64_t cur = k[i] & km; /* mask to significant bytes */ + if (i > start) { + if (cur < prev) unsorted++; + if (cur > prev) not_rev++; + } + /* Pack: significant key bits | (index << key_bits) */ + k[i] = cur | ((uint64_t)i << kb); + prev = cur; + } + c->pw_unsorted[wid] += unsorted; + c->pw_not_reverse[wid] += not_rev; +} + +/* Parallel unpack: extract indices (and optionally sorted keys) from + * packed values after packed radix sort. 
*/ +typedef struct { + const uint64_t* sorted; + int64_t* indices; + uint64_t* keys_out; + uint8_t key_bits; + uint64_t idx_mask; + uint64_t key_mask; + bool extract_keys; +} packed_unpack_ctx_t; + +static void packed_unpack_fn(void* arg, uint32_t wid, + int64_t start, int64_t end) { + (void)wid; + packed_unpack_ctx_t* c = (packed_unpack_ctx_t*)arg; + for (int64_t i = start; i < end; i++) { + uint64_t v = c->sorted[i]; + c->indices[i] = (int64_t)((v >> c->key_bits) & c->idx_mask); + if (c->extract_keys) + c->keys_out[i] = v & c->key_mask; + } +} + +/* ============================================================================ + * MSD+LSB hybrid radix sort + * + * First pass: MSD partition by the most significant non-uniform byte. + * Creates up to 256 buckets, each small enough to fit in L2 cache. + * Subsequent passes: LSB radix sort within each bucket (in-cache, fast). + * + * For 10M I64 values with 3 significant bytes: + * LSB: 3 full passes over 160MB (keys+indices) = ~960MB random traffic + * MSD+LSB: 1 full pass + 256 × 2 in-cache passes ≈ ~400MB random + ~5ms in-cache + * + * Cache behavior: after the first MSD partition, each bucket (10M/256 ≈ 39K + * elements ≈ 625KB) fits in L2. Subsequent passes operate entirely within + * cache, making them effectively free compared to the first pass. + * ============================================================================ */ + +/* Per-bucket LSB radix sort (non-parallel, for cache-resident data). + * No SWC needed since data fits in L2/L1 cache. 
 *
 * Sorts n (key, index) pairs on the low n_bytes bytes of each key.
 *   keys/idx   — input pair of buffers (also one of the ping-pong sides)
 *   ktmp/itmp  — scratch pair of the same length
 * For n <= 64 the work is handed to key_introsort() and idx is returned.
 * Otherwise one counting pass is made per byte, least significant
 * first; bytes on which every key agrees are skipped, and the two
 * buffer pairs swap roles after every pass that actually scatters.
 * Returns the index buffer that holds the final order (idx or itmp);
 * the matching keys are in the corresponding key buffer.
 */
static int64_t* bucket_lsb_sort(uint64_t* keys, int64_t* idx,
                                uint64_t* ktmp, int64_t* itmp,
                                int64_t n, uint8_t n_bytes) {
    if (n <= 64) {
        key_introsort(keys, idx, n);
        return idx;
    }

    uint64_t* src_k = keys, *dst_k = ktmp;
    int64_t*  src_i = idx,  *dst_i = itmp;

    for (uint8_t bp = 0; bp < n_bytes; bp++) {
        uint8_t shift = bp * 8;

        uint32_t hist[256];
        memset(hist, 0, sizeof(hist));
        for (int64_t i = 0; i < n; i++)
            hist[(src_k[i] >> shift) & 0xFF]++;

        /* Check uniformity — skip this byte if all values share the same digit */
        bool uniform = false;
        for (int b = 0; b < 256; b++) {
            if (hist[b] == (uint32_t)n) { uniform = true; break; }
        }
        if (uniform) continue;

        /* Prefix sum */
        int64_t off[256];
        off[0] = 0;
        for (int b = 1; b < 256; b++)
            off[b] = off[b-1] + (int64_t)hist[b-1];

        /* Scatter (no SWC — data is cache-resident) */
        for (int64_t i = 0; i < n; i++) {
            uint8_t byte = (src_k[i] >> shift) & 0xFF;
            int64_t pos = off[byte]++;
            dst_k[pos] = src_k[i];
            dst_i[pos] = src_i[i];
        }

        /* Ping-pong: this pass's output is the next pass's input. */
        uint64_t* tk = src_k; src_k = dst_k; dst_k = tk;
        int64_t*  ti = src_i; src_i = dst_i; dst_i = ti;
    }

    return src_i;
}

/* Context for parallel per-bucket sorting after MSD partition */
typedef struct {
    uint64_t*  data_k;     /* MSD output: partitioned keys */
    int64_t*   data_i;     /* MSD output: partitioned indices */
    uint64_t*  tmp_k;      /* scratch (MSD input buffer, now free) */
    int64_t*   tmp_i;
    int64_t    bucket_offsets[257];  /* prefix-sum of bucket sizes */
    uint8_t    n_bytes;    /* remaining bytes to sort per bucket */
} msd_bucket_ctx_t;

/* Worker body: sorts buckets [start, end) independently.  Bucket b
 * occupies [bucket_offsets[b], bucket_offsets[b+1]) in data_k/data_i;
 * buckets with fewer than two elements are left untouched. */
static void msd_bucket_sort_fn(void* arg, uint32_t wid,
                                int64_t start, int64_t end) {
    (void)wid;
    msd_bucket_ctx_t* c = (msd_bucket_ctx_t*)arg;

    for (int64_t b = start; b < end; b++) {
        int64_t off = c->bucket_offsets[b];
        int64_t cnt = c->bucket_offsets[b + 1] - off;
        if (cnt <= 1) continue;

        int64_t* sorted = bucket_lsb_sort(
            c->data_k + off, c->data_i + off,
            c->tmp_k + off, c->tmp_i + off,
            cnt, c->n_bytes);

        /* Ensure result is in the canonical buffer (data_k/data_i).
         * bucket_lsb_sort may leave result in the scratch buffer if an
         * odd number of scatter passes executed. */
        if (sorted != c->data_i + off) {
            memcpy(c->data_k + off, c->tmp_k + off,
                   (size_t)cnt * sizeof(uint64_t));
            memcpy(c->data_i + off, c->tmp_i + off,
                   (size_t)cnt * sizeof(int64_t));
        }
    }
}

/* MSD+LSB hybrid radix sort.
 *
 * On the MSD path the sorted indices end up in idx_tmp (returned) and
 * the sorted keys in keys_tmp (stored through sorted_keys_out when it
 * is non-NULL); keys/indices are consumed as scratch.
 *
 * The call is forwarded unchanged to radix_sort_run() — whose return
 * value and sorted_keys_out are then whatever that function produces —
 * when n_bytes <= 5, when n <= 1000000, or when the histogram/offset
 * scratch allocation fails.  If every key shares the same top byte the
 * function re-enters itself with n_bytes - 1. */
int64_t* msd_radix_sort_run(ray_pool_t* pool,
                             uint64_t* keys, int64_t* indices,
                             uint64_t* keys_tmp, int64_t* idx_tmp,
                             int64_t n, uint8_t n_bytes,
                             uint64_t** sorted_keys_out) {
    /* MSD adds partitioning + dispatch overhead that only pays off for
     * very wide keys (≥6 bytes) on large inputs (> 1M rows), where
     * saving multiple full-array LSB passes matters.  For typical data
     * (≤5 bytes after range analysis), LSB with SWC is faster. */
    if (n_bytes <= 5 || n <= 1000000) {
        return radix_sort_run(pool, keys, indices, keys_tmp, idx_tmp,
                              n, n_bytes, sorted_keys_out);
    }

    uint32_t n_tasks = pool ? ray_pool_total_workers(pool) : 1;
    if (n_tasks < 1) n_tasks = 1;

    /* Allocate histogram and offsets for MSD pass */
    ray_t *hist_hdr = NULL, *off_hdr = NULL;
    uint32_t* hist = (uint32_t*)scratch_alloc(&hist_hdr,
                        (size_t)n_tasks * 256 * sizeof(uint32_t));
    int64_t* offsets = (int64_t*)scratch_alloc(&off_hdr,
                        (size_t)n_tasks * 256 * sizeof(int64_t));
    if (!hist || !offsets) {
        scratch_free(hist_hdr); scratch_free(off_hdr);
        return radix_sort_run(pool, keys, indices, keys_tmp, idx_tmp,
                              n, n_bytes, sorted_keys_out);
    }

    /* MSD pass: partition by the most significant non-uniform byte */
    uint8_t msd_byte = n_bytes - 1;
    uint8_t shift = msd_byte * 8;

    radix_pass_ctx_t ctx = {
        .keys = keys, .idx = indices,
        .keys_out = keys_tmp, .idx_out = idx_tmp,
        .n = n, .shift = shift, .n_tasks = n_tasks,
        .hist = hist, .offsets = offsets,
    };

    /* Phase 1: parallel histogram */
    if (pool && n_tasks > 1)
        ray_pool_dispatch_n(pool, radix_hist_fn, &ctx, n_tasks);
    else
        radix_hist_fn(&ctx, 0, 0, 1);

    /* Check uniformity.
     * NOTE(review): counts are 32-bit and n is narrowed to uint32_t for
     * the comparison — looks like n is assumed to fit in 32 bits; confirm. */
    bool uniform = false;
    for (int b = 0; b < 256; b++) {
        uint32_t total = 0;
        for (uint32_t t = 0; t < n_tasks; t++)
            total += hist[t * 256 + b];
        if (total == (uint32_t)n) { uniform = true; break; }
    }

    if (uniform) {
        /* All keys share the same MSB — skip this byte, try next */
        scratch_free(hist_hdr); scratch_free(off_hdr);
        return msd_radix_sort_run(pool, keys, indices, keys_tmp, idx_tmp,
                                   n, n_bytes - 1, sorted_keys_out);
    }

    /* Phase 2: prefix sum → per-task scatter offsets + bucket boundaries */
    int64_t bucket_offsets[257];
    {
        int64_t running = 0;
        for (int b = 0; b < 256; b++) {
            bucket_offsets[b] = running;
            for (uint32_t t = 0; t < n_tasks; t++) {
                offsets[t * 256 + b] = running;
                running += hist[t * 256 + b];
            }
        }
        bucket_offsets[256] = running;
    }

    /* Phase 3: parallel scatter with SWC */
    if (pool && n_tasks > 1)
        ray_pool_dispatch_n(pool, radix_scatter_fn, &ctx, n_tasks);
    else
        radix_scatter_fn(&ctx, 0, 0, 1);

    scratch_free(hist_hdr);
    scratch_free(off_hdr);

    /* Data is now in keys_tmp/idx_tmp, partitioned by MSB.
     * Sort each bucket independently using the remaining bytes.
     * Use keys/indices as scratch (MSD input, now free to reuse). */
    uint8_t remaining_bytes = msd_byte;  /* bytes 0..msd_byte-1 */

    msd_bucket_ctx_t bctx = {
        .data_k = keys_tmp, .data_i = idx_tmp,
        .tmp_k = keys, .tmp_i = indices,
        .n_bytes = remaining_bytes,
    };
    memcpy(bctx.bucket_offsets, bucket_offsets, sizeof(bucket_offsets));

    /* One task per top-level bucket. */
    if (pool)
        ray_pool_dispatch_n(pool, msd_bucket_sort_fn, &bctx, 256);
    else
        msd_bucket_sort_fn(&bctx, 0, 0, 256);

    /* Result is always in keys_tmp/idx_tmp */
    if (sorted_keys_out) *sorted_keys_out = keys_tmp;
    return idx_tmp;
}

/* radix_encode_ctx_t defined in exec_internal.h */

/* Worker body: encodes rows [start, end) of the sort column(s) into
 * unsigned 64-bit keys in c->keys, such that plain unsigned comparison
 * of the keys gives the requested order.  When c->indices is set it is
 * filled with the identity permutation for the same rows.
 *
 * Single key (n_keys <= 1): signed integers get their sign bit flipped,
 * doubles get the usual "flip sign bit, or flip everything if negative"
 * transform, symbols are replaced by their rank from c->enum_rank, and
 * the whole key is bit-inverted for descending order.
 *
 * Composite key (n_keys > 1): each column contributes
 * (value - mins[k]), mirrored inside ranges[k] for descending columns,
 * shifted to bit_shifts[k] and OR-ed into one key.
 * NOTE(review): the composite path, and the SYM / I16 / BOOL / U8
 * single-key cases, have no null or NaN special-casing — confirm that
 * callers route such columns elsewhere. */
void radix_encode_fn(void* arg, uint32_t wid, int64_t start, int64_t end) {
    (void)wid;
    radix_encode_ctx_t* c = (radix_encode_ctx_t*)arg;

    /* Fused iota: initialize index array alongside key encoding */
    if (c->indices) {
        int64_t* idx = c->indices;
        for (int64_t i = start; i < end; i++) idx[i] = i;
    }

    if (c->n_keys <= 1) {
        /* Single-key fast path */
        switch (c->type) {
        case RAY_I64: case RAY_TIMESTAMP: {
            const int64_t* d = (const int64_t*)c->data;
            bool has_nulls = c->col && (c->col->attrs & RAY_ATTR_HAS_NULLS);
            bool nf = c->nulls_first;
            bool desc = c->desc;
            /* Null key: nf=true→sort first, nf=false→sort last.
             * For ASC  NULLS FIRST → e=0          (smallest)
             * For ASC  NULLS LAST  → e=UINT64_MAX (largest)
             * For DESC NULLS FIRST → e=UINT64_MAX (~e=0, smallest after flip)
             * For DESC NULLS LAST  → e=0          (~e=UINT64_MAX, largest after flip) */
            uint64_t null_e = (nf ^ desc) ? 0 : UINT64_MAX;
            if (desc) {
                for (int64_t i = start; i < end; i++) {
                    if (has_nulls && ray_vec_is_null(c->col, i))
                        c->keys[i] = ~null_e;
                    else
                        c->keys[i] = ~((uint64_t)d[i] ^ ((uint64_t)1 << 63));
                }
            } else {
                for (int64_t i = start; i < end; i++) {
                    if (has_nulls && ray_vec_is_null(c->col, i))
                        c->keys[i] = null_e;
                    else
                        c->keys[i] = (uint64_t)d[i] ^ ((uint64_t)1 << 63);
                }
            }
            break;
        }
        case RAY_F64: {
            const double* d = (const double*)c->data;
            bool nf = c->nulls_first;
            bool desc = c->desc;
            /* NaN override: encode NaN so it sorts first or last.
             * For ASC  NULLS FIRST → e=0          (smallest key)
             * For ASC  NULLS LAST  → e=UINT64_MAX (largest key)
             * For DESC NULLS FIRST → e=UINT64_MAX (~e=0, smallest)
             * For DESC NULLS LAST  → e=0          (~e=UINT64_MAX, largest)
             * Pattern: e = (nf ^ desc) ? 0 : UINT64_MAX */
            uint64_t nan_e = (nf ^ desc) ? 0 : UINT64_MAX;
            for (int64_t i = start; i < end; i++) {
                uint64_t bits;
                memcpy(&bits, &d[i], 8);
                /* NaN: exponent all-1s (0x7FF) and mantissa non-zero */
                if ((bits & 0x7FF0000000000000ULL) == 0x7FF0000000000000ULL &&
                    (bits & 0x000FFFFFFFFFFFFFULL)) {
                    c->keys[i] = desc ? ~nan_e : nan_e;
                } else {
                    /* Negative: flip every bit; non-negative: flip only the sign. */
                    uint64_t mask = -(bits >> 63) | ((uint64_t)1 << 63);
                    uint64_t e = bits ^ mask;
                    c->keys[i] = desc ? ~e : e;
                }
            }
            break;
        }
        case RAY_I32: case RAY_DATE: case RAY_TIME: {
            const int32_t* d = (const int32_t*)c->data;
            bool has_nulls = c->col && (c->col->attrs & RAY_ATTR_HAS_NULLS);
            bool nf = c->nulls_first;
            bool desc = c->desc;
            uint64_t null_e = (nf ^ desc) ? 0 : UINT64_MAX;
            if (desc) {
                for (int64_t i = start; i < end; i++) {
                    if (has_nulls && ray_vec_is_null(c->col, i))
                        c->keys[i] = ~null_e;
                    else
                        c->keys[i] = ~((uint64_t)((uint32_t)d[i] ^ ((uint32_t)1 << 31)));
                }
            } else {
                for (int64_t i = start; i < end; i++) {
                    if (has_nulls && ray_vec_is_null(c->col, i))
                        c->keys[i] = null_e;
                    else
                        c->keys[i] = (uint64_t)((uint32_t)d[i] ^ ((uint32_t)1 << 31));
                }
            }
            break;
        }
        case RAY_SYM: {
            /* Symbols sort by their precomputed rank, not by raw id. */
            const uint32_t* rank = c->enum_rank;
            if (c->desc) {
                for (int64_t i = start; i < end; i++) {
                    uint32_t raw = (uint32_t)ray_read_sym(c->data, i, c->type, c->col_attrs);
                    c->keys[i] = ~(uint64_t)rank[raw];
                }
            } else {
                for (int64_t i = start; i < end; i++) {
                    uint32_t raw = (uint32_t)ray_read_sym(c->data, i, c->type, c->col_attrs);
                    c->keys[i] = (uint64_t)rank[raw];
                }
            }
            break;
        }
        case RAY_I16: {
            const int16_t* d = (const int16_t*)c->data;
            if (c->desc) {
                for (int64_t i = start; i < end; i++)
                    c->keys[i] = ~((uint64_t)((uint16_t)d[i] ^ ((uint16_t)1 << 15)));
            } else {
                for (int64_t i = start; i < end; i++)
                    c->keys[i] = (uint64_t)((uint16_t)d[i] ^ ((uint16_t)1 << 15));
            }
            break;
        }
        case RAY_BOOL: case RAY_U8: {
            const uint8_t* d = (const uint8_t*)c->data;
            if (c->desc) {
                for (int64_t i = start; i < end; i++)
                    c->keys[i] = ~(uint64_t)d[i];
            } else {
                for (int64_t i = start; i < end; i++)
                    c->keys[i] = (uint64_t)d[i];
            }
            break;
        }
        }
    } else {
        /* Composite-key encoding */
        for (int64_t i = start; i < end; i++) {
            uint64_t composite = 0;
            for (uint8_t k = 0; k < c->n_keys; k++) {
                ray_t* col = c->vecs[k];
                int64_t val;
                if (c->enum_ranks[k]) {
                    uint32_t raw = (uint32_t)ray_read_sym(ray_data(col), i, col->type, col->attrs);
                    val = (int64_t)c->enum_ranks[k][raw];
                } else if (col->type == RAY_I64 || col->type == RAY_TIMESTAMP) {
                    val = ((const int64_t*)ray_data(col))[i];
                } else if (col->type == RAY_F64) {
                    uint64_t bits;
                    memcpy(&bits, &((const double*)ray_data(col))[i], 8);
                    uint64_t mask = -(bits >> 63) | ((uint64_t)1 << 63);
                    val = (int64_t)(bits ^ mask);
                } else if (col->type == RAY_I32 || col->type == RAY_DATE || col->type == RAY_TIME) {
                    val = (int64_t)((const int32_t*)ray_data(col))[i];
                } else if (col->type == RAY_I16) {
                    val = (int64_t)((const int16_t*)ray_data(col))[i];
                } else if (col->type == RAY_BOOL || col->type == RAY_U8) {
                    val = (int64_t)((const uint8_t*)ray_data(col))[i];
                } else {
                    val = 0;
                }
                /* Rebase to the column minimum; mirror inside the range for DESC. */
                uint64_t part = (uint64_t)val - (uint64_t)c->mins[k];
                if (c->descs[k]) part = (uint64_t)c->ranges[k] - part;
                composite |= part << c->bit_shifts[k];
            }
            c->keys[i] = composite;
        }
    }
}

/* ============================================================================
 * Adaptive string sort (single-key RAY_STR)
 *
 * Pipeline:
 *   1. Null partition — move nulls to sorted_idx[n_live..nrows).
 *   2. Probe — one linear pass over the non-null range computes
 *        • max_len                          (→ key width)
 *        • run_count / run_all_asc/desc     (→ pre-sorted short-circuit)
 *        • card_estimate on the first 1024 rows via an exact hashset
 *          (future-facing; unused today)
 *      Every downstream decision is taken from these runtime numbers —
 *      nothing in this file branches on "we know the bench is str8".
 *   3. Single-run short-circuit — if the probe reports one monotone
 *      run across the entire non-null range, we're done: copy (or
 *      reverse, for DESC × ASC mismatch) and skip sorting entirely.
 *      This is the vergesort trivial case; the general multi-run
 *      merge path is scoped for a follow-up.
 *   4. Key materialization — pack each non-null string into a record
 *        struct { uint64_t parts[parts]; uint32_t row; uint32_t len; }
 *      where parts = min(4, ceil(max_len/8)) and each part holds 8
 *      bytes of the string byte-swapped into big-endian u64 form, so
 *      raw u64 comparison == lex comparison.  One sequential pass
 *      over the input, zero per-byte function calls downstream.
 *   5.
American-Flag in-place MSD byte radix on the packed records. + * Top-level byte histogram → 256 buckets → one in-place swap + * pass → recurse. Sub-base-case buckets (≤ 24) finish with + * insertion sort using the full multi-u64 comparator. When + * recursion exhausts the packed prefix (depth == parts*8), + * ties fall through to a tail comparator that walks the + * original bytes via ray_str_t_cmp — the only place cold + * pool memory is touched during the sort proper. + * 6. Scatter row indices back to sorted_idx. + * 7. DESC reverses the non-null range; nulls-first rotates nulls + * to the front. + * + * Every threshold and resource allocation here is driven by runtime + * numbers (n, max_len, worker count) or machine geometry (cache line, + * pool workers) — never by assumptions about input shape. + * ============================================================================ */ + +#define RAY_STRSORT_KEY_PARTS_MAX 4 /* 32-byte packed prefix cap */ +#define RAY_STRSORT_BASE_CASE 24 /* small-bucket insertion-sort threshold */ +#define RAY_STRSORT_PROBE_HEAD 1024 /* rows sampled for exact distinct count */ + +typedef struct { + uint64_t parts[RAY_STRSORT_KEY_PARTS_MAX]; + uint32_t row; + uint32_t len; +} ray_strkey_t; + +/* Convert a native-endian u64 to big-endian so raw u64 comparison yields + * lex order over the original byte layout. On LE targets (everything we + * build for today) this is a single bswap instruction. */ +static inline uint64_t strkey_lex_u64(uint64_t v) { +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return __builtin_bswap64(v); +#else + return v; +#endif +} + +/* Load 8 bytes starting at src[offset], zero-padding past `len`, then + * byte-swap into lex u64 form. Returns 0 when offset ≥ len. */ +static inline uint64_t strkey_load_part(const char* src, int64_t len, int offset) { + int64_t remaining = len - offset; + if (remaining <= 0) return 0; + uint64_t raw = 0; + int64_t take = remaining < 8 ? 
remaining : 8; + memcpy(&raw, src + offset, (size_t)take); + return strkey_lex_u64(raw); +} + +/* Full-depth comparator. Fast path: the packed parts. Tail fallback: + * only fires if both records have len > parts*8 and their packed + * prefixes are equal — touches pool memory via ray_str_t_cmp only + * at the base case, never during the radix partitioning loop. */ +static int strkey_cmp(const ray_strkey_t* a, const ray_strkey_t* b, + int parts, + const ray_str_t* elems, const char* pool) { + for (int p = 0; p < parts; p++) { + if (a->parts[p] < b->parts[p]) return -1; + if (a->parts[p] > b->parts[p]) return 1; + } + int64_t parts_bytes = (int64_t)parts * 8; + /* Both strings fit inside the packed prefix — the only way their + * parts can tie is if one is a zero-padded suffix of the other, in + * which case the shorter one sorts first. (Equal length means they + * are actually equal and stability via row is handled by the caller.) */ + if ((int64_t)a->len <= parts_bytes && (int64_t)b->len <= parts_bytes) { + return (int)a->len - (int)b->len; + } + /* Tail comparison on bytes [parts_bytes, len). */ + const ray_str_t* sa = &elems[a->row]; + const ray_str_t* sb = &elems[b->row]; + const char* pa = ray_str_t_ptr(sa, pool); + const char* pb = ray_str_t_ptr(sb, pool); + int64_t la = (int64_t)sa->len - parts_bytes; if (la < 0) la = 0; + int64_t lb = (int64_t)sb->len - parts_bytes; if (lb < 0) lb = 0; + int64_t m = la < lb ? la : lb; + int r = m ? memcmp(pa + parts_bytes, pb + parts_bytes, (size_t)m) : 0; + if (r != 0) return r; + return (la > lb) - (la < lb); +} + +static void strkey_insertion_sort(ray_strkey_t* a, int64_t n, int parts, + const ray_str_t* elems, const char* pool) { + for (int64_t i = 1; i < n; i++) { + ray_strkey_t cur = a[i]; + int64_t j = i - 1; + while (j >= 0 && strkey_cmp(&a[j], &cur, parts, elems, pool) > 0) { + a[j + 1] = a[j]; + j--; + } + a[j + 1] = cur; + } +} + +/* Extract the bp'th big-endian byte of the packed prefix. 
*/ +static inline uint8_t strkey_byte_at(const ray_strkey_t* k, int bp) { + int part = bp >> 3; + int shift = 56 - ((bp & 7) << 3); + return (uint8_t)(k->parts[part] >> shift); +} + +/* Cheap max-len probe — one sequential pass over the `len` field of each + * live row's ray_str_t. Reads only 4 bytes per row (the len), so at 10M + * rows this is ~5ms bandwidth-bound. Everything else the old probe + * computed (monotonicity, distinct-count sample) is folded into the + * parallel key-build pass below, where it's nearly free. */ +static int strsort_probe_parts(const int64_t* indices, int64_t n_live, + const ray_str_t* elems) { + int64_t max_len = 0; + for (int64_t i = 0; i < n_live; i++) { + int64_t l = (int64_t)elems[indices[i]].len; + if (l > max_len) max_len = l; + } + int64_t pcalc = (max_len + 7) / 8; + if (pcalc < 1) pcalc = 1; + if (pcalc > RAY_STRSORT_KEY_PARTS_MAX) pcalc = RAY_STRSORT_KEY_PARTS_MAX; + return (int)pcalc; +} + +/* Parallel key materialization (morsel range). */ +typedef struct { + ray_strkey_t* out; + const int64_t* indices; + const ray_str_t* elems; + const char* pool; + int parts; +} strsort_build_ctx_t; + +static void strsort_build_fn(void* vctx, uint32_t wid, int64_t s, int64_t e) { + (void)wid; + strsort_build_ctx_t* c = (strsort_build_ctx_t*)vctx; + for (int64_t i = s; i < e; i++) { + int64_t row = c->indices[i]; + const ray_str_t* str = &c->elems[row]; + c->out[i].row = (uint32_t)row; + c->out[i].len = str->len; + int64_t len = str->len; + const char* src = len ? ray_str_t_ptr(str, c->pool) : NULL; + for (int p = 0; p < RAY_STRSORT_KEY_PARTS_MAX; p++) { + c->out[i].parts[p] = (p < c->parts) + ? 
strkey_load_part(src, len, p * 8) + : 0; + } + } +} + +static void strsort_build_keys(ray_strkey_t* out, int64_t n_live, + const int64_t* indices, + const ray_str_t* elems, const char* pool, + int parts) { + strsort_build_ctx_t c = { out, indices, elems, pool, parts }; + ray_pool_t* p = ray_pool_get(); + if (p && n_live >= RAY_PARALLEL_THRESHOLD) { + ray_pool_dispatch(p, strsort_build_fn, &c, n_live); + } else { + strsort_build_fn(&c, 0, 0, n_live); + } +} + +/* Emit sorted row indices back to sorted_idx (parallel). */ +typedef struct { + int64_t* out; + const ray_strkey_t* keys; +} strsort_emit_ctx_t; + +static void strsort_emit_fn(void* vctx, uint32_t wid, int64_t s, int64_t e) { + (void)wid; + strsort_emit_ctx_t* c = (strsort_emit_ctx_t*)vctx; + for (int64_t i = s; i < e; i++) c->out[i] = (int64_t)c->keys[i].row; +} + +/* Packed-key lexicographic compare. Fast path for run-detection and + * insertion sort at the radix base case. No pool access. */ +static inline int strkey_cmp_packed(const ray_strkey_t* a, + const ray_strkey_t* b, int parts) { + for (int p = 0; p < parts; p++) { + if (a->parts[p] < b->parts[p]) return -1; + if (a->parts[p] > b->parts[p]) return 1; + } + return (int)a->len - (int)b->len; +} + +/* Sequential run detection over packed keys, with early abort. + * For random data the first inversion appears within a few elements + * and the scan exits in O(1). For fully sorted data it does one + * linear pass over the packed key array (contiguous memory, ~10ms + * sequential at 10M × 40B records — bandwidth bound). + * Returns the detected direction: -1 = all descending, +1 = all + * ascending, 0 = neither (or tail bytes remain to be sorted). + * + * IMPORTANT: when two adjacent packed keys tie AND either string is + * longer than the packed window, we CANNOT declare a sorted run — + * the tail bytes may impose ordering we haven't examined. 
The + * shortcut is safe only when every pair is either strictly ordered + * by the packed key or both sides fit entirely inside the window. */ +static int strsort_detect_runs(const ray_strkey_t* keys, int64_t n, + int parts, int parts_bytes) { + if (n < 2) return 0; + bool asc = true, desc = true; + for (int64_t i = 1; i < n; i++) { + int r = strkey_cmp_packed(&keys[i - 1], &keys[i], parts); + if (r == 0) { + if ((int64_t)keys[i - 1].len > parts_bytes || + (int64_t)keys[i].len > parts_bytes) { + /* Tail bytes unresolved — fall through to the real sort. */ + return 0; + } + /* Both fully fit in the packed prefix and their parts tie + * → the strings are equal in the sorted order, which is + * compatible with both ascending and descending runs. */ + } else if (r > 0) { + asc = false; + } else { + desc = false; + } + if (!asc && !desc) return 0; + } + if (asc) return 1; + if (desc) return -1; + return 0; +} + +/* Parallel top-level byte-0 partition: per-task histogram, global + * prefix-sum, parallel scatter into a second contiguous buffer. + * This is the same pattern as the numeric radix_sort_run up above, + * adapted for 40-byte packed string keys. 
*/ +typedef struct { + const ray_strkey_t* src; + ray_strkey_t* dst; + int64_t n; + uint32_t n_tasks; + uint32_t* hist; /* [n_tasks × 256] */ + int64_t* offsets; /* [n_tasks × 256] */ +} strsort_top_ctx_t; + +static void strsort_top_hist_fn(void* vctx, uint32_t wid, + int64_t start, int64_t end) { + (void)wid; (void)end; + strsort_top_ctx_t* c = (strsort_top_ctx_t*)vctx; + int64_t task = start; + uint32_t* h = c->hist + task * 256; + memset(h, 0, 256 * sizeof(uint32_t)); + int64_t chunk = (c->n + c->n_tasks - 1) / c->n_tasks; + int64_t lo = task * chunk; + int64_t hi = lo + chunk; + if (hi > c->n) hi = c->n; + if (lo >= hi) return; + const ray_strkey_t* src = c->src; + for (int64_t i = lo; i < hi; i++) { + h[strkey_byte_at(&src[i], 0)]++; + } +} + +static void strsort_top_scatter_fn(void* vctx, uint32_t wid, + int64_t start, int64_t end) { + (void)wid; (void)end; + strsort_top_ctx_t* c = (strsort_top_ctx_t*)vctx; + int64_t task = start; + int64_t chunk = (c->n + c->n_tasks - 1) / c->n_tasks; + int64_t lo = task * chunk; + int64_t hi = lo + chunk; + if (hi > c->n) hi = c->n; + if (lo >= hi) return; + int64_t* off = c->offsets + task * 256; + const ray_strkey_t* src = c->src; + ray_strkey_t* dst = c->dst; + for (int64_t i = lo; i < hi; i++) { + uint8_t b = strkey_byte_at(&src[i], 0); + dst[off[b]++] = src[i]; + } +} + +/* Bucket dispatch context: each task sorts one top-level bucket. 
 */
typedef struct {
    ray_strkey_t*     keys;         /* partitioned records */
    const int64_t*    starts;       /* starts[b]: offset of bucket b within keys */
    const int64_t*    counts;       /* counts[b]: number of records in bucket b */
    int               parts_bytes;  /* parts * 8 */
    int64_t           base_offset;  /* string byte offset of the current packed window */
    const ray_str_t*  elems;
    const char*       pool;
    int               parts;
    int               start_bp;     /* byte position to begin radix within bucket */
} strsort_bucket_ctx_t;

static void strsort_aflag(ray_strkey_t* keys, int64_t n, int bp,
                           int parts_bytes, int64_t base_offset,
                           const ray_str_t* elems, const char* pool,
                           int parts);

/* Worker body: runs strsort_aflag on every bucket in [s, e) that holds
 * more than one record, starting at byte position start_bp. */
static void strsort_bucket_fn(void* vctx, uint32_t wid, int64_t s, int64_t e) {
    (void)wid;
    strsort_bucket_ctx_t* c = (strsort_bucket_ctx_t*)vctx;
    for (int64_t b = s; b < e; b++) {
        int64_t cnt = c->counts[b];
        if (cnt <= 1) continue;
        strsort_aflag(c->keys + c->starts[b], cnt, c->start_bp,
                      c->parts_bytes, c->base_offset,
                      c->elems, c->pool, c->parts);
    }
}

/* In-place quicksort by packed key `len` field, over the inclusive
 * range a[lo..hi].  Used as the finalization step for buckets where
 * every record's string ended at or before the current base_offset —
 * such records tied on the packed prefix but still need to be ordered
 * by length (shorter strings sort before longer ones that extend them,
 * per ray_str_t_cmp).  Single-key integer quicksort with median-of-3
 * pivot and Hoare partition; it recurses on the smaller half and
 * iterates on the larger one, which bounds the stack depth.  Ranges of
 * 17 elements or fewer are finished by insertion sort.  Not stable. */
static void strkey_qsort_by_len(ray_strkey_t* a, int64_t lo, int64_t hi) {
    while (hi - lo > 16) {
        int64_t mid = lo + (hi - lo) / 2;
        /* Median-of-3. */
        if (a[lo].len  > a[hi].len)  { ray_strkey_t t=a[lo];  a[lo]=a[hi];  a[hi]=t; }
        if (a[mid].len > a[hi].len)  { ray_strkey_t t=a[mid]; a[mid]=a[hi]; a[hi]=t; }
        if (a[lo].len  > a[mid].len) { ray_strkey_t t=a[lo];  a[lo]=a[mid]; a[mid]=t; }
        uint32_t pivot = a[mid].len;
        /* Hoare partition. */
        int64_t i = lo - 1, j = hi + 1;
        for (;;) {
            do { i++; } while (a[i].len < pivot);
            do { j--; } while (a[j].len > pivot);
            if (i >= j) break;
            ray_strkey_t t = a[i]; a[i] = a[j]; a[j] = t;
        }
        /* Recurse on smaller half, loop on the larger. */
        if (j - lo < hi - (j + 1)) {
            strkey_qsort_by_len(a, lo, j);
            lo = j + 1;
        } else {
            strkey_qsort_by_len(a, j + 1, hi);
            hi = j;
        }
    }
    /* Insertion sort base case. */
    for (int64_t i = lo + 1; i <= hi; i++) {
        ray_strkey_t cur = a[i];
        int64_t j = i - 1;
        while (j >= lo && a[j].len > cur.len) {
            a[j + 1] = a[j];
            j--;
        }
        a[j + 1] = cur;
    }
}

/* Re-pack the next window of bytes for records whose previous window
 * tied on the full packed prefix.  `base_offset` is the byte position
 * in the original string that will become byte 0 of the new packed
 * prefix.  Returns true if any record still has bytes to contribute
 * past base_offset — false means every record's string ended at or
 * before base_offset.
 *
 * When this returns false the caller MUST NOT simply move on: strings
 * that ended before base_offset may still have differing lengths, and
 * ray_str_t_cmp sorts shorter-before-longer on tie.  We handle that
 * right here by sorting the bucket in place on `len` before returning,
 * so the caller can just stop recursing.
 *
 * Only parts[0..parts) of each record are rewritten; row and len are
 * left as they were. */
static bool strsort_repack_window(ray_strkey_t* keys, int64_t n,
                                    int64_t base_offset,
                                    const ray_str_t* elems, const char* pool,
                                    int parts) {
    bool any_tail = false;
    /* Track min/max len alongside the repack so we can skip the
     * finalize-by-len step when every string in the bucket has the
     * same length — the very common case where the bucket is full
     * of identical strings (e.g. few_unique radix sub-bucket). */
    uint32_t min_len = UINT32_MAX;
    uint32_t max_len = 0;
    for (int64_t i = 0; i < n; i++) {
        const ray_str_t* s = &elems[keys[i].row];
        int64_t len = s->len;
        if (len > base_offset) any_tail = true;
        if ((uint32_t)len < min_len) min_len = (uint32_t)len;
        if ((uint32_t)len > max_len) max_len = (uint32_t)len;
        const char* src = len > 0 ? ray_str_t_ptr(s, pool) : NULL;
        for (int p = 0; p < parts; p++) {
            int64_t off = base_offset + (int64_t)p * 8;
            keys[i].parts[p] = (src && len > off)
                ? strkey_load_part(src, len, (int)off)
                : 0;
        }
    }
    if (!any_tail && n > 1 && min_len != max_len) {
        /* Every string ended at or before base_offset, they tied on
         * the zero-padded packed prefix, and at least two of them
         * differ in length.  A string of length 3 whose bytes match
         * a prefix of a length-5 string must sort before it (per
         * ray_str_t_cmp), so finalize the bucket by sorting on len.
         * When min_len == max_len every record is bitwise equal and
         * any order is valid — we skip the sort entirely. */
        strkey_qsort_by_len(keys, 0, n - 1);
    }
    return any_tail;
}

/* American Flag in-place MSD byte radix on keys[0..n) at byte position bp
 * within the current window.  All records share the same prefix from
 * byte 0 up to `base_offset + bp` of the original string.  When the
 * current window is exhausted (`bp >= parts_bytes`) we re-pack the next
 * window and continue — keeps worst case at O(total_bytes) even when
 * records share arbitrarily long common prefixes.
 *
 * parts_bytes = parts * 8 (cached).  base_offset tracks how many bytes
 * of the original string have already been consumed by earlier windows. */
static void strsort_aflag(ray_strkey_t* keys, int64_t n, int bp,
                           int parts_bytes, int64_t base_offset,
                           const ray_str_t* elems, const char* pool,
                           int parts) {
    /* Tail-recursive inline loop on the largest bucket to bound stack
     * depth independent of n. */
    for (;;) {
        if (n <= 1) return;
        if (n <= RAY_STRSORT_BASE_CASE) {
            /* Small bucket — finish with a bounded comparison sort.
             * strkey_cmp walks the original string bytes past the
             * current window when necessary, so long tails are fine
             * at this size. */
            strkey_insertion_sort(keys, n, parts, elems, pool);
            return;
        }
        if (bp >= parts_bytes) {
            /* Exhausted the packed prefix for this window with a big
             * bucket still to resolve.  Re-pack the next window and
             * restart the radix — keeps total work linear in string
             * bytes, never quadratic. */
            int64_t next_offset = base_offset + parts_bytes;
            if (!strsort_repack_window(keys, n, next_offset,
                                        elems, pool, parts)) {
                /* Every record's string ends at or before next_offset.
                 * strsort_repack_window has already ordered the bucket
                 * by length where lengths differ, so it is final. */
                return;
            }
            base_offset = next_offset;
            bp = 0;
            continue;
        }

        int64_t counts[256] = {0};
        for (int64_t i = 0; i < n; i++) {
            counts[strkey_byte_at(&keys[i], bp)]++;
        }
        /* Fast path: all records share the same byte at this position.
         * Skip the partition pass and advance one byte deeper. */
        int uniq_b = -1;
        bool uniform = true;
        for (int b = 0; b < 256; b++) {
            if (counts[b] == 0) continue;
            if (uniq_b < 0) uniq_b = b;
            else { uniform = false; break; }
        }
        if (uniform) {
            bp++;
            continue;
        }

        /* Bucket b occupies keys[starts[b] .. ends[b]). */
        int64_t starts[256];
        int64_t ends[256];
        {
            int64_t sum = 0;
            for (int b = 0; b < 256; b++) {
                starts[b] = sum;
                sum += counts[b];
                ends[b] = sum;
            }
        }

        /* In-place swap loop: classic American Flag.  For each bucket b,
         * drain records out of its slice whose current byte != b into
         * their correct destination, cycling until the bucket slice
         * contains only records that belong in b. */
        int64_t cursors[256];
        memcpy(cursors, starts, sizeof(cursors));
        for (int b = 0; b < 256; b++) {
            while (cursors[b] < ends[b]) {
                ray_strkey_t v = keys[cursors[b]];
                int bb = strkey_byte_at(&v, bp);
                while (bb != b) {
                    /* Drop v into its home bucket and pick up the
                     * record it displaces. */
                    ray_strkey_t tmp = keys[cursors[bb]];
                    keys[cursors[bb]] = v;
                    cursors[bb]++;
                    v = tmp;
                    bb = strkey_byte_at(&v, bp);
                }
                keys[cursors[b]] = v;
                cursors[b]++;
            }
        }

        /* Find the largest bucket; recurse on the rest and loop on the
         * largest to keep stack shallow. */
        int big_b = 0;
        int64_t big_cnt = counts[0];
        for (int b = 1; b < 256; b++) {
            if (counts[b] > big_cnt) { big_cnt = counts[b]; big_b = b; }
        }
        for (int b = 0; b < 256; b++) {
            if (b == big_b) continue;
            int64_t cnt = counts[b];
            if (cnt > 1) {
                strsort_aflag(keys + starts[b], cnt, bp + 1,
                              parts_bytes, base_offset, elems, pool, parts);
            }
        }
        keys += starts[big_b];
        n = big_cnt;
        bp++;
    }
}

/* Top-level adaptive string sort.  Nulls partitioned first, then the
 * non-null range runs through probe → single-run short-circuit →
 * key materialization → American-Flag MSD → scatter row indices back.
 * Returns false on OOM (caller should fall back to comparison sort). */
static bool sort_str_msd_inplace(int64_t* sorted_idx, int64_t nrows,
                                  ray_t* col, bool desc, bool nulls_first) {
    if (nrows <= 0) return true;

    /* Initial iota — caller may or may not have already filled it. */
    for (int64_t i = 0; i < nrows; i++) sorted_idx[i] = i;

    /* Partition nulls to the tail.  Slice vecs inherit the null bitmap
     * from slice_parent, so check both attr slots — matches the
     * exec_sort post-sort propagation pattern.
*/ + int64_t null_count = 0; + bool has_nulls = (col->attrs & RAY_ATTR_HAS_NULLS) || + ((col->attrs & RAY_ATTR_SLICE) && col->slice_parent && + (col->slice_parent->attrs & RAY_ATTR_HAS_NULLS)); + if (has_nulls) { + int64_t w = 0; + int64_t null_pos; + for (int64_t i = 0; i < nrows; i++) { + if (!ray_vec_is_null(col, i)) sorted_idx[w++] = i; + } + null_count = nrows - w; + null_pos = w; + for (int64_t i = 0; i < nrows; i++) { + if (ray_vec_is_null(col, i)) sorted_idx[null_pos++] = i; + } + } + int64_t n_live = nrows - null_count; + + if (n_live > 1) { + const ray_str_t* elems; + const char* pool; + str_resolve(col, &elems, &pool); + ray_pool_t* pool_p = ray_pool_get(); + bool go_parallel = (pool_p && n_live >= RAY_PARALLEL_THRESHOLD); + + /* --- Cheap max-len probe (one pass over len fields). --- + * Chooses how many 8-byte parts to pack per key. Everything + * else (monotonicity, cardinality sampling) is folded into the + * key-build / run-detection passes below. */ + int parts = strsort_probe_parts(sorted_idx, n_live, elems); + int parts_bytes = parts * 8; + + /* --- Parallel key materialization. --- */ + ray_t* keys_hdr = NULL; + ray_strkey_t* keys = (ray_strkey_t*)scratch_alloc(&keys_hdr, + (size_t)n_live * sizeof(ray_strkey_t)); + if (!keys) return false; + strsort_build_keys(keys, n_live, sorted_idx, elems, pool, parts); + + /* --- Vergesort run detection on packed keys. --- + * Early-aborts on the first inversion (so random input pays O(1)). + * When the entire non-null range is a single monotone run we + * skip the sort proper and emit row indices directly. */ + int run_dir = strsort_detect_runs(keys, n_live, parts, parts_bytes); + bool want_asc = !desc; + if (run_dir == 1 && want_asc) { + /* Already ascending — emit as-is. 
*/ + strsort_emit_ctx_t ectx = { sorted_idx, keys }; + if (go_parallel) + ray_pool_dispatch(pool_p, strsort_emit_fn, &ectx, n_live); + else + strsort_emit_fn(&ectx, 0, 0, n_live); + } else if (run_dir == -1 && !want_asc) { + /* Already descending — emit as-is. */ + strsort_emit_ctx_t ectx = { sorted_idx, keys }; + if (go_parallel) + ray_pool_dispatch(pool_p, strsort_emit_fn, &ectx, n_live); + else + strsort_emit_fn(&ectx, 0, 0, n_live); + } else if (run_dir != 0) { + /* Single run but wrong direction — emit row-indices reversed. */ + for (int64_t i = 0, j = n_live - 1; i < j; i++, j--) { + ray_strkey_t t = keys[i]; keys[i] = keys[j]; keys[j] = t; + } + strsort_emit_ctx_t ectx = { sorted_idx, keys }; + if (go_parallel) + ray_pool_dispatch(pool_p, strsort_emit_fn, &ectx, n_live); + else + strsort_emit_fn(&ectx, 0, 0, n_live); + } else { + /* --- Top-level byte-0 partition. --- + * When parallel: per-task histograms, prefix-sum, parallel + * scatter into a second contiguous buffer, pointer-swap + * so `keys` holds the partitioned records. When sequential: + * single-pass American-Flag in-place swap loop. */ + ray_t* tmp_hdr = NULL; + ray_strkey_t* keys_sorted = keys; /* where the final data lands */ + + if (!go_parallel || parts_bytes == 0) { + strsort_aflag(keys, n_live, /*bp=*/0, parts_bytes, + /*base_offset=*/0, elems, pool, parts); + } else { + ray_strkey_t* tmp = (ray_strkey_t*)scratch_alloc(&tmp_hdr, + (size_t)n_live * sizeof(ray_strkey_t)); + if (!tmp) { + /* Fall back to sequential sort on OOM. 
*/ + strsort_aflag(keys, n_live, /*bp=*/0, parts_bytes, + /*base_offset=*/0, elems, pool, parts); + } else { + uint32_t n_tasks = ray_pool_total_workers(pool_p); + if (n_tasks < 1) n_tasks = 1; + + ray_t* hist_hdr = NULL; + ray_t* off_hdr = NULL; + uint32_t* hist = (uint32_t*)scratch_alloc(&hist_hdr, + (size_t)n_tasks * 256 * sizeof(uint32_t)); + int64_t* off = (int64_t*)scratch_alloc(&off_hdr, + (size_t)n_tasks * 256 * sizeof(int64_t)); + if (!hist || !off) { + /* Free only the hist/off scratch we own here; tmp_hdr + * belongs to the outer cleanup block (line below) and + * MUST NOT be freed twice. */ + scratch_free(hist_hdr); scratch_free(off_hdr); + strsort_aflag(keys, n_live, /*bp=*/0, parts_bytes, + /*base_offset=*/0, elems, pool, parts); + } else { + strsort_top_ctx_t tctx = { + .src = keys, .dst = tmp, .n = n_live, + .n_tasks = n_tasks, .hist = hist, .offsets = off, + }; + + /* Phase 1: parallel histogram. */ + ray_pool_dispatch_n(pool_p, strsort_top_hist_fn, + &tctx, n_tasks); + + /* Phase 2: sequential prefix-sum. For each bucket + * b, the starting offset is the sum of all counts + * in earlier buckets plus all counts in earlier + * tasks for this bucket. */ + int64_t bucket_counts[256]; + int64_t bucket_starts[256]; + int64_t sum = 0; + for (int b = 0; b < 256; b++) { + bucket_starts[b] = sum; + int64_t bc = 0; + for (uint32_t t = 0; t < n_tasks; t++) { + off[t * 256 + b] = sum + bc; + bc += hist[t * 256 + b]; + } + bucket_counts[b] = bc; + sum += bc; + } + + /* Phase 3: parallel scatter into tmp. */ + ray_pool_dispatch_n(pool_p, strsort_top_scatter_fn, + &tctx, n_tasks); + + /* tmp now holds the records partitioned by byte 0. */ + scratch_free(hist_hdr); + scratch_free(off_hdr); + + /* Phase 4: parallel per-bucket recursive sort. 
*/ + strsort_bucket_ctx_t bctx = { + .keys = tmp, + .starts = bucket_starts, + .counts = bucket_counts, + .parts_bytes = parts_bytes, + .base_offset = 0, + .elems = elems, + .pool = pool, + .parts = parts, + .start_bp = 1, + }; + ray_pool_dispatch_n(pool_p, strsort_bucket_fn, + &bctx, 256); + + keys_sorted = tmp; + } + } + } + + /* Scatter row indices back (ASC order, parallel). */ + strsort_emit_ctx_t ectx = { sorted_idx, keys_sorted }; + if (go_parallel) + ray_pool_dispatch(pool_p, strsort_emit_fn, &ectx, n_live); + else + strsort_emit_fn(&ectx, 0, 0, n_live); + + if (tmp_hdr) scratch_free(tmp_hdr); + + /* DESC reverses the sorted non-null range. */ + if (desc) { + for (int64_t i = 0, j = n_live - 1; i < j; i++, j--) { + int64_t t = sorted_idx[i]; + sorted_idx[i] = sorted_idx[j]; + sorted_idx[j] = t; + } + } + } + + scratch_free(keys_hdr); + } + + /* If nulls should be first, rotate them to the front. */ + if (null_count > 0 && nulls_first) { + /* Cheap rotation via three reverses: + * reverse [0, n_live); reverse [n_live, nrows); reverse [0, nrows) + * Takes O(nrows) swaps, no extra memory. */ + int64_t a = 0, b = n_live - 1; + while (a < b) { int64_t t = sorted_idx[a]; sorted_idx[a] = sorted_idx[b]; sorted_idx[b] = t; a++; b--; } + a = n_live; b = nrows - 1; + while (a < b) { int64_t t = sorted_idx[a]; sorted_idx[a] = sorted_idx[b]; sorted_idx[b] = t; a++; b--; } + a = 0; b = nrows - 1; + while (a < b) { int64_t t = sorted_idx[a]; sorted_idx[a] = sorted_idx[b]; sorted_idx[b] = t; a++; b--; } + } + + return true; +} + +/* Build SYM rank mapping: intern_id → sorted rank by string value. + * Caller must scratch_free(*hdr_out) when done. + * Returns pointer to rank array of size (max_id + 1), or NULL on error. 
*/ +/* Parallel max_id scan context */ +typedef struct { + const void* data; + int8_t type; + uint8_t attrs; + uint32_t* pw_max; /* per-worker max */ +} enum_max_ctx_t; + +static void enum_max_fn(void* arg, uint32_t wid, + int64_t start, int64_t end) { + enum_max_ctx_t* c = (enum_max_ctx_t*)arg; + uint32_t local_max = c->pw_max[wid]; + for (int64_t i = start; i < end; i++) { + uint32_t v = (uint32_t)ray_read_sym(c->data, i, c->type, c->attrs); + if (v > local_max) local_max = v; + } + c->pw_max[wid] = local_max; +} + +uint32_t* build_enum_rank(ray_t* col, int64_t nrows, ray_t** hdr_out) { + const void* data = ray_data(col); + int8_t type = col->type; + uint8_t attrs = col->attrs; + + /* Find max intern ID (parallel for large columns) */ + uint32_t max_id = 0; + ray_pool_t* pool = ray_pool_get(); + if (pool && nrows > 100000) { + uint32_t nw = ray_pool_total_workers(pool); + uint32_t pw_max[nw]; + memset(pw_max, 0, nw * sizeof(uint32_t)); + enum_max_ctx_t ectx = { .data = data, .type = type, .attrs = attrs, .pw_max = pw_max }; + ray_pool_dispatch(pool, enum_max_fn, &ectx, nrows); + for (uint32_t w = 0; w < nw; w++) + if (pw_max[w] > max_id) max_id = pw_max[w]; + } else { + for (int64_t i = 0; i < nrows; i++) { + uint32_t v = (uint32_t)ray_read_sym(data, i, type, attrs); + if (v > max_id) max_id = v; + } + } + + if (max_id >= UINT32_MAX - 1) { *hdr_out = NULL; return NULL; } + uint32_t n_ids = max_id + 1; + + /* Arena for temporaries (ids, ptrs, lens, tmp) — single reset at end */ + ray_scratch_arena_t arena; + ray_scratch_arena_init(&arena); + + /* Allocate array of intern IDs to sort */ + uint32_t* ids = (uint32_t*)ray_scratch_arena_push(&arena, + (size_t)n_ids * sizeof(uint32_t)); + if (!ids) { ray_scratch_arena_reset(&arena); *hdr_out = NULL; return NULL; } + for (uint32_t i = 0; i < n_ids; i++) ids[i] = i; + + /* Pre-cache raw string pointers and lengths for fast comparison */ + const char** ptrs = (const char**)ray_scratch_arena_push(&arena, + (size_t)n_ids * 
sizeof(const char*)); + uint32_t* lens = (uint32_t*)ray_scratch_arena_push(&arena, + (size_t)n_ids * sizeof(uint32_t)); + if (!ptrs || !lens) { + ray_scratch_arena_reset(&arena); *hdr_out = NULL; return NULL; + } + for (uint32_t i = 0; i < n_ids; i++) { + ray_t* s = ray_sym_str((int64_t)i); + if (s) { + ptrs[i] = ray_str_ptr(s); + lens[i] = (uint32_t)ray_str_len(s); + } else { + ptrs[i] = NULL; + lens[i] = 0; + } + } + + /* Merge sort intern IDs by full string comparison. For ≤100K SYM + * values this completes in <1ms and correctly handles strings that + * share long common prefixes (e.g. "id000000001"–"id000099999"). */ + { + uint32_t* tmp = (uint32_t*)ray_scratch_arena_push(&arena, + (size_t)n_ids * sizeof(uint32_t)); + if (!tmp) { ray_scratch_arena_reset(&arena); + *hdr_out = NULL; return NULL; } + + /* Bottom-up merge sort */ + for (uint32_t width = 1; width < n_ids; width *= 2) { + for (uint32_t i = 0; i < n_ids; i += 2 * width) { + uint32_t lo = i; + uint32_t mid = lo + width; + if (mid > n_ids) mid = n_ids; + uint32_t hi = lo + 2 * width; + if (hi > n_ids) hi = n_ids; + /* Merge ids[lo..mid) and ids[mid..hi) into tmp[lo..hi) */ + uint32_t a = lo, b = mid, k = lo; + while (a < mid && b < hi) { + uint32_t ia = ids[a], ib = ids[b]; + uint32_t la = lens[ia], lb = lens[ib]; + uint32_t ml = la < lb ? 
la : lb; + int cmp = 0; + if (ml > 0) cmp = memcmp(ptrs[ia], ptrs[ib], ml); + if (cmp == 0) cmp = (la > lb) - (la < lb); + if (cmp <= 0) tmp[k++] = ids[a++]; + else tmp[k++] = ids[b++]; + } + while (a < mid) tmp[k++] = ids[a++]; + while (b < hi) tmp[k++] = ids[b++]; + } + /* Swap ids and tmp */ + uint32_t* s = ids; ids = tmp; tmp = s; + } + } + + /* Build rank[intern_id] = sorted position (output — not arena'd) */ + ray_t* rank_hdr; + uint32_t* rank = (uint32_t*)scratch_calloc(&rank_hdr, + (size_t)n_ids * sizeof(uint32_t)); + if (!rank) { ray_scratch_arena_reset(&arena); *hdr_out = NULL; return NULL; } + + for (uint32_t i = 0; i < n_ids; i++) + rank[ids[i]] = i; + + ray_scratch_arena_reset(&arena); /* free all temporaries at once */ + *hdr_out = rank_hdr; + return rank; +} + +/* Insertion sort for small arrays — used as base case for merge sort */ +void sort_insertion(const sort_cmp_ctx_t* ctx, int64_t* arr, int64_t n) { + for (int64_t i = 1; i < n; i++) { + int64_t key = arr[i]; + int64_t j = i - 1; + while (j >= 0 && sort_cmp(ctx, arr[j], key) > 0) { + arr[j + 1] = arr[j]; + j--; + } + arr[j + 1] = key; + } +} + +/* Single-threaded merge sort (recursive, with insertion sort base case) */ +void sort_merge_recursive(const sort_cmp_ctx_t* ctx, + int64_t* arr, int64_t* tmp, int64_t n) { + if (n <= 64) { + sort_insertion(ctx, arr, n); + return; + } + int64_t mid = n / 2; + sort_merge_recursive(ctx, arr, tmp, mid); + sort_merge_recursive(ctx, arr + mid, tmp + mid, n - mid); + + /* Merge arr[0..mid) and arr[mid..n) into tmp, then copy back */ + int64_t i = 0, j = mid, k = 0; + while (i < mid && j < n) { + if (sort_cmp(ctx, arr[i], arr[j]) <= 0) + tmp[k++] = arr[i++]; + else + tmp[k++] = arr[j++]; + } + while (i < mid) tmp[k++] = arr[i++]; + while (j < n) tmp[k++] = arr[j++]; + memcpy(arr, tmp, (size_t)n * sizeof(int64_t)); +} + +/* sort_phase1_ctx_t defined in exec_internal.h */ + +void sort_phase1_fn(void* arg, uint32_t worker_id, int64_t start, int64_t end) { + 
(void)worker_id; + sort_phase1_ctx_t* ctx = (sort_phase1_ctx_t*)arg; + for (int64_t chunk_idx = start; chunk_idx < end; chunk_idx++) { + int64_t chunk_size = (ctx->nrows + ctx->n_chunks - 1) / ctx->n_chunks; + int64_t lo = chunk_idx * chunk_size; + int64_t hi = lo + chunk_size; + if (hi > ctx->nrows) hi = ctx->nrows; + if (lo >= hi) continue; + sort_merge_recursive(ctx->cmp_ctx, ctx->indices + lo, ctx->tmp + lo, hi - lo); + } +} + +/* Merge two adjacent sorted runs: [lo..mid) and [mid..hi) from src into dst */ +static void merge_runs(const sort_cmp_ctx_t* ctx, + const int64_t* src, int64_t* dst, + int64_t lo, int64_t mid, int64_t hi) { + int64_t i = lo, j = mid, k = lo; + while (i < mid && j < hi) { + if (sort_cmp(ctx, src[i], src[j]) <= 0) + dst[k++] = src[i++]; + else + dst[k++] = src[j++]; + } + while (i < mid) dst[k++] = src[i++]; + while (j < hi) dst[k++] = src[j++]; +} + +/* sort_merge_ctx_t defined in exec_internal.h */ + +void sort_merge_fn(void* arg, uint32_t worker_id, int64_t start, int64_t end) { + (void)worker_id; + sort_merge_ctx_t* ctx = (sort_merge_ctx_t*)arg; + for (int64_t pair_idx = start; pair_idx < end; pair_idx++) { + int64_t lo = pair_idx * 2 * ctx->run_size; + int64_t mid = lo + ctx->run_size; + int64_t hi = mid + ctx->run_size; + if (mid > ctx->nrows) mid = ctx->nrows; + if (hi > ctx->nrows) hi = ctx->nrows; + if (lo >= ctx->nrows) continue; + if (mid >= hi) { + /* Only one run — copy directly */ + memcpy(ctx->dst + lo, ctx->src + lo, (size_t)(hi - lo) * sizeof(int64_t)); + } else { + merge_runs(ctx->cmp_ctx, ctx->src, ctx->dst, lo, mid, hi); + } + } +} + +/* -------------------------------------------------------------------------- + * Parallel multi-key min/max prescan for composite radix sort. + * Each worker scans all n_keys columns over its row range, then the main + * thread merges per-worker results. 
+ * -------------------------------------------------------------------------- */ + +/* MK_PRESCAN_MAX_KEYS, mk_prescan_ctx_t defined in exec_internal.h */ + +void mk_prescan_fn(void* arg, uint32_t wid, + int64_t start, int64_t end) { + mk_prescan_ctx_t* c = (mk_prescan_ctx_t*)arg; + uint8_t nk = c->n_keys; + int64_t* my_mins = c->pw_mins + (int64_t)wid * nk; + int64_t* my_maxs = c->pw_maxs + (int64_t)wid * nk; + + /* Initialize on first morsel, merge on subsequent */ + for (uint8_t k = 0; k < nk; k++) { + if (my_mins[k] == INT64_MAX) { + /* first morsel for this worker — will be set below */ + } + } + + for (uint8_t k = 0; k < nk; k++) { + ray_t* col = c->vecs[k]; + int64_t kmin = my_mins[k], kmax = my_maxs[k]; + + if (c->enum_ranks[k]) { + const void* cdata = ray_data(col); + int8_t ctype = col->type; + uint8_t cattrs = col->attrs; + const uint32_t* ranks = c->enum_ranks[k]; + for (int64_t i = start; i < end; i++) { + uint32_t raw = (uint32_t)ray_read_sym(cdata, i, ctype, cattrs); + int64_t v = (int64_t)ranks[raw]; + if (v < kmin) kmin = v; + if (v > kmax) kmax = v; + } + } else if (col->type == RAY_I64 || col->type == RAY_TIMESTAMP) { + const int64_t* d = (const int64_t*)ray_data(col); + for (int64_t i = start; i < end; i++) { + if (d[i] < kmin) kmin = d[i]; + if (d[i] > kmax) kmax = d[i]; + } + } else if (col->type == RAY_F64) { + const double* d = (const double*)ray_data(col); + for (int64_t i = start; i < end; i++) { + uint64_t bits; + memcpy(&bits, &d[i], 8); + uint64_t mask = -(bits >> 63) | ((uint64_t)1 << 63); + int64_t v = (int64_t)(bits ^ mask); + if (v < kmin) kmin = v; + if (v > kmax) kmax = v; + } + } else if (col->type == RAY_I32 || col->type == RAY_DATE || col->type == RAY_TIME) { + const int32_t* d = (const int32_t*)ray_data(col); + for (int64_t i = start; i < end; i++) { + int64_t v = (int64_t)d[i]; + if (v < kmin) kmin = v; + if (v > kmax) kmax = v; + } + } else if (col->type == RAY_I16) { + const int16_t* d = (const int16_t*)ray_data(col); + 
for (int64_t i = start; i < end; i++) { + int64_t v = (int64_t)d[i]; + if (v < kmin) kmin = v; + if (v > kmax) kmax = v; + } + } else if (col->type == RAY_BOOL || col->type == RAY_U8) { + const uint8_t* d = (const uint8_t*)ray_data(col); + for (int64_t i = start; i < end; i++) { + int64_t v = (int64_t)d[i]; + if (v < kmin) kmin = v; + if (v > kmax) kmax = v; + } + } + + my_mins[k] = kmin; + my_maxs[k] = kmax; + } +} + +/* -------------------------------------------------------------------------- + * Top-N heap selection: for ORDER BY ... LIMIT N where N is small, + * a single-pass heap beats the 8-pass radix sort. + * -------------------------------------------------------------------------- */ + +typedef struct { uint64_t key; int64_t idx; } topn_entry_t; + +static inline void topn_sift_down(topn_entry_t* h, int64_t n, int64_t i) { + for (;;) { + int64_t largest = i, l = 2*i+1, r = 2*i+2; + if (l < n && h[l].key > h[largest].key) largest = l; + if (r < n && h[r].key > h[largest].key) largest = r; + if (largest == i) return; + topn_entry_t t = h[i]; h[i] = h[largest]; h[largest] = t; + i = largest; + } +} + +/* -------------------------------------------------------------------------- + * Fused encode + top-N: composite-key encode and heap insert in one pass, + * avoiding the 80MB intermediate keys array. 
+ * -------------------------------------------------------------------------- */ + +typedef struct { + int64_t limit; + topn_entry_t* heaps; /* [n_workers][limit] */ + int64_t* counts; + /* Composite-key encode params (same as radix_encode_ctx_t fields): */ + uint8_t n_keys; + ray_t** vecs; + int64_t mins[16]; + int64_t ranges[16]; + uint8_t bit_shifts[16]; + uint8_t descs[16]; + const uint32_t* enum_ranks[16]; +} fused_topn_ctx_t; + +__attribute__((unused)) +static void fused_topn_fn(void* arg, uint32_t wid, + int64_t start, int64_t end) { + fused_topn_ctx_t* c = (fused_topn_ctx_t*)arg; + int64_t K = c->limit; + topn_entry_t* heap = c->heaps + (int64_t)wid * K; + int64_t cnt = c->counts[wid]; + uint8_t nk = c->n_keys; + + for (int64_t i = start; i < end; i++) { + /* Inline composite key encode */ + uint64_t composite = 0; + for (uint8_t k = 0; k < nk; k++) { + ray_t* col = c->vecs[k]; + int64_t val; + if (c->enum_ranks[k]) { + uint32_t raw = (uint32_t)ray_read_sym(ray_data(col), i, col->type, col->attrs); + val = (int64_t)c->enum_ranks[k][raw]; + } else if (col->type == RAY_I64 || col->type == RAY_TIMESTAMP) { + val = ((const int64_t*)ray_data(col))[i]; + } else if (col->type == RAY_F64) { + uint64_t bits; + memcpy(&bits, &((const double*)ray_data(col))[i], 8); + uint64_t mask = -(bits >> 63) | ((uint64_t)1 << 63); + val = (int64_t)(bits ^ mask); + } else if (col->type == RAY_I32 || col->type == RAY_DATE || col->type == RAY_TIME) { + val = (int64_t)((const int32_t*)ray_data(col))[i]; + } else if (col->type == RAY_I16) { + val = (int64_t)((const int16_t*)ray_data(col))[i]; + } else if (col->type == RAY_BOOL || col->type == RAY_U8) { + val = (int64_t)((const uint8_t*)ray_data(col))[i]; + } else { + val = 0; + } + uint64_t part = (uint64_t)val - (uint64_t)c->mins[k]; + if (c->descs[k]) part = (uint64_t)c->ranges[k] - part; + composite |= part << c->bit_shifts[k]; + } + + /* Inline heap insert */ + if (cnt < K) { + heap[cnt].key = composite; + heap[cnt].idx = i; + 
cnt++; + if (cnt == K) { + for (int64_t j = K/2 - 1; j >= 0; j--) + topn_sift_down(heap, K, j); + } + } else if (composite < heap[0].key) { + heap[0].key = composite; + heap[0].idx = i; + topn_sift_down(heap, K, 0); + } + } + c->counts[wid] = cnt; +} + +typedef struct { + const uint64_t* keys; + int64_t limit; + topn_entry_t* heaps; /* [n_workers][limit] */ + int64_t* counts; /* actual count per worker */ +} topn_ctx_t; + +__attribute__((unused)) +static void topn_scan_fn(void* arg, uint32_t wid, int64_t start, int64_t end) { + topn_ctx_t* c = (topn_ctx_t*)arg; + int64_t K = c->limit; + topn_entry_t* heap = c->heaps + (int64_t)wid * K; + const uint64_t* keys = c->keys; + int64_t cnt = c->counts[wid]; /* accumulate across morsels */ + + for (int64_t i = start; i < end; i++) { + uint64_t k = keys[i]; + if (cnt < K) { + heap[cnt].key = k; + heap[cnt].idx = i; + cnt++; + if (cnt == K) { + for (int64_t j = K/2 - 1; j >= 0; j--) + topn_sift_down(heap, K, j); + } + } else if (k < heap[0].key) { + heap[0].key = k; + heap[0].idx = i; + topn_sift_down(heap, K, 0); + } + } + c->counts[wid] = cnt; +} + +#define TOPN_MAX 8192 /* max limit for heap-based top-N (merge VLA ≤ 128KB) */ + +__attribute__((unused)) +static int64_t topn_merge_fused(fused_topn_ctx_t* ctx, uint32_t n_workers, + int64_t* out, int64_t limit) { + /* Clamp to TOPN_MAX for VLA stack safety (≤ 128KB). 
*/ + if (limit > TOPN_MAX) limit = TOPN_MAX; + topn_entry_t merge[limit]; + int64_t cnt = 0; + for (uint32_t w = 0; w < n_workers; w++) { + topn_entry_t* wh = ctx->heaps + (int64_t)w * limit; + int64_t wc = ctx->counts[w]; + for (int64_t j = 0; j < wc; j++) { + if (cnt < limit) { + merge[cnt++] = wh[j]; + if (cnt == limit) { + for (int64_t m = limit/2 - 1; m >= 0; m--) + topn_sift_down(merge, limit, m); + } + } else if (wh[j].key < merge[0].key) { + merge[0] = wh[j]; + topn_sift_down(merge, limit, 0); + } + } + } + if (cnt > 1) { + for (int64_t m = cnt/2 - 1; m >= 0; m--) + topn_sift_down(merge, cnt, m); + for (int64_t i = cnt - 1; i > 0; i--) { + topn_entry_t t = merge[0]; merge[0] = merge[i]; merge[i] = t; + topn_sift_down(merge, i, 0); + } + } + for (int64_t i = 0; i < cnt; i++) + out[i] = merge[i].idx; + return cnt; +} + +/* Merge per-worker heaps → sorted indices in out[0..return_val-1]. */ +__attribute__((unused)) +static int64_t topn_merge(topn_ctx_t* ctx, uint32_t n_workers, + int64_t* out, int64_t limit) { + /* Clamp to TOPN_MAX for VLA stack safety (≤ 128KB). 
*/ + if (limit > TOPN_MAX) limit = TOPN_MAX; + topn_entry_t merge[limit]; + int64_t cnt = 0; + + for (uint32_t w = 0; w < n_workers; w++) { + topn_entry_t* wh = ctx->heaps + (int64_t)w * limit; + int64_t wc = ctx->counts[w]; + for (int64_t j = 0; j < wc; j++) { + if (cnt < limit) { + merge[cnt++] = wh[j]; + if (cnt == limit) { + for (int64_t m = limit/2 - 1; m >= 0; m--) + topn_sift_down(merge, limit, m); + } + } else if (wh[j].key < merge[0].key) { + merge[0] = wh[j]; + topn_sift_down(merge, limit, 0); + } + } + } + + /* Heapsort for ascending order */ + if (cnt > 1) { + for (int64_t m = cnt/2 - 1; m >= 0; m--) + topn_sift_down(merge, cnt, m); + for (int64_t i = cnt - 1; i > 0; i--) { + topn_entry_t t = merge[0]; merge[0] = merge[i]; merge[i] = t; + topn_sift_down(merge, i, 0); + } + } + + for (int64_t i = 0; i < cnt; i++) + out[i] = merge[i].idx; + return cnt; +} + +/* Decode sorted radix keys directly into a typed output vector. + * Sequential writes — no random access. */ +static void radix_decode_into(void* dst, int8_t type, const uint64_t* sorted_keys, + int64_t n, bool desc) { + if (type == RAY_I64 || type == RAY_TIMESTAMP) { + int64_t* d = (int64_t*)dst; + if (desc) + for (int64_t i = 0; i < n; i++) + d[i] = (int64_t)(~sorted_keys[i] ^ ((uint64_t)1 << 63)); + else + for (int64_t i = 0; i < n; i++) + d[i] = (int64_t)(sorted_keys[i] ^ ((uint64_t)1 << 63)); + } else if (type == RAY_F64) { + double* d = (double*)dst; + for (int64_t i = 0; i < n; i++) { + uint64_t k = desc ? ~sorted_keys[i] : sorted_keys[i]; + /* Inverse of encode: positive originals have MSB=1 in key (flip sign bit), + * negative originals have MSB=0 in key (flip all bits). */ + uint64_t mask = (k >> 63) ? 
((uint64_t)1 << 63) : ~(uint64_t)0; + uint64_t bits = k ^ mask; + memcpy(&d[i], &bits, 8); + } + } else if (type == RAY_I32 || type == RAY_DATE || type == RAY_TIME) { + int32_t* d = (int32_t*)dst; + if (desc) + for (int64_t i = 0; i < n; i++) + d[i] = (int32_t)((uint32_t)(~sorted_keys[i]) ^ ((uint32_t)1 << 31)); + else + for (int64_t i = 0; i < n; i++) + d[i] = (int32_t)((uint32_t)sorted_keys[i] ^ ((uint32_t)1 << 31)); + } else if (type == RAY_I16) { + int16_t* d = (int16_t*)dst; + if (desc) + for (int64_t i = 0; i < n; i++) + d[i] = (int16_t)((uint16_t)(~sorted_keys[i]) ^ ((uint16_t)1 << 15)); + else + for (int64_t i = 0; i < n; i++) + d[i] = (int16_t)((uint16_t)sorted_keys[i] ^ ((uint16_t)1 << 15)); + } else if (type == RAY_BOOL || type == RAY_U8) { + uint8_t* d = (uint8_t*)dst; + if (desc) + for (int64_t i = 0; i < n; i++) d[i] = (uint8_t)(~sorted_keys[i]); + else + for (int64_t i = 0; i < n; i++) d[i] = (uint8_t)sorted_keys[i]; + } +} + +/* Sort columns and return index array (extended: optionally returns sorted keys). 
+ * cols: array of n_cols vectors (sort keys, most significant first) + * descs: array of n_cols flags (0=asc, 1=desc), or NULL for all-asc + * nulls_first: array of n_cols flags (0=nulls last, 1=nulls first), or NULL + * for default convention (nulls last for asc, nulls first for desc) + * n_cols: number of sort key columns (max 16) + * nrows: number of rows in each column + * sorted_keys_out: if non-NULL, receives sorted radix keys (caller frees keys_hdr_out) + * keys_hdr_out: if non-NULL, receives scratch header for sorted_keys_out + * Returns: ray_t* I64 vector of sorted indices (caller owns), or RAY_ERROR */ +static ray_t* sort_indices_ex(ray_t** cols, uint8_t* descs, uint8_t* nulls_first, + uint8_t n_cols, int64_t nrows, + uint64_t** sorted_keys_out, ray_t** keys_hdr_out) { + if (n_cols == 0 || nrows <= 0) + return ray_vec_new(RAY_I64, 0); + if (n_cols > 16) + return ray_error("nyi", NULL); + + /* Allocate index array */ + ray_t* indices_hdr; + int64_t* indices = (int64_t*)scratch_alloc(&indices_hdr, + (size_t)nrows * sizeof(int64_t)); + if (!indices) return ray_error("oom", NULL); + bool iota_done = false; + + /* --- Radix sort fast path ------------------------------------------------ + * Try radix sort for integer/float/enum keys. Falls back to merge sort + * for unsupported types (SYM with arbitrary strings, mixed types, etc.). */ + bool radix_done = false; + int64_t* sorted_idx = indices; /* may point to itmp after radix sort */ + ray_t* radix_itmp_hdr = NULL; /* kept alive until we copy out */ + ray_t* enum_rank_hdrs[n_cols]; + memset(enum_rank_hdrs, 0, n_cols * sizeof(ray_t*)); + + if (nrows > 64) { + /* RAY_STR single-key fast path — dedicated MSD byte-radix + * sort. Handles variable-width strings, nulls, and DESC + * internally; skips the rest of sort_indices_ex on success. */ + if (n_cols == 1 && cols[0]->type == RAY_STR) { + bool desc = descs ? descs[0] : 0; + bool nf = nulls_first ? 
nulls_first[0] : !desc; + if (sort_str_msd_inplace(indices, nrows, cols[0], desc, nf)) { + sorted_idx = indices; + iota_done = true; + radix_done = true; + goto str_msd_done; + } + /* OOM — fall through to comparison merge sort. */ + } + + /* Check if all sort keys are radix-sortable types. + * RAY_STR and RAY_GUID are accepted for multi-key sorts only: + * they have no packed uint64 encoding, so the composite-radix + * path can't fit them, but the rank-then-compose fallback handles + * them via single-key sort_indices_ex recursion (which hits the + * RAY_STR MSD byte-radix path for strings, or the merge-sort + * path with the new RAY_GUID comparator for guids). */ + bool can_radix = true; + bool has_wide_key = false; /* RAY_STR or RAY_GUID — forces rank fallback */ + for (uint8_t k = 0; k < n_cols; k++) { + if (!cols[k]) { can_radix = false; break; } + int8_t t = cols[k]->type; + if (t == RAY_STR || t == RAY_GUID) { has_wide_key = true; continue; } + if (t != RAY_I64 && t != RAY_F64 && t != RAY_I32 && t != RAY_I16 && + t != RAY_BOOL && t != RAY_U8 && t != RAY_SYM && + t != RAY_DATE && t != RAY_TIME && t != RAY_TIMESTAMP) { + can_radix = false; break; + } + } + /* Single-key wide types: RAY_STR has its own MSD fast path above; + * single-key RAY_GUID falls through to merge sort with the new + * comparator. In both cases the multi-key composite path is not + * applicable, so disable the radix branch. 
*/ + if (has_wide_key && n_cols == 1) can_radix = false; + + if (can_radix) { + ray_pool_t* pool = ray_pool_get(); + + /* Build SYM rank mappings (intern_id -> sorted rank by string) */ + uint32_t* enum_ranks[n_cols]; + memset(enum_ranks, 0, n_cols * sizeof(uint32_t*)); + for (uint8_t k = 0; k < n_cols; k++) { + if (RAY_IS_SYM(cols[k]->type)) { + enum_ranks[k] = build_enum_rank(cols[k], nrows, + &enum_rank_hdrs[k]); + if (!enum_ranks[k]) { can_radix = false; break; } + } + } + + if (can_radix && n_cols == 1) { + /* --- Single-key sort --- */ + uint8_t key_nbytes_max = radix_key_bytes(cols[0]->type); + + /* Skip pool for small arrays - dispatch overhead dominates */ + ray_pool_t* sk_pool = (nrows >= SMALL_POOL_THRESHOLD) ? pool : NULL; + + /* Encode keys (needed by all paths) */ + ray_t *keys_hdr; + uint64_t* keys = (uint64_t*)scratch_alloc(&keys_hdr, + (size_t)nrows * sizeof(uint64_t)); + if (keys) { + bool desc = descs ? descs[0] : 0; + /* Null = minimum value. + * ASC → nulls first, DESC → nulls last. */ + bool nf = nulls_first ? nulls_first[0] : !desc; + radix_encode_ctx_t enc = { + .keys = keys, .indices = indices, + .data = ray_data(cols[0]), + .col = cols[0], + .type = cols[0]->type, + .col_attrs = cols[0]->attrs, + .desc = desc, + .nulls_first = nf, + .enum_rank = enum_ranks[0], .n_keys = 1, + }; + if (sk_pool) + ray_pool_dispatch(sk_pool, radix_encode_fn, &enc, nrows); + else + radix_encode_fn(&enc, 0, 0, nrows); + iota_done = true; + + if (nrows <= RADIX_SORT_THRESHOLD) { + /* Introsort on encoded keys - faster than multi-pass + * radix for small arrays (avoids scatter overhead). */ + key_introsort(keys, indices, nrows); + sorted_idx = indices; + radix_done = true; + } else { + /* Data-range-adaptive byte count: scan encoded keys + * to skip bytes that are uniform across all values, + * avoiding wasteful histogram passes. 
*/ + uint8_t key_nbytes = compute_key_nbytes( + sk_pool, keys, nrows, key_nbytes_max); + + /* Try packed radix sort: pack key + index into one + * uint64_t to halve memory traffic per pass. + * Feasible when key_nbytes*8 + index_bits <= 64. */ + uint8_t idx_bits = 0; + { int64_t nn = nrows; while (nn > 0) { idx_bits++; nn >>= 1; } } + bool use_packed = (key_nbytes <= 3 + && key_nbytes * 8 + idx_bits <= 64); + + if (use_packed) { + uint8_t key_bits = key_nbytes * 8; + ray_t *ptmp_hdr; + uint64_t* ptmp = (uint64_t*)scratch_alloc(&ptmp_hdr, + (size_t)nrows * sizeof(uint64_t)); + if (ptmp) { + /* Fuse packing with sortedness + reverse detection */ + uint32_t pd_nw = sk_pool ? ray_pool_total_workers(sk_pool) : 1; + int64_t pd_pw[pd_nw], pd_nr[pd_nw]; + memset(pd_pw, 0, (size_t)pd_nw * sizeof(int64_t)); + memset(pd_nr, 0, (size_t)pd_nw * sizeof(int64_t)); + uint64_t key_mask_pd = + (key_bits < 64) ? ((1ULL << key_bits) - 1) : ~0ULL; + packed_detect_ctx_t pd_ctx = { + .keys = keys, .key_bits = key_bits, + .key_mask = key_mask_pd, + .pw_unsorted = pd_pw, .pw_not_reverse = pd_nr, + }; + + if (sk_pool) + ray_pool_dispatch(sk_pool, packed_detect_fn, &pd_ctx, nrows); + else + packed_detect_fn(&pd_ctx, 0, 0, nrows); + + /* Aggregate sortedness results */ + int64_t total_unsorted = 0, total_not_rev = 0; + for (uint32_t t = 0; t < pd_nw; t++) { + total_unsorted += pd_pw[t]; + total_not_rev += pd_nr[t]; + } + /* Check cross-task boundaries */ + int64_t grain = RAY_DISPATCH_MORSELS * RAY_MORSEL_ELEMS; + uint64_t key_mask_s = + (key_bits < 64) ? 
((1ULL << key_bits) - 1) : ~0ULL; + for (int64_t b = grain; b < nrows; b += grain) { + uint64_t ka = keys[b-1] & key_mask_s; + uint64_t kb2 = keys[b] & key_mask_s; + if (kb2 < ka) total_unsorted++; + if (kb2 > ka) total_not_rev++; + } + + if (total_unsorted == 0) { + /* Already sorted - identity permutation */ + sorted_idx = indices; + radix_done = true; + } else if (total_not_rev == 0 && nrows > 1) { + /* Reverse-sorted - reverse indices in O(n) */ + for (int64_t i = 0; i < nrows; i++) + indices[i] = nrows - 1 - i; + sorted_idx = indices; + radix_done = true; + } else { + /* Packed radix sort - half the memory traffic */ + uint64_t* sorted = packed_radix_sort_run( + sk_pool, keys, ptmp, nrows, key_nbytes); + + if (sorted) { + uint64_t idx_mask = + (idx_bits < 64) ? ((1ULL << idx_bits) - 1) : ~0ULL; + + /* Packed path: keys are truncated to key_bits, + * not full 64-bit encoded keys — can't decode. */ + packed_unpack_ctx_t up = { + .sorted = sorted, .indices = indices, + .keys_out = NULL, + .key_bits = key_bits, + .idx_mask = idx_mask, .key_mask = 0, + .extract_keys = false, + }; + if (sk_pool) + ray_pool_dispatch(sk_pool, packed_unpack_fn, &up, nrows); + else + packed_unpack_fn(&up, 0, 0, nrows); + + sorted_idx = indices; + radix_done = true; + } + } + } + scratch_free(ptmp_hdr); + } else { + /* Non-packed path: detect sortedness first */ + double us_frac2 = detect_sortedness(sk_pool, keys, nrows); + if (us_frac2 == 0.0) { + sorted_idx = indices; + radix_done = true; + } + /* Standard dual-array radix sort */ + if (!radix_done) { + ray_t *ktmp_hdr, *itmp_hdr; + uint64_t* ktmp = (uint64_t*)scratch_alloc(&ktmp_hdr, + (size_t)nrows * sizeof(uint64_t)); + int64_t* itmp = (int64_t*)scratch_alloc(&itmp_hdr, + (size_t)nrows * sizeof(int64_t)); + if (ktmp && itmp) { + bool want_sk = sorted_keys_out + && !RAY_IS_SYM(cols[0]->type); + uint64_t* sk_out = NULL; + sorted_idx = msd_radix_sort_run(sk_pool, keys, indices, + ktmp, itmp, nrows, + key_nbytes, + want_sk ? 
&sk_out : NULL); + radix_done = (sorted_idx != NULL); + if (radix_done && want_sk && sk_out) { + *sorted_keys_out = sk_out; + if (sk_out == ktmp) { + *keys_hdr_out = ktmp_hdr; + ktmp_hdr = NULL; + } else { + /* Even number of radix passes: + * sorted keys ended up in the + * original keys buffer. */ + *keys_hdr_out = keys_hdr; + keys_hdr = NULL; + } + } + } + if (ktmp_hdr) scratch_free(ktmp_hdr); + if (sorted_idx != itmp) scratch_free(itmp_hdr); + else radix_itmp_hdr = itmp_hdr; + } + } + } + } + scratch_free(keys_hdr); + + } else if (can_radix && n_cols > 1) { + /* --- Multi-key composite radix sort --- */ + int64_t mins[n_cols], maxs[n_cols]; + /* Wider accumulator: up to 16 keys * 63 bits = 1008, + * which would wrap a uint8_t and let an oversized + * budget falsely pass the <=64 fits check. */ + uint16_t total_bits = 0; + bool fits = true; + + ray_pool_t* mk_prescan_pool = (nrows >= SMALL_POOL_THRESHOLD) ? pool : NULL; + if (has_wide_key) { + /* RAY_STR / RAY_GUID can't be packed into a composite + * uint64 key. Force the rank-then-compose fallback. */ + total_bits = UINT16_MAX; + fits = false; + } else if (n_cols <= MK_PRESCAN_MAX_KEYS && mk_prescan_pool) { + uint32_t nw = ray_pool_total_workers(mk_prescan_pool); + size_t pw_count = (size_t)nw * n_cols; + int64_t pw_mins_stack[512], pw_maxs_stack[512]; + ray_t *pw_mins_hdr = NULL, *pw_maxs_hdr = NULL; + int64_t* pw_mins = (pw_count <= 512) + ? pw_mins_stack + : (int64_t*)scratch_alloc(&pw_mins_hdr, pw_count * sizeof(int64_t)); + int64_t* pw_maxs = (pw_count <= 512) + ? 
pw_maxs_stack + : (int64_t*)scratch_alloc(&pw_maxs_hdr, pw_count * sizeof(int64_t)); + for (size_t i = 0; i < pw_count; i++) { + pw_mins[i] = INT64_MAX; + pw_maxs[i] = INT64_MIN; + } + mk_prescan_ctx_t pctx = { + .vecs = cols, .enum_ranks = enum_ranks, + .n_keys = n_cols, .nrows = nrows, .n_workers = nw, + .pw_mins = pw_mins, .pw_maxs = pw_maxs, + }; + ray_pool_dispatch(mk_prescan_pool, mk_prescan_fn, &pctx, nrows); + + /* Merge per-worker results */ + for (uint8_t k = 0; k < n_cols; k++) { + int64_t kmin = INT64_MAX, kmax = INT64_MIN; + for (uint32_t w = 0; w < nw; w++) { + int64_t wmin = pw_mins[w * n_cols + k]; + int64_t wmax = pw_maxs[w * n_cols + k]; + if (wmin < kmin) kmin = wmin; + if (wmax > kmax) kmax = wmax; + } + mins[k] = kmin; + maxs[k] = kmax; + uint64_t range = (uint64_t)(kmax - kmin); + uint8_t bits = 1; + while (((uint64_t)1 << bits) <= range && bits < 64) + bits++; + total_bits = (uint16_t)(total_bits + bits); + } + if (pw_mins_hdr) scratch_free(pw_mins_hdr); + if (pw_maxs_hdr) scratch_free(pw_maxs_hdr); + } else { + /* Sequential fallback (no pool or too many keys) */ + for (uint8_t k = 0; k < n_cols; k++) { + ray_t* col = cols[k]; + int64_t kmin = INT64_MAX, kmax = INT64_MIN; + + if (enum_ranks[k]) { + const void* cdata = ray_data(col); + int8_t ctype = col->type; + uint8_t cattrs = col->attrs; + for (int64_t i = 0; i < nrows; i++) { + uint32_t raw = (uint32_t)ray_read_sym(cdata, i, ctype, cattrs); + int64_t v = (int64_t)enum_ranks[k][raw]; + if (v < kmin) kmin = v; + if (v > kmax) kmax = v; + } + } else if (col->type == RAY_I64 || col->type == RAY_TIMESTAMP) { + const int64_t* d = (const int64_t*)ray_data(col); + for (int64_t i = 0; i < nrows; i++) { + if (d[i] < kmin) kmin = d[i]; + if (d[i] > kmax) kmax = d[i]; + } + } else if (col->type == RAY_F64) { + const double* d = (const double*)ray_data(col); + for (int64_t i = 0; i < nrows; i++) { + uint64_t bits; + memcpy(&bits, &d[i], 8); + uint64_t mask = -(bits >> 63) | ((uint64_t)1 << 63); + 
int64_t v = (int64_t)(bits ^ mask); + if (v < kmin) kmin = v; + if (v > kmax) kmax = v; + } + } else if (col->type == RAY_I32 || col->type == RAY_DATE || col->type == RAY_TIME) { + const int32_t* d = (const int32_t*)ray_data(col); + for (int64_t i = 0; i < nrows; i++) { + if (d[i] < kmin) kmin = (int64_t)d[i]; + if (d[i] > kmax) kmax = (int64_t)d[i]; + } + } else if (col->type == RAY_I16) { + const int16_t* d = (const int16_t*)ray_data(col); + for (int64_t i = 0; i < nrows; i++) { + if (d[i] < kmin) kmin = (int64_t)d[i]; + if (d[i] > kmax) kmax = (int64_t)d[i]; + } + } else if (col->type == RAY_BOOL || col->type == RAY_U8) { + const uint8_t* d = (const uint8_t*)ray_data(col); + for (int64_t i = 0; i < nrows; i++) { + if (d[i] < kmin) kmin = (int64_t)d[i]; + if (d[i] > kmax) kmax = (int64_t)d[i]; + } + } + + mins[k] = kmin; + maxs[k] = kmax; + uint64_t range = (uint64_t)(kmax - kmin); + uint8_t bits = 1; + while (((uint64_t)1 << bits) <= range && bits < 64) + bits++; + total_bits = (uint16_t)(total_bits + bits); + } + } + + if (total_bits > 64) { + fits = false; + /* --- Rank-then-compose fallback --- + * The composite bit budget overflows because at least + * one key has a value range that doesn't fit (typical: + * F64 columns whose sign-flipped IEEE-754 encoding + * spans most of the 64-bit space). Fall back to a + * rank-encoded composite: for each key, run a single- + * key sort to produce a dense rank in [0..K_k), then + * compose the ranks. Bits per key shrinks from + * "data range" to "ceil(log2 distinct_count)", which + * always fits for n_cols * ceil(log2 nrows) <= 64. */ + ray_t* rank_hdrs[n_cols]; + uint32_t* ranks[n_cols]; + uint32_t rank_max[n_cols]; + bool rank_ok = true; + for (uint8_t k = 0; k < n_cols; k++) { + rank_hdrs[k] = NULL; ranks[k] = NULL; rank_max[k] = 0; + } + for (uint8_t k = 0; k < n_cols && rank_ok; k++) { + uint8_t kdesc = descs ? descs[k] : 0; + uint8_t knf = nulls_first ? 
nulls_first[k] : !kdesc; + ray_t* col_arg[1] = { cols[k] }; + uint8_t desc_arg[1] = { kdesc }; + uint8_t nf_arg[1] = { knf }; + ray_t* sk_idx = sort_indices_ex(col_arg, desc_arg, + nf_arg, 1, nrows, + NULL, NULL); + if (!sk_idx || RAY_IS_ERR(sk_idx)) { rank_ok = false; break; } + int64_t* sk_idx_data = (int64_t*)ray_data(sk_idx); + uint32_t* r = (uint32_t*)scratch_alloc(&rank_hdrs[k], + (size_t)nrows * sizeof(uint32_t)); + if (!r) { ray_release(sk_idx); rank_ok = false; break; } + ranks[k] = r; + /* Dense-rank tie detection must use the same null + * ordering as the sub-sort so that null/non-null + * pairs aren't treated as ties (and so that two + * nulls do collapse to the same rank). */ + sort_cmp_ctx_t cctx = { + .vecs = col_arg, .desc = desc_arg, + .nulls_first = nf_arg, .n_sort = 1, + }; + uint32_t cur = 0; + r[sk_idx_data[0]] = 0; + for (int64_t i = 1; i < nrows; i++) { + if (sort_cmp(&cctx, sk_idx_data[i-1], sk_idx_data[i]) != 0) + cur++; + r[sk_idx_data[i]] = cur; + } + rank_max[k] = cur; + ray_release(sk_idx); + } + if (rank_ok) { + uint8_t rank_bits[n_cols]; + /* Accumulate in a wider type: up to 16 keys * 63 + * bits each = 1008, which would wrap a uint8_t. */ + uint16_t rank_total = 0; + for (uint8_t k = 0; k < n_cols; k++) { + uint8_t b = 1; + while (((uint64_t)1 << b) <= rank_max[k] && b < 64) b++; + rank_bits[k] = b; + rank_total = (uint16_t)(rank_total + b); + } + if (rank_total <= 64) { + uint8_t rshift[n_cols]; + uint16_t accum = 0; + for (int k = n_cols - 1; k >= 0; k--) { + rshift[k] = (uint8_t)accum; + accum = (uint16_t)(accum + rank_bits[k]); + } + uint8_t rcomp_nbytes = (uint8_t)((rank_total + 7) / 8); + if (rcomp_nbytes < 1) rcomp_nbytes = 1; + ray_pool_t* rk_pool = + (nrows >= SMALL_POOL_THRESHOLD) ? 
pool : NULL; + ray_t* rkeys_hdr; + uint64_t* rkeys = (uint64_t*)scratch_alloc(&rkeys_hdr, + (size_t)nrows * sizeof(uint64_t)); + if (rkeys) { + for (int64_t i = 0; i < nrows; i++) { + uint64_t composite = 0; + for (uint8_t k = 0; k < n_cols; k++) + composite |= ((uint64_t)ranks[k][i]) << rshift[k]; + rkeys[i] = composite; + indices[i] = i; + } + iota_done = true; + if (nrows <= RADIX_SORT_THRESHOLD) { + key_introsort(rkeys, indices, nrows); + sorted_idx = indices; + radix_done = true; + } else { + ray_t *rktmp_hdr, *ritmp_hdr; + uint64_t* rktmp = (uint64_t*)scratch_alloc(&rktmp_hdr, + (size_t)nrows * sizeof(uint64_t)); + int64_t* ritmp = (int64_t*)scratch_alloc(&ritmp_hdr, + (size_t)nrows * sizeof(int64_t)); + if (rktmp && ritmp) { + sorted_idx = msd_radix_sort_run( + rk_pool, rkeys, indices, + rktmp, ritmp, nrows, rcomp_nbytes, NULL); + radix_done = (sorted_idx != NULL); + } + if (rktmp_hdr) scratch_free(rktmp_hdr); + if (sorted_idx != ritmp) { + if (ritmp_hdr) scratch_free(ritmp_hdr); + } else { + radix_itmp_hdr = ritmp_hdr; + } + } + scratch_free(rkeys_hdr); + } + } + } + for (uint8_t k = 0; k < n_cols; k++) + if (rank_hdrs[k]) scratch_free(rank_hdrs[k]); + } + + if (fits) { + /* Compute bit-shift for each key: primary key in MSBs */ + uint8_t bit_shifts[n_cols]; + uint8_t accum = 0; + for (int k = n_cols - 1; k >= 0; k--) { + bit_shifts[k] = accum; + uint64_t range = (uint64_t)(maxs[k] - mins[k]); + uint8_t bits = 1; + while (((uint64_t)1 << bits) <= range && bits < 64) + bits++; + accum += bits; + } + + uint8_t comp_nbytes = (total_bits + 7) / 8; + if (comp_nbytes < 1) comp_nbytes = 1; + ray_pool_t* mk_pool = (nrows >= SMALL_POOL_THRESHOLD) ? 
pool : NULL; + + { + /* Encode composite keys */ + ray_t *keys_hdr; + uint64_t* keys = (uint64_t*)scratch_alloc(&keys_hdr, + (size_t)nrows * sizeof(uint64_t)); + if (keys) { + radix_encode_ctx_t enc = { + .keys = keys, .indices = indices, + .n_keys = n_cols, .vecs = cols, + }; + for (uint8_t k = 0; k < n_cols; k++) { + enc.mins[k] = mins[k]; + enc.ranges[k] = maxs[k] - mins[k]; + enc.bit_shifts[k] = bit_shifts[k]; + enc.descs[k] = descs ? descs[k] : 0; + enc.enum_ranks[k] = enum_ranks[k]; + } + if (mk_pool) + ray_pool_dispatch(mk_pool, radix_encode_fn, &enc, nrows); + else + radix_encode_fn(&enc, 0, 0, nrows); + iota_done = true; + + /* Adaptive: detect sortedness */ + double unsorted_frac = detect_sortedness(mk_pool, keys, nrows); + + if (unsorted_frac == 0.0) { + /* Already sorted */ + sorted_idx = indices; + radix_done = true; + } else if (nrows <= RADIX_SORT_THRESHOLD) { + /* Small arrays - introsort */ + key_introsort(keys, indices, nrows); + sorted_idx = indices; + radix_done = true; + } else { + /* Radix sort with type-aware pass count */ + ray_t *ktmp_hdr, *itmp_hdr; + uint64_t* ktmp = (uint64_t*)scratch_alloc(&ktmp_hdr, + (size_t)nrows * sizeof(uint64_t)); + int64_t* itmp = (int64_t*)scratch_alloc(&itmp_hdr, + (size_t)nrows * sizeof(int64_t)); + if (ktmp && itmp) { + sorted_idx = msd_radix_sort_run(mk_pool, keys, indices, + ktmp, itmp, nrows, + comp_nbytes, NULL); + radix_done = (sorted_idx != NULL); + } + scratch_free(ktmp_hdr); + if (sorted_idx != itmp) scratch_free(itmp_hdr); + else radix_itmp_hdr = itmp_hdr; + } + } + scratch_free(keys_hdr); + } + } + } + } + } + + /* --- Merge sort fallback ------------------------------------------------ */ + if (!radix_done) { + if (!iota_done) + for (int64_t i = 0; i < nrows; i++) indices[i] = i; + /* Null = minimum value. + * ASC → nulls first (nf=1), DESC → nulls last (nf=0). */ + uint8_t default_nf[n_cols > 0 ? n_cols : 1]; + if (!nulls_first) { + for (uint8_t k = 0; k < n_cols; k++) + default_nf[k] = descs ? 
!descs[k] : 1; + nulls_first = default_nf; + } + sort_cmp_ctx_t cmp_ctx = { + .vecs = cols, + .desc = descs, + .nulls_first = nulls_first, + .n_sort = n_cols, + }; + + if (nrows <= 64) { + sort_insertion(&cmp_ctx, indices, nrows); + } else { + ray_pool_t* pool = ray_pool_get(); + uint32_t n_workers = pool ? ray_pool_total_workers(pool) : 1; + + ray_t* tmp_hdr; + int64_t* tmp = (int64_t*)scratch_alloc(&tmp_hdr, + (size_t)nrows * sizeof(int64_t)); + if (!tmp) { + for (uint8_t k = 0; k < n_cols; k++) + scratch_free(enum_rank_hdrs[k]); + scratch_free(indices_hdr); + return ray_error("oom", NULL); + } + + uint32_t n_chunks = n_workers; + if (pool && n_chunks > 1 && nrows > 1024) { + sort_phase1_ctx_t p1ctx = { + .cmp_ctx = &cmp_ctx, .indices = indices, .tmp = tmp, + .nrows = nrows, .n_chunks = n_chunks, + }; + ray_pool_dispatch_n(pool, sort_phase1_fn, &p1ctx, n_chunks); + } else { + n_chunks = 1; + sort_merge_recursive(&cmp_ctx, indices, tmp, nrows); + } + + if (n_chunks > 1) { + int64_t chunk_size = (nrows + n_chunks - 1) / n_chunks; + int64_t run_size = chunk_size; + int64_t* src = indices; + int64_t* dst = tmp; + + while (run_size < nrows) { + int64_t n_pairs = (nrows + 2 * run_size - 1) / (2 * run_size); + sort_merge_ctx_t mctx = { + .cmp_ctx = &cmp_ctx, .src = src, .dst = dst, + .nrows = nrows, .run_size = run_size, + }; + if (pool && n_pairs > 1) + ray_pool_dispatch_n(pool, sort_merge_fn, &mctx, + (uint32_t)n_pairs); + else + sort_merge_fn(&mctx, 0, 0, n_pairs); + int64_t* t = src; src = dst; dst = t; + run_size *= 2; + } + + if (src != indices) + memcpy(indices, src, (size_t)nrows * sizeof(int64_t)); + } + + scratch_free(tmp_hdr); + } + } + +str_msd_done:; + /* If sorted_keys_out was requested but never set, null it out */ + if (sorted_keys_out && !*sorted_keys_out) { + *sorted_keys_out = NULL; + if (keys_hdr_out) *keys_hdr_out = NULL; + } + + /* Build result I64 vector containing sorted indices */ + ray_t* result = ray_vec_new(RAY_I64, nrows); + if (!result || 
RAY_IS_ERR(result)) { + if (sorted_keys_out && *sorted_keys_out && keys_hdr_out) + scratch_free(*keys_hdr_out); + for (uint8_t k = 0; k < n_cols; k++) + scratch_free(enum_rank_hdrs[k]); + scratch_free(radix_itmp_hdr); + scratch_free(indices_hdr); + return result ? result : ray_error("oom", NULL); + } + result->len = nrows; + + /* Copy final sorted indices into the result vector. + * sorted_idx may point to indices or itmp - either way, copy out. */ + memcpy(ray_data(result), sorted_idx, (size_t)nrows * sizeof(int64_t)); + + /* Free all scratch allocations */ + for (uint8_t k = 0; k < n_cols; k++) + scratch_free(enum_rank_hdrs[k]); + scratch_free(radix_itmp_hdr); + scratch_free(indices_hdr); + return result; +} + +ray_t* ray_sort_indices(ray_t** cols, uint8_t* descs, uint8_t* nulls_first, + uint8_t n_cols, int64_t nrows) { + return sort_indices_ex(cols, descs, nulls_first, n_cols, nrows, NULL, NULL); +} + +ray_t* ray_sort(ray_t** cols, uint8_t* descs, uint8_t* nulls_first, + uint8_t n_cols, int64_t nrows) { + if (n_cols == 1) { + uint64_t* sorted_keys = NULL; + ray_t* keys_hdr = NULL; + ray_t* idx = sort_indices_ex(cols, descs, nulls_first, 1, nrows, + &sorted_keys, &keys_hdr); + if (!idx || RAY_IS_ERR(idx)) return idx; + + if (sorted_keys && !RAY_IS_SYM(cols[0]->type)) { + /* Decode path: sequential writes, no random access */ + ray_t* result = ray_vec_new(cols[0]->type, nrows); + if (!result || RAY_IS_ERR(result)) { + ray_release(idx); + if (keys_hdr) scratch_free(keys_hdr); + return result ? result : ray_error("oom", NULL); + } + result->len = nrows; + radix_decode_into(ray_data(result), cols[0]->type, sorted_keys, + nrows, descs ? 
descs[0] : 0); + /* Propagate null bitmap using sorted indices. Uses the same source-has-nulls test as exec_sort and sort_table_by_keys, which also consults slice_parent for sliced columns. */ + if ((cols[0]->attrs & RAY_ATTR_HAS_NULLS) || ((cols[0]->attrs & RAY_ATTR_SLICE) && cols[0]->slice_parent && (cols[0]->slice_parent->attrs & RAY_ATTR_HAS_NULLS))) { + int64_t* idx_data = (int64_t*)ray_data(idx); + for (int64_t i = 0; i < nrows; i++) + if (ray_vec_is_null(cols[0], idx_data[i])) + ray_vec_set_null(result, i, true); + } + ray_release(idx); + scratch_free(keys_hdr); + return result; + } + + /* Fallback: gather by index */ + if (keys_hdr) scratch_free(keys_hdr); + ray_t* result = gather_by_idx(cols[0], (int64_t*)ray_data(idx), nrows); + ray_release(idx); + return result; + } + + /* Multi-column: index sort + gather (decode only helps single-key) */ + ray_t* idx = ray_sort_indices(cols, descs, nulls_first, n_cols, nrows); + if (!idx || RAY_IS_ERR(idx)) return idx; + ray_t* result = gather_by_idx(cols[0], (int64_t*)ray_data(idx), nrows); + ray_release(idx); + return result; +} + +ray_t* exec_sort(ray_graph_t* g, ray_op_t* op, ray_t* tbl, int64_t limit) { + if (!tbl || RAY_IS_ERR(tbl)) return tbl; + + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + + int64_t nrows = ray_table_nrows(tbl); + int64_t ncols = ray_table_ncols(tbl); + if (ncols > 4096) return ray_error("nyi", NULL); /* stack safety */ + uint8_t n_sort = ext->sort.n_cols; + if (n_sort > 16) return ray_error("nyi", NULL); /* radix_encode_ctx_t limit */ + + /* Resolve sort key vectors */ + ray_t* sort_vecs[n_sort > 0 ? n_sort : 1]; + uint8_t sort_owned[n_sort > 0 ? n_sort : 1]; + memset(sort_vecs, 0, (n_sort > 0 ? n_sort : 1) * sizeof(ray_t*)); + memset(sort_owned, 0, n_sort > 0 ? 
n_sort : 1); + + for (uint8_t k = 0; k < n_sort; k++) { + ray_op_t* key_op = ext->sort.columns[k]; + ray_op_ext_t* key_ext = find_ext(g, key_op->id); + if (key_ext && key_ext->base.opcode == OP_SCAN) { + sort_vecs[k] = ray_table_get_col(tbl, key_ext->sym); + } else { + ray_t* saved = g->table; + g->table = tbl; + sort_vecs[k] = exec_node(g, key_op); + g->table = saved; + sort_owned[k] = 1; + } + if (!sort_vecs[k] || RAY_IS_ERR(sort_vecs[k])) { + ray_t* err = sort_vecs[k] ? sort_vecs[k] : ray_error("nyi", NULL); + for (uint8_t j = 0; j < k; j++) { + if (sort_owned[j] && sort_vecs[j] && !RAY_IS_ERR(sort_vecs[j])) + ray_release(sort_vecs[j]); + } + return err; + } + } + + /* Sort columns -> get index permutation (with optional sorted radix keys) */ + uint64_t* sorted_keys = NULL; + ray_t* sorted_keys_hdr = NULL; + ray_t* idx_vec = sort_indices_ex(sort_vecs, ext->sort.desc, + ext->sort.nulls_first, n_sort, nrows, + &sorted_keys, &sorted_keys_hdr); + if (!idx_vec || RAY_IS_ERR(idx_vec)) { + if (sorted_keys_hdr) scratch_free(sorted_keys_hdr); + for (uint8_t k = 0; k < n_sort; k++) { + if (sort_owned[k] && sort_vecs[k] && !RAY_IS_ERR(sort_vecs[k])) + ray_release(sort_vecs[k]); + } + return idx_vec ? idx_vec : ray_error("oom", NULL); + } + int64_t* sorted_idx = (int64_t*)ray_data(idx_vec); + + /* Check cancellation before expensive gather phase */ + { + ray_pool_t* cp = ray_pool_get(); + if (pool_cancelled(cp)) { + if (sorted_keys_hdr) scratch_free(sorted_keys_hdr); + for (uint8_t k = 0; k < n_sort; k++) { + if (sort_owned[k] && sort_vecs[k] && !RAY_IS_ERR(sort_vecs[k])) + ray_release(sort_vecs[k]); + } + ray_release(idx_vec); + return ray_error("cancel", NULL); + } + } + + /* Materialize sorted result - fused multi-column gather. + * When limit > 0, only gather the first `limit` rows (SORT+LIMIT fusion). 
*/ + int64_t gather_rows = nrows; + if (limit > 0 && limit < nrows) gather_rows = limit; + + ray_t* result = ray_table_new(ncols); + if (!result || RAY_IS_ERR(result)) { + if (sorted_keys_hdr) scratch_free(sorted_keys_hdr); + for (uint8_t k = 0; k < n_sort; k++) { + if (sort_owned[k] && sort_vecs[k] && !RAY_IS_ERR(sort_vecs[k])) + ray_release(sort_vecs[k]); + } + ray_release(idx_vec); + return result; + } + + /* Pre-allocate all output columns, then do a single fused gather pass */ + ray_pool_t* gather_pool = (gather_rows > RAY_PARALLEL_THRESHOLD) ? ray_pool_get() : NULL; + ray_t* new_cols[ncols]; + int64_t col_names[ncols]; + for (int64_t c = 0; c < ncols; c++) { + ray_t* col = ray_table_get_col_idx(tbl, c); + col_names[c] = ray_table_col_name(tbl, c); + if (!col) { new_cols[c] = NULL; continue; } + ray_t* nc; + if (col->type == RAY_LIST) { + /* LIST: element-wise gather with retain (not memcpy-safe) */ + nc = ray_list_new(gather_rows); + } else { + nc = col_vec_new(col, gather_rows); + } + if (!nc || RAY_IS_ERR(nc)) { + for (int64_t j = 0; j < c; j++) + if (new_cols[j]) ray_release(new_cols[j]); + ray_release(result); + if (sorted_keys_hdr) scratch_free(sorted_keys_hdr); + for (uint8_t k = 0; k < n_sort; k++) + if (sort_owned[k] && sort_vecs[k] && !RAY_IS_ERR(sort_vecs[k])) + ray_release(sort_vecs[k]); + ray_release(idx_vec); + return nc ? nc : ray_error("oom", NULL); + } + if (col->type == RAY_LIST) { + ray_t** src_ptrs = (ray_t**)ray_data(col); + ray_t** dst_ptrs = (ray_t**)ray_data(nc); + for (int64_t r = 0; r < gather_rows; r++) { + dst_ptrs[r] = src_ptrs[sorted_idx[r]]; + if (dst_ptrs[r]) ray_retain(dst_ptrs[r]); + } + } + nc->len = gather_rows; + new_cols[c] = nc; + } + + /* Decode-gather optimisation: decode the sort key column directly from + * sorted radix keys (sequential writes) instead of random-access gather. + * Only for single-key, non-SYM sorts where radix keys are available. 
*/ + int64_t sort_key_sym = -1; + if (sorted_keys && n_sort == 1 && !RAY_IS_SYM(sort_vecs[0]->type)) { + ray_op_ext_t* key_ext = find_ext(g, ext->sort.columns[0]->id); + if (key_ext && key_ext->base.opcode == OP_SCAN) + sort_key_sym = key_ext->sym; + } + int64_t decode_col_idx = -1; + if (sort_key_sym >= 0) { + for (int64_t c = 0; c < ncols; c++) { + if (col_names[c] == sort_key_sym && new_cols[c]) { + decode_col_idx = c; + break; + } + } + } + + if (decode_col_idx >= 0) { + radix_decode_into(ray_data(new_cols[decode_col_idx]), + sort_vecs[0]->type, sorted_keys, + gather_rows, ext->sort.desc ? ext->sort.desc[0] : 0); + } + + /* Gather all columns using sorted indices, in batches of MGATHER_MAX_COLS. + * LIST columns are skipped here — they were gathered with retain above. */ + for (int64_t base = 0; base < ncols; ) { + char* g_srcs[MGATHER_MAX_COLS]; + char* g_dsts[MGATHER_MAX_COLS]; + uint8_t g_esz[MGATHER_MAX_COLS]; + int64_t g_nc = 0; + for (; base < ncols && g_nc < MGATHER_MAX_COLS; base++) { + if (!new_cols[base] || base == decode_col_idx) continue; + ray_t* col = ray_table_get_col_idx(tbl, base); + if (col->type == RAY_LIST) continue; + g_srcs[g_nc] = (char*)ray_data(col); + g_dsts[g_nc] = (char*)ray_data(new_cols[base]); + g_esz[g_nc] = col_esz(col); + g_nc++; + } + if (g_nc == 0) continue; + if (n_sort == 1) + partitioned_gather(gather_pool, sorted_idx, gather_rows, + nrows, g_srcs, g_dsts, g_esz, g_nc); + else { + multi_gather_ctx_t mg = { .idx = sorted_idx, .ncols = g_nc }; + for (int64_t i = 0; i < g_nc; i++) { + mg.srcs[i] = g_srcs[i]; + mg.dsts[i] = g_dsts[i]; + mg.esz[i] = g_esz[i]; + } + if (gather_pool) + ray_pool_dispatch(gather_pool, multi_gather_fn, &mg, + gather_rows); + else + multi_gather_fn(&mg, 0, 0, gather_rows); + } + } + + /* Propagate str_pool / sym_dict / null bitmaps from source columns */ + for (int64_t c = 0; c < ncols; c++) { + if (!new_cols[c]) continue; + ray_t* col = ray_table_get_col_idx(tbl, c); + if (!col) continue; + 
col_propagate_str_pool(new_cols[c], col); + /* sym_dict lives in bytes 8-15 of the header union, which also + * hold inline-nullmap bits and slice_offset. Only read it when + * the header layout actually exposes the sym_dict/ext_nullmap + * interpretation: no slice, and either no nulls or external + * nullmap. Otherwise those bytes are bitmap payload / slice + * metadata and dereferencing them hands ray_retain garbage. */ + if (col->type == RAY_SYM && + !(col->attrs & RAY_ATTR_SLICE) && + (!(col->attrs & RAY_ATTR_HAS_NULLS) || (col->attrs & RAY_ATTR_NULLMAP_EXT)) && + col->sym_dict) { + ray_retain(col->sym_dict); + new_cols[c]->sym_dict = col->sym_dict; + } + /* Gather null bits in sorted order */ + bool src_has_nulls = (col->attrs & RAY_ATTR_HAS_NULLS) || + ((col->attrs & RAY_ATTR_SLICE) && col->slice_parent && + (col->slice_parent->attrs & RAY_ATTR_HAS_NULLS)); + if (src_has_nulls) { + for (int64_t r = 0; r < gather_rows; r++) + if (ray_vec_is_null(col, sorted_idx[r])) + ray_vec_set_null(new_cols[c], r, true); + } + } + + for (int64_t c = 0; c < ncols; c++) { + if (!new_cols[c]) continue; + result = ray_table_add_col(result, col_names[c], new_cols[c]); + ray_release(new_cols[c]); + } + + /* Free sorted radix keys scratch buffer */ + if (sorted_keys_hdr) scratch_free(sorted_keys_hdr); + + /* Free expression-evaluated sort keys */ + for (uint8_t k = 0; k < n_sort; k++) { + if (sort_owned[k] && sort_vecs[k] && !RAY_IS_ERR(sort_vecs[k])) + ray_release(sort_vecs[k]); + } + + ray_release(idx_vec); + return result; +} + +/* ── Builtins ── */ + +/* (asc v) — sort vector ascending */ +ray_t* ray_asc_fn(ray_t* x) { + if (!x || RAY_IS_ERR(x)) return x; + if (ray_is_atom(x)) { ray_retain(x); return x; } + if (!ray_is_vec(x)) return ray_error("type", "asc expects a vector"); + int64_t n = ray_len(x); + if (n <= 1) { ray_retain(x); return x; } + uint8_t desc = 0; + return ray_sort(&x, &desc, NULL, 1, n); +} + +/* (desc v) — sort vector descending */ +ray_t* ray_desc_fn(ray_t* 
x) { + if (!x || RAY_IS_ERR(x)) return x; + if (ray_is_atom(x)) { ray_retain(x); return x; } + if (!ray_is_vec(x)) return ray_error("type", "desc expects a vector"); + int64_t n = ray_len(x); + if (n <= 1) { ray_retain(x); return x; } + uint8_t desc = 1; + return ray_sort(&x, &desc, NULL, 1, n); +} + +/* (iasc v) — ascending sort indices */ +ray_t* ray_iasc_fn(ray_t* x) { + if (!x || RAY_IS_ERR(x)) return x; + if (!ray_is_vec(x)) return ray_error("type", "iasc expects a vector"); + + int64_t n = ray_len(x); + uint8_t desc = 0; + return ray_sort_indices(&x, &desc, NULL, 1, n); +} + +/* (idesc v) — descending sort indices */ +ray_t* ray_idesc_fn(ray_t* x) { + if (!x || RAY_IS_ERR(x)) return x; + if (!ray_is_vec(x)) return ray_error("type", "idesc expects a vector"); + + int64_t n = ray_len(x); + uint8_t desc = 1; + return ray_sort_indices(&x, &desc, NULL, 1, n); +} + +/* (rank v) — rank positions (inverse permutation of iasc) */ +ray_t* ray_rank_fn(ray_t* x) { + if (!x || RAY_IS_ERR(x)) return x; + if (!ray_is_vec(x)) return ray_error("type", "rank expects a vector"); + + int64_t n = ray_len(x); + uint8_t desc = 0; + ray_t* idx = ray_sort_indices(&x, &desc, NULL, 1, n); + if (RAY_IS_ERR(idx)) return idx; + + ray_t* result = ray_vec_new(RAY_I64, n); + if (RAY_IS_ERR(result)) { ray_release(idx); return result; } + result->len = n; + + int64_t* idx_data = (int64_t*)ray_data(idx); + int64_t* rank_data = (int64_t*)ray_data(result); + for (int64_t i = 0; i < n; i++) + rank_data[idx_data[i]] = i; + + ray_release(idx); + return result; +} + +/* Helper: resolve key symbols to table columns for xasc/xdesc */ +ray_t* sort_table_by_keys(ray_t* tbl, ray_t* keys, uint8_t descending) { + if (!tbl || tbl->type != RAY_TABLE) + return ray_error("type", "xasc/xdesc expects a table as first argument"); + + /* keys can be a SYM atom, a SYM vector, or a list of SYM atoms */ + int64_t n_keys = 0; + int64_t key_ids[16]; + + if (keys->type == -RAY_SYM) { + /* Single symbol atom */ + 
key_ids[0] = keys->i64; + n_keys = 1; + } else if (keys->type == RAY_SYM) { + /* SYM vector */ + int64_t* syms = (int64_t*)ray_data(keys); + n_keys = ray_len(keys); + if (n_keys > 16) return ray_error("limit", "xasc/xdesc: max 16 key columns"); + for (int64_t i = 0; i < n_keys; i++) key_ids[i] = syms[i]; + } else if (is_list(keys)) { + /* List of symbol atoms */ + ray_t** elems = (ray_t**)ray_data(keys); + n_keys = ray_len(keys); + if (n_keys > 16) return ray_error("limit", "xasc/xdesc: max 16 key columns"); + for (int64_t i = 0; i < n_keys; i++) { + if (elems[i]->type != -RAY_SYM) + return ray_error("type", "xasc/xdesc key must be a symbol"); + key_ids[i] = elems[i]->i64; + } + } else { + return ray_error("type", "xasc/xdesc key must be a symbol or list of symbols"); + } + + if (n_keys == 0) { ray_retain(tbl); return tbl; } + + int64_t nrows = ray_table_nrows(tbl); + if (nrows <= 1) { ray_retain(tbl); return tbl; } + + /* Resolve key columns */ + ray_t* key_cols[16]; + for (int64_t i = 0; i < n_keys; i++) { + key_cols[i] = ray_table_get_col(tbl, key_ids[i]); + if (!key_cols[i]) + return ray_error("domain", "xasc/xdesc: key column not found in table"); + } + + /* Build descs array */ + uint8_t descs[16]; + for (int64_t i = 0; i < n_keys; i++) descs[i] = descending; + + uint64_t* sorted_keys = NULL; + ray_t* sorted_keys_hdr = NULL; + ray_t* idx = sort_indices_ex(key_cols, descs, NULL, (uint8_t)n_keys, nrows, + &sorted_keys, &sorted_keys_hdr); + if (RAY_IS_ERR(idx)) { + if (sorted_keys_hdr) scratch_free(sorted_keys_hdr); + return idx; + } + + int64_t* idx_data = (int64_t*)ray_data(idx); + int64_t ncols = ray_table_ncols(tbl); + + /* Pre-allocate all output columns, then do a parallel multi-column + * gather — same fast path exec_sort uses. LIST columns are gathered + * element-wise with retain; all other columns go through the + * partitioned_gather / multi_gather_fn paths. Null bits, str_pool, + * and sym_dict are propagated after the gather runs. 
+ * + * Heap-allocate the per-column scratch arrays so the fast path + * handles arbitrarily wide tables — avoids a VLA stack blow-up + * and matches the pre-regression xasc behavior which supported + * any column count via gather_by_idx. */ + ray_pool_t* gather_pool = (nrows > RAY_PARALLEL_THRESHOLD) + ? ray_pool_get() : NULL; + + ray_t* nc_hdr = NULL; + ray_t** new_cols = (ray_t**)scratch_alloc(&nc_hdr, + (size_t)ncols * sizeof(ray_t*)); + ray_t* cn_hdr = NULL; + int64_t* col_names = (int64_t*)scratch_alloc(&cn_hdr, + (size_t)ncols * sizeof(int64_t)); + if (!new_cols || !col_names) { + if (nc_hdr) scratch_free(nc_hdr); + if (cn_hdr) scratch_free(cn_hdr); + if (sorted_keys_hdr) scratch_free(sorted_keys_hdr); + ray_release(idx); + return ray_error("oom", NULL); + } + for (int64_t c = 0; c < ncols; c++) new_cols[c] = NULL; + + for (int64_t c = 0; c < ncols; c++) { + ray_t* col = ray_table_get_col_idx(tbl, c); + col_names[c] = ray_table_col_name(tbl, c); + if (!col) continue; + ray_t* nc; + if (col->type == RAY_LIST) + nc = ray_list_new(nrows); + else + nc = col_vec_new(col, nrows); + if (!nc || RAY_IS_ERR(nc)) { + for (int64_t j = 0; j < c; j++) + if (new_cols[j]) ray_release(new_cols[j]); + scratch_free(nc_hdr); + scratch_free(cn_hdr); + if (sorted_keys_hdr) scratch_free(sorted_keys_hdr); + ray_release(idx); + return nc ? nc : ray_error("oom", NULL); + } + if (col->type == RAY_LIST) { + ray_t** src_ptrs = (ray_t**)ray_data(col); + ray_t** dst_ptrs = (ray_t**)ray_data(nc); + for (int64_t r = 0; r < nrows; r++) { + dst_ptrs[r] = src_ptrs[idx_data[r]]; + if (dst_ptrs[r]) ray_retain(dst_ptrs[r]); + } + } + nc->len = nrows; + new_cols[c] = nc; + } + + /* Decode sort key column directly from sorted radix keys when + * available — sequential write, much faster than random-access + * gather. Only for single-key sorts where sort_indices_ex + * produced sorted_keys (non-packed path). 
*/ + int64_t decode_col_idx = -1; + if (sorted_keys && n_keys == 1 && !RAY_IS_SYM(key_cols[0]->type)) { + for (int64_t c = 0; c < ncols; c++) { + if (col_names[c] == key_ids[0] && new_cols[c]) { + decode_col_idx = c; + break; + } + } + } + if (decode_col_idx >= 0) { + radix_decode_into(ray_data(new_cols[decode_col_idx]), + key_cols[0]->type, sorted_keys, + nrows, descs[0]); + } + + /* Gather remaining non-LIST, non-decode columns in batches. + * Single-key sorts use the radix-partitioned gather; multi-key + * fallback to the multi_gather pool dispatch. */ + for (int64_t base = 0; base < ncols; ) { + char* g_srcs[MGATHER_MAX_COLS]; + char* g_dsts[MGATHER_MAX_COLS]; + uint8_t g_esz[MGATHER_MAX_COLS]; + int64_t g_nc = 0; + for (; base < ncols && g_nc < MGATHER_MAX_COLS; base++) { + if (!new_cols[base] || base == decode_col_idx) continue; + ray_t* col = ray_table_get_col_idx(tbl, base); + if (col->type == RAY_LIST) continue; + g_srcs[g_nc] = (char*)ray_data(col); + g_dsts[g_nc] = (char*)ray_data(new_cols[base]); + g_esz[g_nc] = col_esz(col); + g_nc++; + } + if (g_nc == 0) continue; + if (n_keys == 1) + partitioned_gather(gather_pool, idx_data, nrows, + nrows, g_srcs, g_dsts, g_esz, g_nc); + else { + multi_gather_ctx_t mg = { .idx = idx_data, .ncols = g_nc }; + for (int64_t i = 0; i < g_nc; i++) { + mg.srcs[i] = g_srcs[i]; + mg.dsts[i] = g_dsts[i]; + mg.esz[i] = g_esz[i]; + } + if (gather_pool) + ray_pool_dispatch(gather_pool, multi_gather_fn, &mg, nrows); + else + multi_gather_fn(&mg, 0, 0, nrows); + } + } + + /* Propagate str_pool / sym_dict / null bitmaps from source columns. + * Null propagation was the reason this function got rewritten in + * commit 87981c8; do it explicitly here instead of relying on + * gather_by_idx. 
*/ + for (int64_t c = 0; c < ncols; c++) { + if (!new_cols[c]) continue; + ray_t* col = ray_table_get_col_idx(tbl, c); + if (!col) continue; + col_propagate_str_pool(new_cols[c], col); + /* sym_dict lives in bytes 8-15 of the header union, which also + * hold inline-nullmap bits and slice_offset. Only read it when + * the header layout actually exposes the sym_dict/ext_nullmap + * interpretation: no slice, and either no nulls or external + * nullmap. Otherwise those bytes are bitmap payload / slice + * metadata and dereferencing them hands ray_retain garbage. */ + if (col->type == RAY_SYM && + !(col->attrs & RAY_ATTR_SLICE) && + (!(col->attrs & RAY_ATTR_HAS_NULLS) || (col->attrs & RAY_ATTR_NULLMAP_EXT)) && + col->sym_dict) { + ray_retain(col->sym_dict); + new_cols[c]->sym_dict = col->sym_dict; + } + bool src_has_nulls = (col->attrs & RAY_ATTR_HAS_NULLS) || + ((col->attrs & RAY_ATTR_SLICE) && col->slice_parent && + (col->slice_parent->attrs & RAY_ATTR_HAS_NULLS)); + if (src_has_nulls) { + for (int64_t r = 0; r < nrows; r++) + if (ray_vec_is_null(col, idx_data[r])) + ray_vec_set_null(new_cols[c], r, true); + } + } + + /* Assemble result table */ + ray_t* result = ray_table_new(ncols); + if (!result || RAY_IS_ERR(result)) { + for (int64_t c = 0; c < ncols; c++) + if (new_cols[c]) ray_release(new_cols[c]); + scratch_free(nc_hdr); + scratch_free(cn_hdr); + if (sorted_keys_hdr) scratch_free(sorted_keys_hdr); + ray_release(idx); + return result ? 
result : ray_error("oom", NULL); + } + for (int64_t c = 0; c < ncols; c++) { + if (!new_cols[c]) continue; + result = ray_table_add_col(result, col_names[c], new_cols[c]); + ray_release(new_cols[c]); + } + + scratch_free(nc_hdr); + scratch_free(cn_hdr); + if (sorted_keys_hdr) scratch_free(sorted_keys_hdr); + ray_release(idx); + return result; +} + +/* (xasc tbl keys) — sort table ascending by key columns */ +ray_t* ray_xasc_fn(ray_t* tbl, ray_t* keys) { + return sort_table_by_keys(tbl, keys, 0); +} + +/* (xdesc tbl keys) — sort table descending by key columns */ +ray_t* ray_xdesc_fn(ray_t* tbl, ray_t* keys) { + return sort_table_by_keys(tbl, keys, 1); +} + +/* (xrank n vec) — cross-rank: assign each element to one of n groups + * based on its sorted position. Uses the same O(n log n) sort + * infrastructure as `rank` / `xasc` (radix-or-merge inside + * ray_sort_indices). Replaces a per-element ray_vec_get-based + * insertion sort that was both correctness-broken (the boxed elem + * came back with type=0 so the comparison degenerated to 0.0 ≤ 0.0 + * → always true → all elements bucketed into group 0) and + * algorithmically O(n^2). */ +ray_t* ray_xrank_fn(ray_t* n_obj, ray_t* vec) { + if (!is_numeric(n_obj)) + return ray_error("type", "xrank: first arg must be integer"); + if (!ray_is_vec(vec)) + return ray_error("type", "xrank: second arg must be a vector"); + + int64_t n_groups = as_i64(n_obj); + int64_t len = ray_len(vec); + if (n_groups <= 0 || len == 0) return ray_vec_new(RAY_I64, 0); + + uint8_t desc = 0; + ray_t* idx = ray_sort_indices(&vec, &desc, NULL, 1, len); + if (!idx || RAY_IS_ERR(idx)) return idx ? idx : ray_error("oom", NULL); + + ray_t* result = ray_vec_new(RAY_I64, len); + if (!result || RAY_IS_ERR(result)) { ray_release(idx); return result ? 
result : ray_error("oom", NULL); } + result->len = len; + const int64_t* idx_data = (const int64_t*)ray_data(idx); + int64_t* out = (int64_t*)ray_data(result); + for (int64_t i = 0; i < len; i++) + out[idx_data[i]] = i * n_groups / len; + ray_release(idx); + return result; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/string.c b/crates/rayforce-sys/vendor/rayforce/src/ops/string.c new file mode 100644 index 0000000..e943034 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/string.c @@ -0,0 +1,604 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ops/internal.h" +#include "ops/glob.h" + +/* ============================================================================ + * OP_LIKE: glob pattern matching on STR / SYM columns. See ops/glob.[ch]. + * Syntax: * (any), ? (one char), [abc] / [a-z] / [!abc] (character class). 
+ * ============================================================================ */ + +ray_t* exec_like(ray_graph_t* g, ray_op_t* op) { + ray_t* input = exec_node(g, op->inputs[0]); + ray_t* pat_v = exec_node(g, op->inputs[1]); + if (!input || RAY_IS_ERR(input)) { if (pat_v && !RAY_IS_ERR(pat_v)) ray_release(pat_v); return input; } + if (!pat_v || RAY_IS_ERR(pat_v)) { ray_release(input); return pat_v; } + + /* Get pattern string */ + const char* pat_str = ray_str_ptr(pat_v); + size_t pat_len = ray_str_len(pat_v); + + int64_t len = input->len; + ray_t* result = ray_vec_new(RAY_BOOL, len); + if (!result || RAY_IS_ERR(result)) { + ray_release(input); ray_release(pat_v); + return result; + } + result->len = len; + uint8_t* dst = (uint8_t*)ray_data(result); + + int8_t in_type = input->type; + if (in_type == RAY_STR) { + const ray_str_t* elems; const char* pool; + str_resolve(input, &elems, &pool); + for (int64_t i = 0; i < len; i++) { + const char* sp = ray_str_t_ptr(&elems[i], pool); + size_t sl = elems[i].len; + dst[i] = ray_glob_match(sp, sl, pat_str, pat_len) ? 1 : 0; + } + } else if (RAY_IS_SYM(in_type)) { + const void* base = ray_data(input); + for (int64_t i = 0; i < len; i++) { + int64_t sym_id = ray_read_sym(base, i, in_type, input->attrs); + ray_t* s = ray_sym_str(sym_id); + if (!s) { dst[i] = 0; continue; } + const char* sp = ray_str_ptr(s); + size_t sl = ray_str_len(s); + dst[i] = ray_glob_match(sp, sl, pat_str, pat_len) ? 1 : 0; + } + } else { + memset(dst, 0, (size_t)len); + } + + ray_release(input); ray_release(pat_v); + return result; +} + +/* Case-insensitive LIKE — same syntax as `like`, ASCII-fold both sides. 
*/ + +ray_t* exec_ilike(ray_graph_t* g, ray_op_t* op) { + ray_t* input = exec_node(g, op->inputs[0]); + ray_t* pat_v = exec_node(g, op->inputs[1]); + if (!input || RAY_IS_ERR(input)) { if (pat_v && !RAY_IS_ERR(pat_v)) ray_release(pat_v); return input; } + if (!pat_v || RAY_IS_ERR(pat_v)) { ray_release(input); return pat_v; } + + const char* pat_str = ray_str_ptr(pat_v); + size_t pat_len = ray_str_len(pat_v); + + int64_t len = input->len; + ray_t* result = ray_vec_new(RAY_BOOL, len); + if (!result || RAY_IS_ERR(result)) { + ray_release(input); ray_release(pat_v); + return result; + } + result->len = len; + uint8_t* dst = (uint8_t*)ray_data(result); + + int8_t in_type = input->type; + if (in_type == RAY_STR) { + const ray_str_t* elems; const char* pool; + str_resolve(input, &elems, &pool); + for (int64_t i = 0; i < len; i++) { + const char* sp = ray_str_t_ptr(&elems[i], pool); + size_t sl = elems[i].len; + dst[i] = ray_glob_match_ci(sp, sl, pat_str, pat_len) ? 1 : 0; + } + } else if (RAY_IS_SYM(in_type)) { + const void* base = ray_data(input); + for (int64_t i = 0; i < len; i++) { + int64_t sym_id = ray_read_sym(base, i, in_type, input->attrs); + ray_t* s = ray_sym_str(sym_id); + if (!s) { dst[i] = 0; continue; } + dst[i] = ray_glob_match_ci(ray_str_ptr(s), ray_str_len(s), pat_str, pat_len) ? 1 : 0; + } + } else { + memset(dst, 0, (size_t)len); + } + + ray_release(input); ray_release(pat_v); + return result; +} + +/* ============================================================================ + * String functions: UPPER, LOWER, TRIM, STRLEN, SUBSTR, REPLACE, CONCAT + * + * These functions call ray_sym_intern() per output row, which is + * O(n * sym_table_lookup) per string op. Acceptable for current workloads; + * could be optimized with batch interning if profiling shows a bottleneck. 
+ * ============================================================================ */ + +/* UPPER / LOWER / TRIM — unary SYM/STR → SYM/STR */ +ray_t* exec_string_unary(ray_graph_t* g, ray_op_t* op) { + ray_t* input = exec_node(g, op->inputs[0]); + if (!input || RAY_IS_ERR(input)) return input; + + int64_t len = input->len; + bool is_str = (input->type == RAY_STR); + + ray_t* result; + if (is_str) { + result = ray_vec_new(RAY_STR, len); + } else { + result = ray_vec_new(RAY_SYM, len); + } + if (!result || RAY_IS_ERR(result)) { ray_release(input); return result; } + if (!is_str) result->len = len; + int64_t* sym_dst = is_str ? NULL : (int64_t*)ray_data(result); + + const ray_str_t* str_elems = NULL; + const char* str_pool = NULL; + if (is_str) str_resolve(input, &str_elems, &str_pool); + + uint16_t opc = op->opcode; + for (int64_t i = 0; i < len; i++) { + /* Propagate null */ + if (ray_vec_is_null((ray_t*)input, i)) { + if (is_str) { + result = ray_str_vec_append(result, "", 0); + if (RAY_IS_ERR(result)) break; + ray_vec_set_null(result, result->len - 1, true); + } else { + sym_dst[i] = 0; + ray_vec_set_null(result, i, true); + } + continue; + } + const char* sp; size_t sl; + if (is_str) { + sp = ray_str_t_ptr(&str_elems[i], str_pool); + sl = str_elems[i].len; + } else { + sym_elem(input, i, &sp, &sl); + } + + char sbuf[8192]; + char* buf = sbuf; + ray_t* dyn_hdr = NULL; + if (sl >= sizeof(sbuf)) { + buf = (char*)scratch_alloc(&dyn_hdr, sl + 1); + if (!buf) { + ray_release(result); + ray_release(input); + return ray_error("oom", NULL); + } + } + size_t out_len = sl; + if (opc == OP_UPPER) { + for (size_t j = 0; j < out_len; j++) buf[j] = (char)toupper((unsigned char)sp[j]); + } else if (opc == OP_LOWER) { + for (size_t j = 0; j < out_len; j++) buf[j] = (char)tolower((unsigned char)sp[j]); + } else { /* OP_TRIM */ + size_t start = 0, end = sl; + while (start < sl && isspace((unsigned char)sp[start])) start++; + while (end > start && isspace((unsigned char)sp[end - 1])) 
end--; + out_len = end - start; + memcpy(buf, sp + start, out_len); + } + + if (is_str) { + ray_t* prev = result; + result = ray_str_vec_append(result, buf, out_len); + if (RAY_IS_ERR(result)) { ray_release(prev); scratch_free(dyn_hdr); break; } + } else { + buf[out_len] = '\0'; + sym_dst[i] = ray_sym_intern(buf, out_len); + } + scratch_free(dyn_hdr); + } + ray_release(input); + return result; +} + +/* LENGTH — SYM → I64 */ +ray_t* exec_strlen(ray_graph_t* g, ray_op_t* op) { + ray_t* input = exec_node(g, op->inputs[0]); + if (!input || RAY_IS_ERR(input)) return input; + + int64_t len = input->len; + ray_t* result = ray_vec_new(RAY_I64, len); + if (!result || RAY_IS_ERR(result)) { ray_release(input); return result; } + result->len = len; + int64_t* dst = (int64_t*)ray_data(result); + + if (input->type == RAY_STR) { + const ray_str_t* elems; const char* pool; + str_resolve(input, &elems, &pool); + for (int64_t i = 0; i < len; i++) { + if (ray_vec_is_null((ray_t*)input, i)) { + dst[i] = 0; + ray_vec_set_null(result, i, true); + continue; + } + dst[i] = (int64_t)elems[i].len; + } + } else { + for (int64_t i = 0; i < len; i++) { + if (ray_vec_is_null((ray_t*)input, i)) { + dst[i] = 0; + ray_vec_set_null(result, i, true); + continue; + } + const char* sp; size_t sl; + sym_elem(input, i, &sp, &sl); + dst[i] = (int64_t)sl; + } + } + ray_release(input); + return result; +} + +/* SUBSTR(str, start, len) — 1-based start */ +ray_t* exec_substr(ray_graph_t* g, ray_op_t* op) { + ray_t* input = exec_node(g, op->inputs[0]); + ray_t* start_v = exec_node(g, op->inputs[1]); + if (!input || RAY_IS_ERR(input)) { if (start_v && !RAY_IS_ERR(start_v)) ray_release(start_v); return input; } + if (!start_v || RAY_IS_ERR(start_v)) { ray_release(input); return start_v; } + + /* Get len arg from ext node's literal field */ + ray_op_ext_t* ext = find_ext(g, op->id); + uint32_t len_id = (uint32_t)(uintptr_t)ext->literal; + ray_t* len_v = exec_node(g, &g->nodes[len_id]); + if (!len_v || 
RAY_IS_ERR(len_v)) { ray_release(input); ray_release(start_v); return len_v; } + + int64_t nrows = input->len; + bool is_str = (input->type == RAY_STR); + + ray_t* result; + if (is_str) { + result = ray_vec_new(RAY_STR, nrows); + } else { + result = ray_vec_new(RAY_SYM, nrows); + } + if (!result || RAY_IS_ERR(result)) { ray_release(input); ray_release(start_v); ray_release(len_v); return result; } + if (!is_str) result->len = nrows; + int64_t* sym_dst = is_str ? NULL : (int64_t*)ray_data(result); + + const ray_str_t* str_elems = NULL; + const char* str_pool = NULL; + if (is_str) str_resolve(input, &str_elems, &str_pool); + + /* start_v and len_v may be atom scalars or vectors. + * Handle RAY_I32 vectors correctly (read as int32_t, not int64_t). */ + int64_t s_scalar = 0, l_scalar = 0; + const int64_t* s_data = NULL; + const int64_t* l_data = NULL; + const int32_t* s_data_i32 = NULL; + const int32_t* l_data_i32 = NULL; + if (start_v->type == -RAY_I64) s_scalar = start_v->i64; + else if (start_v->type == -RAY_F64) s_scalar = (int64_t)start_v->f64; + else if (start_v->len == 1) { + if (start_v->type == RAY_F64) + s_scalar = (int64_t)((double*)ray_data(start_v))[0]; + else if (start_v->type == RAY_I32) + s_scalar = (int64_t)((int32_t*)ray_data(start_v))[0]; + else + s_scalar = ((int64_t*)ray_data(start_v))[0]; + } + else if (start_v->type == RAY_I32) s_data_i32 = (const int32_t*)ray_data(start_v); + else s_data = (const int64_t*)ray_data(start_v); + if (len_v->type == -RAY_I64) l_scalar = len_v->i64; + else if (len_v->type == -RAY_F64) l_scalar = (int64_t)len_v->f64; + else if (len_v->len == 1) { + if (len_v->type == RAY_F64) + l_scalar = (int64_t)((double*)ray_data(len_v))[0]; + else if (len_v->type == RAY_I32) + l_scalar = (int64_t)((int32_t*)ray_data(len_v))[0]; + else + l_scalar = ((int64_t*)ray_data(len_v))[0]; + } + else if (len_v->type == RAY_I32) l_data_i32 = (const int32_t*)ray_data(len_v); + else l_data = (const int64_t*)ray_data(len_v); + + for (int64_t i = 
0; i < nrows; i++) { + /* Propagate null — from input, start, or length */ + if (ray_vec_is_null((ray_t*)input, i) || + ((s_data || s_data_i32) && ray_vec_is_null((ray_t*)start_v, i)) || + ((l_data || l_data_i32) && ray_vec_is_null((ray_t*)len_v, i))) { + if (is_str) { + result = ray_str_vec_append(result, "", 0); + if (RAY_IS_ERR(result)) break; + ray_vec_set_null(result, result->len - 1, true); + } else { + sym_dst[i] = 0; + ray_vec_set_null(result, i, true); + } + continue; + } + const char* sp; size_t sl; + if (is_str) { + sp = ray_str_t_ptr(&str_elems[i], str_pool); + sl = str_elems[i].len; + } else { + sym_elem(input, i, &sp, &sl); + } + int64_t st = (s_data ? s_data[i] : s_data_i32 ? (int64_t)s_data_i32[i] : s_scalar) - 1; /* 1-based → 0-based */ + int64_t ln = l_data ? l_data[i] : l_data_i32 ? (int64_t)l_data_i32[i] : l_scalar; + if (st < 0) st = 0; + if ((size_t)st >= sl) { + if (is_str) { + result = ray_str_vec_append(result, "", 0); + if (RAY_IS_ERR(result)) break; + } + else { sym_dst[i] = ray_sym_intern("", 0); } + continue; + } + if (ln < 0 || ln > (int64_t)(sl - (size_t)st)) ln = (int64_t)sl - st; + if (is_str) { + result = ray_str_vec_append(result, sp + st, (size_t)ln); + if (RAY_IS_ERR(result)) break; + } else { + sym_dst[i] = ray_sym_intern(sp + st, (size_t)ln); + } + } + ray_release(input); ray_release(start_v); ray_release(len_v); + return result; +} + +/* REPLACE(str, from, to) */ +ray_t* exec_replace(ray_graph_t* g, ray_op_t* op) { + ray_t* input = exec_node(g, op->inputs[0]); + ray_t* from_v = exec_node(g, op->inputs[1]); + if (!input || RAY_IS_ERR(input)) { if (from_v && !RAY_IS_ERR(from_v)) ray_release(from_v); return input; } + if (!from_v || RAY_IS_ERR(from_v)) { ray_release(input); return from_v; } + + ray_op_ext_t* ext = find_ext(g, op->id); + uint32_t to_id = (uint32_t)(uintptr_t)ext->literal; + ray_t* to_v = exec_node(g, &g->nodes[to_id]); + if (!to_v || RAY_IS_ERR(to_v)) { ray_release(input); ray_release(from_v); return to_v; } + + 
/* from_v and to_v should be string constants (SYM atoms) */ + const char* from_str = ray_str_ptr(from_v); + size_t from_len = ray_str_len(from_v); + const char* to_str = ray_str_ptr(to_v); + size_t to_len = ray_str_len(to_v); + + int64_t nrows = input->len; + bool is_str = (input->type == RAY_STR); + + ray_t* result; + if (is_str) { + result = ray_vec_new(RAY_STR, nrows); + } else { + result = ray_vec_new(RAY_SYM, nrows); + } + if (!result || RAY_IS_ERR(result)) { ray_release(input); ray_release(from_v); ray_release(to_v); return result; } + if (!is_str) result->len = nrows; + int64_t* sym_dst = is_str ? NULL : (int64_t*)ray_data(result); + + const ray_str_t* str_elems = NULL; + const char* str_pool = NULL; + if (is_str) str_resolve(input, &str_elems, &str_pool); + + for (int64_t i = 0; i < nrows; i++) { + /* Propagate null */ + if (ray_vec_is_null((ray_t*)input, i)) { + if (is_str) { + result = ray_str_vec_append(result, "", 0); + if (RAY_IS_ERR(result)) break; + ray_vec_set_null(result, result->len - 1, true); + } else { + sym_dst[i] = 0; + ray_vec_set_null(result, i, true); + } + continue; + } + const char* sp; size_t sl; + if (is_str) { + sp = ray_str_t_ptr(&str_elems[i], str_pool); + sl = str_elems[i].len; + } else { + sym_elem(input, i, &sp, &sl); + } + /* Simple find-and-replace-all */ + /* Worst case: every char is a match, each replaced by to_len bytes. + * Guard against size_t overflow when to_len >> from_len. */ + size_t n_matches = (from_len > 0) ? 
sl / from_len : 0; + size_t worst; + if (from_len > 0 && to_len > from_len && n_matches > SIZE_MAX / to_len) { + worst = SIZE_MAX; /* overflow → cap at max; scratch_alloc will OOM */ + } else if (from_len > 0 && to_len >= from_len) { + /* Expanding or same-size: max output when every chunk matches */ + worst = n_matches * to_len + (sl % from_len) + 1; + } else { + /* Shrinking or from_len==0: max output when nothing matches → sl */ + worst = sl + 1; + } + char sbuf[8192]; + char* buf = sbuf; + ray_t* dyn_hdr = NULL; + if (worst > sizeof(sbuf)) { + buf = (char*)scratch_alloc(&dyn_hdr, worst); + if (!buf) { + ray_release(result); + ray_release(input); ray_release(from_v); ray_release(to_v); + return ray_error("oom", NULL); + } + } + size_t buf_cap = dyn_hdr ? worst : sizeof(sbuf); + size_t bi = 0; + for (size_t j = 0; j < sl; ) { + if (from_len > 0 && j + from_len <= sl && memcmp(sp + j, from_str, from_len) == 0) { + if (bi + to_len < buf_cap) { memcpy(buf + bi, to_str, to_len); bi += to_len; } + j += from_len; + } else { + if (bi < buf_cap - 1) buf[bi++] = sp[j]; + j++; + } + } + if (is_str) { + ray_t* prev = result; + result = ray_str_vec_append(result, buf, bi); + if (RAY_IS_ERR(result)) { ray_release(prev); scratch_free(dyn_hdr); break; } + } else { + buf[bi] = '\0'; + sym_dst[i] = ray_sym_intern(buf, bi); + } + scratch_free(dyn_hdr); + } + ray_release(input); ray_release(from_v); ray_release(to_v); + return result; +} + +/* CONCAT(a, b, ...) 
*/ +ray_t* exec_concat(ray_graph_t* g, ray_op_t* op) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + int64_t raw_nargs = ext->sym; + if (raw_nargs < 2 || raw_nargs > 255) return ray_error("domain", NULL); + int n_args = (int)raw_nargs; + + /* Evaluate all inputs */ + ray_t* args_stack[16]; + ray_t** args = args_stack; + ray_t* args_hdr = NULL; + if (n_args > 16) { + args = (ray_t**)scratch_calloc(&args_hdr, (size_t)n_args * sizeof(ray_t*)); + if (!args) return ray_error("oom", NULL); + } + + args[0] = exec_node(g, op->inputs[0]); + args[1] = exec_node(g, op->inputs[1]); + uint32_t* trail = (uint32_t*)((char*)(ext + 1)); + for (int i = 2; i < n_args; i++) { + args[i] = exec_node(g, &g->nodes[trail[i - 2]]); + } + /* Error check */ + for (int i = 0; i < n_args; i++) { + if (!args[i] || RAY_IS_ERR(args[i])) { + ray_t* err = args[i]; + for (int j = 0; j < n_args; j++) { + if (j != i && args[j] && !RAY_IS_ERR(args[j])) ray_release(args[j]); + } + scratch_free(args_hdr); + return err; + } + } + + /* Derive nrows from first vector arg (scalar args have byte-length in len) */ + int64_t nrows = 1; + bool out_str = false; + for (int a = 0; a < n_args; a++) { + int8_t at = args[a]->type; + if (at == RAY_STR) { out_str = true; if (nrows == 1) nrows = args[a]->len; } + if (RAY_IS_SYM(at)) { if (nrows == 1) nrows = args[a]->len; } + if (!ray_is_atom(args[a]) && nrows == 1) { nrows = args[a]->len; } + } + ray_t* result = ray_vec_new(out_str ? RAY_STR : RAY_SYM, nrows); + if (!result || RAY_IS_ERR(result)) { + for (int i = 0; i < n_args; i++) ray_release(args[i]); + scratch_free(args_hdr); + return result; + } + if (!out_str) result->len = nrows; + int64_t* dst = out_str ? 
NULL : (int64_t*)ray_data(result); + + for (int64_t r = 0; r < nrows; r++) { + /* Check if any arg is null at this row */ + bool any_null = false; + for (int a = 0; a < n_args; a++) { + if (ray_is_atom(args[a])) { + if (RAY_ATOM_IS_NULL(args[a])) { any_null = true; break; } + } else if (ray_vec_is_null((ray_t*)args[a], r < args[a]->len ? r : 0)) { + any_null = true; + break; + } + } + if (any_null) { + if (out_str) { + result = ray_str_vec_append(result, "", 0); + if (RAY_IS_ERR(result)) break; + ray_vec_set_null(result, result->len - 1, true); + } else { + dst[r] = 0; + ray_vec_set_null(result, r, true); + } + continue; + } + /* Pre-scan to compute total concat length for this row */ + size_t total = 0; + for (int a = 0; a < n_args; a++) { + int8_t t = args[a]->type; + if (t == RAY_STR) { + const ray_str_t* elems; const char* p; + str_resolve(args[a], &elems, &p); + int64_t ar = ray_is_atom(args[a]) ? 0 : (r < args[a]->len ? r : 0); + total += elems[ar].len; + } else if (RAY_IS_SYM(t)) { + const char* sp; size_t sl; + int64_t ar = ray_is_atom(args[a]) ? 0 : (r < args[a]->len ? r : 0); + sym_elem(args[a], ar, &sp, &sl); + total += sl; + } else if (t == -RAY_STR) { + total += ray_str_len(args[a]); + } + } + char sbuf[8192]; + char* buf = sbuf; + ray_t* dyn_hdr = NULL; + size_t buf_cap = sizeof(sbuf); + if (total >= sizeof(sbuf)) { + buf = (char*)scratch_alloc(&dyn_hdr, total + 1); + if (!buf) { + ray_release(result); + for (int i = 0; i < n_args; i++) ray_release(args[i]); + scratch_free(args_hdr); + return ray_error("oom", NULL); + } + buf_cap = total + 1; + } + size_t bi = 0; + for (int a = 0; a < n_args; a++) { + int8_t t = args[a]->type; + if (t == RAY_STR) { + const ray_str_t* elems; const char* pool; + str_resolve(args[a], &elems, &pool); + int64_t ar = ray_is_atom(args[a]) ? 0 : (r < args[a]->len ? 
r : 0); + const char* sp = ray_str_t_ptr(&elems[ar], pool); + size_t sl = elems[ar].len; + if (bi + sl < buf_cap) { memcpy(buf + bi, sp, sl); bi += sl; } + } else if (RAY_IS_SYM(t)) { + const char* sp; size_t sl; + int64_t ar = ray_is_atom(args[a]) ? 0 : (r < args[a]->len ? r : 0); + sym_elem(args[a], ar, &sp, &sl); + if (bi + sl < buf_cap) { memcpy(buf + bi, sp, sl); bi += sl; } + } else if (t == -RAY_STR) { + const char* sp = ray_str_ptr(args[a]); + size_t sl = ray_str_len(args[a]); + if (sp && bi + sl < buf_cap) { memcpy(buf + bi, sp, sl); bi += sl; } + } + } + if (out_str) { + ray_t* prev = result; + result = ray_str_vec_append(result, buf, bi); + if (RAY_IS_ERR(result)) { ray_release(prev); scratch_free(dyn_hdr); break; } + } else { + buf[bi] = '\0'; + dst[r] = ray_sym_intern(buf, bi); + } + scratch_free(dyn_hdr); + } + for (int i = 0; i < n_args; i++) ray_release(args[i]); + scratch_free(args_hdr); + return result; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/strop.c b/crates/rayforce-sys/vendor/rayforce/src/ops/strop.c new file mode 100644 index 0000000..9744398 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/strop.c @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "lang/internal.h" +#include "table/sym.h" +#include "ops/glob.h" + +/* ══════════════════════════════════════════ + * String builtins + * ══════════════════════════════════════════ */ + +ray_t* ray_split_fn(ray_t* str, ray_t* delim) { + /* List split: (split list indices) → list of sub-lists */ + if (str->type == RAY_LIST && + ray_is_vec(delim) && (delim->type == RAY_I64 || delim->type == RAY_I16 || delim->type == RAY_I32)) { + int64_t nidx = delim->len; + if (nidx == 0) return NULL; /* null for empty indices */ + int64_t idx_buf[256]; + if (nidx > 256) return ray_error("limit", NULL); + for (int64_t ii = 0; ii < nidx; ii++) { + int alloc = 0; + ray_t* ie = collection_elem(delim, ii, &alloc); + idx_buf[ii] = as_i64(ie); + if (alloc) ray_release(ie); + } + int64_t total = str->len; + ray_t** items = (ray_t**)ray_data(str); + ray_t* result = ray_list_new(nidx + 1); + if (RAY_IS_ERR(result)) return result; + for (int64_t i = 0; i < nidx; i++) { + int64_t start = idx_buf[i]; + int64_t end = (i + 1 < nidx) ? 
idx_buf[i + 1] : total; + int64_t seglen = end - start; + if (seglen < 0) seglen = 0; + /* Try to make a typed vector if all elements are same type */ + if (seglen > 0) { + int8_t first_type = items[start]->type; + int all_same = 1; + for (int64_t j = start + 1; j < start + seglen && j < total; j++) { + if (items[j]->type != first_type) { all_same = 0; break; } + } + if (all_same && first_type < 0 && first_type != -RAY_STR) { + int8_t vtype = -first_type; + ray_t* vec = ray_vec_new(vtype, seglen); + if (!RAY_IS_ERR(vec)) { + vec->len = seglen; + for (int64_t j = 0; j < seglen && start + j < total; j++) + store_typed_elem(vec, j, items[start + j]); + result = ray_list_append(result, vec); + ray_release(vec); + if (RAY_IS_ERR(result)) return result; + continue; + } + } + } + /* Heterogeneous or string segment: make a sub-list */ + ray_t* seg = ray_list_new(seglen); + if (RAY_IS_ERR(seg)) { ray_release(result); return seg; } + for (int64_t j = 0; j < seglen && start + j < total; j++) { + ray_retain(items[start + j]); + seg = ray_list_append(seg, items[start + j]); + ray_release(items[start + j]); + if (RAY_IS_ERR(seg)) { ray_release(result); return seg; } + } + result = ray_list_append(result, seg); + ray_release(seg); + if (RAY_IS_ERR(result)) return result; + } + return result; + } + /* Vector/string split: (split vec/str indices) → list of sub-vectors/substrings */ + if ((ray_is_vec(str) || (ray_is_atom(str) && (-str->type) == RAY_STR)) && + ray_is_vec(delim) && (delim->type == RAY_I64 || delim->type == RAY_I16 || delim->type == RAY_I32)) { + int64_t nidx = delim->len; + if (nidx == 0) return NULL; /* null for empty indices */ + /* Extract indices as i64 */ + int64_t idx_buf[256]; + if (nidx > 256) return ray_error("limit", NULL); + for (int64_t ii = 0; ii < nidx; ii++) { + int alloc = 0; + ray_t* ie = collection_elem(delim, ii, &alloc); + idx_buf[ii] = as_i64(ie); + if (alloc) ray_release(ie); + } + /* String split by indices */ + if (ray_is_atom(str) && 
(-str->type) == RAY_STR) { + const char* sp2 = ray_str_ptr(str); + size_t total = ray_str_len(str); + ray_t* result = ray_list_new(nidx + 1); + if (RAY_IS_ERR(result)) return result; + for (int64_t i = 0; i < nidx; i++) { + int64_t start = idx_buf[i]; + int64_t end = (i + 1 < nidx) ? idx_buf[i + 1] : (int64_t)total; + int64_t seglen = end - start; + if (seglen < 0) seglen = 0; + if (start > (int64_t)total) start = (int64_t)total; + if (start + seglen > (int64_t)total) seglen = (int64_t)total - start; + ray_t* seg = ray_str(sp2 + start, (size_t)seglen); + if (RAY_IS_ERR(seg)) { ray_release(result); return seg; } + result = ray_list_append(result, seg); + ray_release(seg); + if (RAY_IS_ERR(result)) return result; + } + return result; + } + /* Vector split by indices */ + int64_t total = str->len; + int esz = ray_elem_size(str->type); + ray_t* result = ray_list_new(nidx + 1); + if (RAY_IS_ERR(result)) return result; + for (int64_t i = 0; i < nidx; i++) { + int64_t start = idx_buf[i]; + int64_t end = (i + 1 < nidx) ? 
idx_buf[i + 1] : total; + int64_t seglen = end - start; + if (seglen < 0) seglen = 0; + ray_t* seg = ray_vec_new(str->type, seglen); + if (RAY_IS_ERR(seg)) { ray_release(result); return seg; } + seg->len = seglen; + if (seglen > 0) memcpy(ray_data(seg), (char*)ray_data(str) + start * esz, seglen * esz); + result = ray_list_append(result, seg); + ray_release(seg); + if (RAY_IS_ERR(result)) return result; + } + return result; + } + /* Normalize str and delim to string pointers */ + const char *sp, *dp; + size_t slen, dlen; + ray_t* sym_str_s = NULL; + ray_t* sym_str_d = NULL; + if (str->type == -RAY_STR) { sp = ray_str_ptr(str); slen = ray_str_len(str); } + else if (str->type == -RAY_SYM) { sym_str_s = ray_sym_str(str->i64); if (!sym_str_s) return ray_error("domain", NULL); sp = ray_str_ptr(sym_str_s); slen = ray_str_len(sym_str_s); } + /* RAY_CHAR removed — all chars are now -RAY_STR */ + else return ray_error("type", NULL); + if (delim->type == -RAY_STR) { dp = ray_str_ptr(delim); dlen = ray_str_len(delim); } + /* RAY_CHAR removed — all chars are now -RAY_STR */ + else { if (sym_str_s) ray_release(sym_str_s); return ray_error("type", NULL); } + + ray_t* result = ray_list_new(8); + if (RAY_IS_ERR(result)) { if (sym_str_s) ray_release(sym_str_s); if (sym_str_d) ray_release(sym_str_d); return result; } + + if (dlen == 0 || slen == 0) { + ray_t* part = ray_str(sp, slen); + result = ray_list_append(result, part); + ray_release(part); + if (sym_str_s) ray_release(sym_str_s); + if (sym_str_d) ray_release(sym_str_d); + return result; + } + + size_t start = 0; + for (size_t i = 0; i <= slen - dlen; ) { + if (memcmp(sp + i, dp, dlen) == 0) { + ray_t* part = ray_str(sp + start, i - start); + if (RAY_IS_ERR(part)) { ray_release(result); if (sym_str_s) ray_release(sym_str_s); if (sym_str_d) ray_release(sym_str_d); return part; } + result = ray_list_append(result, part); + ray_release(part); + if (RAY_IS_ERR(result)) { if (sym_str_s) ray_release(sym_str_s); if (sym_str_d) 
ray_release(sym_str_d); return result; } + i += dlen; + start = i; + } else { + i++; + } + } + /* Last part */ + ray_t* part = ray_str(sp + start, slen - start); + if (RAY_IS_ERR(part)) { ray_release(result); if (sym_str_s) ray_release(sym_str_s); if (sym_str_d) ray_release(sym_str_d); return part; } + result = ray_list_append(result, part); + ray_release(part); + if (sym_str_s) ray_release(sym_str_s); + if (sym_str_d) ray_release(sym_str_d); + return result; +} + +/* (like str pattern) — glob-style pattern matching. + * Syntax: * (any), ? (one char), [abc] / [a-z] / [!abc] (char class). + * Implementation lives in src/ops/glob.[ch]; same matcher is used by + * the DAG executor (string.c::exec_like) for select-where contexts. */ +ray_t* ray_like_fn(ray_t* x, ray_t* pattern) { + /* Pattern must be a string atom */ + if (pattern->type != -RAY_STR) return ray_error("type", "like: pattern must be a string"); + const char* pat = ray_str_ptr(pattern); + size_t pat_len = ray_str_len(pattern); + + /* Atom: single match */ + if (x->type == -RAY_STR || x->type == -RAY_SYM) { + const char* s; size_t sl; + ray_t* sym_str = NULL; + if (x->type == -RAY_SYM) { + sym_str = ray_sym_str(x->i64); + s = sym_str ? ray_str_ptr(sym_str) : ""; + sl = sym_str ? ray_str_len(sym_str) : 0; + } else { + s = ray_str_ptr(x); + sl = ray_str_len(x); + } + bool m = ray_glob_match(s, sl, pat, pat_len); + if (sym_str) ray_release(sym_str); + return make_bool(m ? 1 : 0); + } + + /* Vector: map over elements */ + if (ray_is_vec(x) && (x->type == RAY_SYM || x->type == RAY_STR)) { + int64_t n = ray_len(x); + ray_t* result = ray_vec_new(RAY_BOOL, n); + if (RAY_IS_ERR(result)) return result; + result->len = n; + uint8_t* out = (uint8_t*)ray_data(result); + + if (x->type == RAY_SYM) { + int64_t* sym_ids = (int64_t*)ray_data(x); + for (int64_t i = 0; i < n; i++) { + ray_t* sym_str = ray_sym_str(sym_ids[i]); + const char* s = sym_str ? ray_str_ptr(sym_str) : ""; + size_t sl = sym_str ? 
ray_str_len(sym_str) : 0; + out[i] = ray_glob_match(s, sl, pat, pat_len) ? 1 : 0; + if (sym_str) ray_release(sym_str); + } + } else { + /* RAY_STR vector */ + for (int64_t i = 0; i < n; i++) { + size_t slen; + const char* s = ray_str_vec_get(x, i, &slen); + out[i] = (s && ray_glob_match(s, slen, pat, pat_len)) ? 1 : 0; + } + } + return result; + } + + return ray_error("type", "like: expects string or symbol"); +} + +ray_t* ray_sym_name_fn(ray_t* x) { + if (x->type == -RAY_I64) { + if (x->i64 < 0 || !ray_sym_str(x->i64)) + return ray_error("domain", "sym-name: invalid sym ID"); + return ray_sym(x->i64); + } + if (x->type == RAY_I64) { + int64_t n = x->len; + const int64_t* data = (const int64_t*)ray_data(x); + /* Validate all IDs first */ + for (int64_t i = 0; i < n; i++) { + if (data[i] < 0 || !ray_sym_str(data[i])) + return ray_error("domain", "sym-name: invalid sym ID in vector"); + } + ray_t* out = ray_vec_new(RAY_SYM, n); + if (RAY_IS_ERR(out)) return out; + for (int64_t i = 0; i < n; i++) { + out = ray_vec_append(out, &data[i]); + if (RAY_IS_ERR(out)) return out; + } + return out; + } + /* Already sym (atom or vector), or empty I64/SYM vector — passthrough */ + if (x->type == -RAY_SYM || x->type == RAY_SYM || + ((x->type == RAY_I64 || x->type == RAY_SYM) && x->len == 0)) { + ray_retain(x); return x; + } + return ray_error("type", "sym-name: expected i64 or i64 vector"); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/system.c b/crates/rayforce-sys/vendor/rayforce/src/ops/system.c new file mode 100644 index 0000000..43f5d92 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/system.c @@ -0,0 +1,827 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "lang/internal.h"
+#include "lang/env.h"
+#include "lang/parse.h"
+#include "mem/heap.h"
+#include "store/serde.h"
+#include "store/splay.h"
+#include "store/part.h"
+#include "core/ipc.h" /* NOTE(review): the bare #include lines that follow carry
+                       * no header name in this copy of the patch — the
+                       * angle-bracket text appears to have been stripped.
+                       * Restore the names from the upstream rayforce source
+                       * before applying. */
+#include
+#include
+#include
+#include
+#if !defined(RAY_OS_WINDOWS)
+#include
+#endif
+
+/* ══════════════════════════════════════════
+ * Serialization / storage
+ * ══════════════════════════════════════════ */
+
+/* (ser val) -> serialize to U8 vector with IPC header.
+ * Builtin wrapper: forwards val to ray_ser() and returns its result as-is. */
+ray_t* ray_ser_fn(ray_t* val) {
+    return ray_ser(val);
+}
+
+/* (de bytes) -> deserialize from U8 vector.
+ * Builtin wrapper: forwards val to ray_de() and returns its result as-is. */
+ray_t* ray_de_fn(ray_t* val) {
+    return ray_de(val);
+}
+
+/* Build default sym path: dir/sym. Returns NULL if file does not exist.
*/ +static const char* splay_default_sym(const char* dir, char* buf, size_t bufsz, + bool must_exist) { + int n = snprintf(buf, bufsz, "%s/sym", dir); + if (n < 0 || (size_t)n >= bufsz) return NULL; + if (must_exist && access(buf, F_OK) != 0) return NULL; + return buf; +} + +/* Helper: extract null-terminated path from a STR atom into a stack buffer. + * Returns pointer to buf on success, NULL on failure. */ +static const char* str_to_cpath(ray_t* s, char* buf, size_t bufsz) { + if (!s || s->type != -RAY_STR) return NULL; + const char* p = ray_str_ptr(s); + size_t len = ray_str_len(s); + if (!p || len == 0 || len >= bufsz) return NULL; + memcpy(buf, p, len); + buf[len] = '\0'; + return buf; +} + +/* (.db.splayed.set "dir" table) or (.db.splayed.set "dir" table "sym_path") */ +ray_t* ray_set_splayed_fn(ray_t** args, int64_t n) { + if (n < 2 || n > 3) return ray_error("domain", NULL); + + char dir[1024]; + if (!str_to_cpath(args[0], dir, sizeof(dir))) return ray_error("type", NULL); + + ray_t* tbl = args[1]; + if (!tbl || tbl->type != RAY_TABLE) return ray_error("type", NULL); + + char sym[1024]; + const char* sym_path = NULL; + if (n == 3 && args[2] && args[2]->type == -RAY_STR) + sym_path = str_to_cpath(args[2], sym, sizeof(sym)); + else + sym_path = splay_default_sym(dir, sym, sizeof(sym), false); + + ray_err_t err = ray_splay_save(tbl, dir, sym_path); + if (err != RAY_OK) return ray_error(ray_err_code_str(err), NULL); + + ray_retain(tbl); + return tbl; +} + +/* (.db.splayed.get "dir") or (.db.splayed.get "dir" "sym_path") */ +ray_t* ray_get_splayed_fn(ray_t** args, int64_t n) { + if (n < 1 || n > 2) return ray_error("domain", NULL); + + char dir[1024]; + if (!str_to_cpath(args[0], dir, sizeof(dir))) return ray_error("type", NULL); + + char sym[1024]; + const char* sym_path = NULL; + if (n == 2 && args[1] && args[1]->type == -RAY_STR) + sym_path = str_to_cpath(args[1], sym, sizeof(sym)); + else + sym_path = splay_default_sym(dir, sym, sizeof(sym), true); + + 
return ray_splay_load(dir, sym_path); +} + +/* (.db.parted.get "db_root" `table_name) -- load partitioned table */ +ray_t* ray_get_parted_fn(ray_t** args, int64_t n) { + if (n != 2) return ray_error("domain", NULL); + + char root[1024]; + if (!str_to_cpath(args[0], root, sizeof(root))) return ray_error("type", NULL); + + /* Table name as symbol atom */ + if (!args[1] || args[1]->type != -RAY_SYM) return ray_error("type", NULL); + ray_t* name_atom = ray_sym_str(args[1]->i64); + if (!name_atom) return ray_error("name", NULL); + + char name[256]; + size_t nlen = ray_str_len(name_atom); + if (nlen == 0 || nlen >= sizeof(name)) return ray_error("domain", NULL); + memcpy(name, ray_str_ptr(name_atom), nlen); + name[nlen] = '\0'; + + return ray_read_parted(root, name); +} + +/* ══════════════════════════════════════════ + * Mount helpers (.db.splayed.mount / .db.parted.mount). + * + * `mount` walks a root directory, identifies child tables, loads each, + * binds it as a global named after the directory entry, and returns + * a `name → table` dict so callers can introspect what was loaded + * without re-scanning the filesystem. Mirrors kdb's `\l /tmp/db/` + * but split into format-specific entry points so the discovery + * heuristics can be tighter (splayed: presence of `.d` schema; + * parted: presence of partition directories matching digit/dot). + * ══════════════════════════════════════════ */ + +#include +#include + +/* True when `dir` is a splayed-table directory: contains a `.d` + * schema file at its top. Side-effect-free aside from a stat. */ +static int dir_is_splayed(const char* dir) { + char path[1024]; + int n = snprintf(path, sizeof(path), "%s/.d", dir); + if (n <= 0 || n >= (int)sizeof(path)) return 0; + return access(path, F_OK) == 0; +} + +/* True when `name` looks like a partition directory entry: + * non-empty, every char is a digit or `.`. Matches the + * collect_part_dirs heuristic in store/part.c. 
*/ +static int name_looks_partition(const char* name) { + if (!name || !name[0]) return 0; + for (const char* c = name; *c; c++) + if (!(*c == '.' || (*c >= '0' && *c <= '9'))) return 0; + return 1; +} + +/* True when `dir` is a parted-table root: has at least one + * subdirectory whose name matches the partition heuristic. */ +static int dir_is_parted_root(const char* dir) { + DIR* d = opendir(dir); + if (!d) return 0; + int found = 0; + struct dirent* ent; + while ((ent = readdir(d)) != NULL) { + if (ent->d_name[0] == '.') continue; + if (strcmp(ent->d_name, "sym") == 0) continue; + if (!name_looks_partition(ent->d_name)) continue; + char child[2048]; + int n = snprintf(child, sizeof(child), "%s/%s", dir, ent->d_name); + if (n <= 0 || n >= (int)sizeof(child)) continue; + struct stat st; + if (stat(child, &st) == 0 && S_ISDIR(st.st_mode)) { found = 1; break; } + } + closedir(d); + return found; +} + +/* Bind `name` as a global pointing to `tbl` and append the (name, tbl) + * pair onto the building dict. Both retain — the env keeps an owned + * ref, the returned dict gets its own refs. */ +static void mount_record(int64_t* names_buf, ray_t** vals_buf, int* count, + int max, const char* name, size_t nlen, ray_t* tbl) { + if (*count >= max) return; + int64_t sym_id = ray_sym_intern(name, nlen); + ray_env_set(sym_id, tbl); + names_buf[*count] = sym_id; + ray_retain(tbl); + vals_buf[*count] = tbl; + (*count)++; +} + +static ray_t* finalize_mount_dict(int64_t* names_buf, ray_t** vals_buf, int count) { + if (count == 0) return ray_dict_new(ray_list_new(0), ray_list_new(0)); + ray_t* keys = ray_vec_new(RAY_SYM, count); + if (!keys || RAY_IS_ERR(keys)) return keys ? keys : ray_error("oom", NULL); + keys->len = count; + int64_t* k = (int64_t*)ray_data(keys); + for (int i = 0; i < count; i++) k[i] = names_buf[i]; + ray_t* vals = ray_list_new(count); + if (!vals || RAY_IS_ERR(vals)) { ray_release(keys); return vals ? 
vals : ray_error("oom", NULL); } + for (int i = 0; i < count; i++) { + vals = ray_list_append(vals, vals_buf[i]); + ray_release(vals_buf[i]); + } + return ray_dict_new(keys, vals); +} + +/* (.db.splayed.mount "root") — for each immediate subdirectory of + * root that contains a `.d` schema file, load it as a splayed table + * and bind it as a global named after the subdirectory. Returns a + * dict {name → table} of the bindings made. */ +ray_t* ray_db_splayed_mount_fn(ray_t** args, int64_t n) { + if (n != 1) return ray_error("domain", NULL); + char root[1024]; + if (!str_to_cpath(args[0], root, sizeof(root))) return ray_error("type", NULL); + + DIR* d = opendir(root); + if (!d) return ray_error("io", "cannot open directory"); + + int64_t names_buf[256]; + ray_t* vals_buf[256]; + int count = 0; + + struct dirent* ent; + while ((ent = readdir(d)) != NULL) { + if (ent->d_name[0] == '.') continue; + char child[2048]; + int cn = snprintf(child, sizeof(child), "%s/%s", root, ent->d_name); + if (cn <= 0 || cn >= (int)sizeof(child)) continue; + struct stat st; + if (stat(child, &st) != 0 || !S_ISDIR(st.st_mode)) continue; + if (!dir_is_splayed(child)) continue; + ray_t* tbl = ray_splay_load(child, NULL); + if (!tbl || RAY_IS_ERR(tbl)) { + if (tbl) ray_release(tbl); + continue; + } + mount_record(names_buf, vals_buf, &count, 256, + ent->d_name, strlen(ent->d_name), tbl); + ray_release(tbl); /* env_set retained; we no longer need our local ref */ + } + closedir(d); + return finalize_mount_dict(names_buf, vals_buf, count); +} + +/* (.db.parted.mount "root") — discover the table names under a + * partitioned root by inspecting the first partition directory, then + * load each name via ray_read_parted (zero-copy parted view) and + * bind it as a global. Returns a dict {name → table}. 
*/
+ray_t* ray_db_parted_mount_fn(ray_t** args, int64_t n) {
+    if (n != 1) return ray_error("domain", NULL);  /* exactly one argument: the root path */
+    char root[1024];
+    if (!str_to_cpath(args[0], root, sizeof(root))) return ray_error("type", NULL);
+
+    if (!dir_is_parted_root(root))
+        return ray_error("domain", "not a parted-table root (no partition directories found)");
+
+    /* Find the first partition directory to enumerate table names from.
+     * "First" means the first matching entry readdir() yields, so which
+     * partition is used depends on directory iteration order. */
+    DIR* d = opendir(root);
+    if (!d) return ray_error("io", "cannot open directory");
+    char first_part[2048] = {0};  /* stays empty unless a usable partition is found */
+    struct dirent* ent;
+    while ((ent = readdir(d)) != NULL) {
+        if (ent->d_name[0] == '.') continue;
+        if (strcmp(ent->d_name, "sym") == 0) continue;
+        if (!name_looks_partition(ent->d_name)) continue;
+        int cn = snprintf(first_part, sizeof(first_part), "%s/%s", root, ent->d_name);
+        if (cn <= 0 || cn >= (int)sizeof(first_part)) { first_part[0] = '\0'; continue; }
+        struct stat st;
+        if (stat(first_part, &st) == 0 && S_ISDIR(st.st_mode)) break;
+        first_part[0] = '\0';  /* not a directory: reset and keep looking */
+    }
+    closedir(d);
+    if (!first_part[0])
+        return ray_error("io", "parted root has no readable partition");
+
+    /* Walk the first partition: every subdirectory is a table name. */
+    DIR* dp = opendir(first_part);
+    if (!dp) return ray_error("io", "cannot scan partition");
+
+    int64_t names_buf[256];  /* sym ids of the tables bound so far */
+    ray_t* vals_buf[256];    /* the matching tables; mount_record() retains each */
+    int count = 0;
+
+    while ((ent = readdir(dp)) != NULL) {
+        if (ent->d_name[0] == '.') continue;
+        char tbl_in_part[3072];
+        int cn = snprintf(tbl_in_part, sizeof(tbl_in_part), "%s/%s", first_part, ent->d_name);
+        if (cn <= 0 || cn >= (int)sizeof(tbl_in_part)) continue;
+        struct stat st;
+        if (stat(tbl_in_part, &st) != 0 || !S_ISDIR(st.st_mode)) continue;
+        ray_t* tbl = ray_read_parted(root, ent->d_name);
+        if (!tbl || RAY_IS_ERR(tbl)) {  /* tables that fail to load are skipped, not reported */
+            if (tbl) ray_release(tbl);
+            continue;
+        }
+        mount_record(names_buf, vals_buf, &count, 256,
+                     ent->d_name, strlen(ent->d_name), tbl);
+        ray_release(tbl);  /* drop the local ref; mount_record() took its own */
+    }
+    closedir(dp);
+    return finalize_mount_dict(names_buf, vals_buf, count);
+}
+
+/* ══════════════════════════════════════════
+ * Filesystem metadata: .os.size / .os.list
+ *
+ * Issue #36 asked for size + existence + listing primitives. We
+ * keep just two — `.os.size` and `.os.list` — because every other
+ * predicate (exists, is-file, is-dir) is reachable either via
+ * try-on-error against these or via the existing shell fallback
+ * (`(.sys.cmd "test -e p")` etc.). Both errors are flagged "io"
+ * so a user wrapping the call in `try` can distinguish missing /
+ * wrong-kind from a domain mistake without introspecting the
+ * message.
+ * ══════════════════════════════════════════ */
+
+/* (.os.size "path") → i64 file size in bytes. Errors with "io"
+ * when the path doesn't exist or names a directory — `try` it if
+ * the caller wants those treated as "not a file" rather than a
+ * hard error.
*/ +ray_t* ray_os_size_fn(ray_t* x) { + if (!ray_is_atom(x) || x->type != -RAY_STR) + return ray_error("type", ".os.size expects a string path"); + char path[1024]; + if (!str_to_cpath(x, path, sizeof(path))) return ray_error("type", NULL); + + struct stat st; + if (stat(path, &st) != 0) + return ray_error("io", "%s: %s", path, strerror(errno)); + if (S_ISDIR(st.st_mode)) + return ray_error("io", "%s: is a directory", path); + return ray_i64((int64_t)st.st_size); +} + +/* qsort comparator for sorting directory entries by name. Filesystem + * order from readdir is implementation-defined; sorting gives stable + * output for tests and predictable iteration in user code. */ +static int dir_entry_cmp(const void* a, const void* b) { + const char* sa = *(const char* const*)a; + const char* sb = *(const char* const*)b; + return strcmp(sa, sb); +} + +/* (.os.list "path") → sym vec of entries, sorted, with `.` and `..` + * filtered out. Errors with "io" if the path isn't a directory or + * doesn't exist — caller can use that as a file/dir discriminator + * via `try` when they don't want to shell out for the predicate. */ +ray_t* ray_os_list_fn(ray_t* x) { + if (!ray_is_atom(x) || x->type != -RAY_STR) + return ray_error("type", ".os.list expects a string path"); + char path[1024]; + if (!str_to_cpath(x, path, sizeof(path))) return ray_error("type", NULL); + + DIR* d = opendir(path); + if (!d) return ray_error("io", "%s: %s", path, strerror(errno)); + + /* Collect names into a heap-allocated string array; capacity grows + * geometrically so big directories don't quadratic-realloc. */ + char** names = NULL; + int64_t count = 0; + int64_t cap = 0; + struct dirent* ent; + while ((ent = readdir(d)) != NULL) { + if (ent->d_name[0] == '.' && + (ent->d_name[1] == '\0' || (ent->d_name[1] == '.' && ent->d_name[2] == '\0'))) + continue; + if (count >= cap) { + int64_t new_cap = cap == 0 ? 
16 : cap * 2; + char** tmp = (char**)realloc(names, (size_t)new_cap * sizeof(char*)); + if (!tmp) { closedir(d); for (int64_t i = 0; i < count; i++) free(names[i]); free(names); return ray_error("oom", NULL); } + names = tmp; + cap = new_cap; + } + size_t nlen = strlen(ent->d_name) + 1; + names[count] = (char*)malloc(nlen); + if (!names[count]) { closedir(d); for (int64_t i = 0; i < count; i++) free(names[i]); free(names); return ray_error("oom", NULL); } + memcpy(names[count], ent->d_name, nlen); + count++; + } + closedir(d); + + qsort(names, (size_t)count, sizeof(char*), dir_entry_cmp); + + ray_t* result = ray_vec_new(RAY_SYM, count); + if (!result || RAY_IS_ERR(result)) { + for (int64_t i = 0; i < count; i++) free(names[i]); + free(names); + return result ? result : ray_error("oom", NULL); + } + result->len = count; + int64_t* out = (int64_t*)ray_data(result); + for (int64_t i = 0; i < count; i++) { + out[i] = ray_sym_intern(names[i], strlen(names[i])); + free(names[i]); + } + free(names); + return result; +} + +/* xorshift64* — ~1ns per 64-bit word, vs rand()'s ~10ns for 1 byte. + * Per-thread state seeded once with the result of rand() to keep the + * (guid n) sequence varying across program runs (rand() is itself + * seeded by the runtime). v4 UUID quality only requires the version + * and variant nibbles to be correct; the remaining 122 bits are + * pseudo-random and xorshift64* is more than sufficient. */ +static __thread uint64_t guid_rng_state = 0; + +static inline uint64_t guid_rng_next(void) { + uint64_t x = guid_rng_state; + if (RAY_UNLIKELY(x == 0)) { + /* Mix rand() into a non-zero seed. rand() returns ≤ 31 bits, so + * combine three calls plus an address-derived constant for + * thread-distinct initialisation. 
*/ + uint64_t a = (uint64_t)rand(); + uint64_t b = (uint64_t)rand(); + uint64_t c = (uint64_t)rand(); + x = (a << 33) ^ (b << 17) ^ c ^ 0x9E3779B97F4A7C15ULL; + if (x == 0) x = 0x9E3779B97F4A7C15ULL; + } + x ^= x >> 12; + x ^= x << 25; + x ^= x >> 27; + guid_rng_state = x; + return x * 0x2545F4914F6CDD1DULL; +} + +/* (guid n) -> generate n random GUIDs as GUID vector. + * v4 UUID format: 122 random bits + 4 version-bits (0100) + 2 variant-bits (10). */ +ray_t* ray_guid_fn(ray_t* n_arg) { + if (!n_arg || !is_numeric(n_arg)) return ray_error("type", NULL); + int64_t n = as_i64(n_arg); + if (n < 0) return ray_error("domain", NULL); + ray_t* result = ray_vec_new(RAY_GUID, n); + if (RAY_IS_ERR(result)) return result; + result->len = n; + uint8_t* data = (uint8_t*)ray_data(result); + /* Two 64-bit RNG calls per UUID give 16 random bytes; then we just + * stamp the version/variant nibbles. */ + for (int64_t i = 0; i < n; i++) { + uint64_t lo = guid_rng_next(); + uint64_t hi = guid_rng_next(); + memcpy(data + i * 16, &lo, 8); + memcpy(data + i * 16 + 8, &hi, 8); + data[i * 16 + 6] = (data[i * 16 + 6] & 0x0F) | 0x40; /* version 4 */ + data[i * 16 + 8] = (data[i * 16 + 8] & 0x3F) | 0x80; /* variant 10 */ + } + return result; +} + +/* ══════════════════════════════════════════ + * Eval, parse, print, system, env builtins + * ══════════════════════════════════════════ */ + +/* (eval expr) -- evaluate a parsed expression */ +ray_t* ray_eval_builtin_fn(ray_t* x) { + return ray_eval(x); +} + +/* (parse str) -- parse a string into an AST */ +ray_t* ray_parse_builtin_fn(ray_t* x) { + if (x->type != -RAY_STR) return ray_error("type", "parse expects a string"); + const char* src = ray_str_ptr(x); + if (!src) return ray_error("domain", NULL); + ray_t* parsed = ray_parse(src); + return parsed ? 
parsed : ray_error("parse", NULL); +} + +/* (print val) -- print without newline, return the value */ +/* print moved to builtins.c alongside println/show */ + +/* (meta x) -- return metadata about an object as a dict */ +ray_t* ray_meta_fn(ray_t* x) { + if (!x) return ray_error("type", NULL); + + const char* tname = ray_type_name(x->type); + int64_t type_sym = ray_sym_intern("type", 4); + int64_t type_id = ray_sym_intern(tname, strlen(tname)); + + /* Build keys SYM vec + vals LIST. */ + int64_t cap = ray_is_atom(x) ? 1 : 2; + ray_t* keys = ray_sym_vec_new(RAY_SYM_W64, cap); + if (RAY_IS_ERR(keys)) return keys; + ray_t* vals = ray_list_new(cap); + if (RAY_IS_ERR(vals)) { ray_release(keys); return vals; } + + keys = ray_vec_append(keys, &type_sym); + if (RAY_IS_ERR(keys)) { ray_release(vals); return keys; } + ray_t* tv = ray_sym(type_id); + vals = ray_list_append(vals, tv); + ray_release(tv); + if (RAY_IS_ERR(vals)) { ray_release(keys); return vals; } + + if (!ray_is_atom(x)) { + int64_t len_sym = ray_sym_intern("len", 3); + keys = ray_vec_append(keys, &len_sym); + if (RAY_IS_ERR(keys)) { ray_release(vals); return keys; } + int64_t row_count; + if (x->type == RAY_DICT) row_count = ray_dict_len(x); + else if (x->type == RAY_TABLE) row_count = ray_table_ncols(x); + else row_count = x->len; + ray_t* lv = make_i64(row_count); + vals = ray_list_append(vals, lv); + ray_release(lv); + if (RAY_IS_ERR(vals)) { ray_release(keys); return vals; } + } + + return ray_dict_new(keys, vals); +} + +/* (.sys.gc) -- no-op garbage collection trigger, return 0. Variadic + * so the call site can be (.sys.gc) without the dummy-arg ceremony. 
*/ +ray_t* ray_gc_fn(ray_t** args, int64_t n) { (void)args; (void)n; return ray_i64(0); } + +/* (system cmd) -- run shell command, return exit code */ +ray_t* ray_system_fn(ray_t* x) { + if (x->type != -RAY_STR) return ray_error("type", "system expects a string"); + const char* cmd = ray_str_ptr(x); + if (!cmd) return ray_error("domain", NULL); + int rc = system(cmd); + return make_i64(rc); +} + +/* (getenv name) -- get environment variable */ +ray_t* ray_getenv_fn(ray_t* x) { + if (x->type != -RAY_STR) return ray_error("type", "getenv expects a string"); + const char* name = ray_str_ptr(x); + if (!name) return ray_error("domain", NULL); + const char* val = getenv(name); + return val ? ray_str(val, strlen(val)) : ray_str("", 0); +} + +/* (setenv name val) -- set environment variable */ +#if !defined(RAY_OS_WINDOWS) +extern int setenv(const char*, const char*, int); +#endif +ray_t* ray_setenv_fn(ray_t* name, ray_t* val) { + if (name->type != -RAY_STR || val->type != -RAY_STR) + return ray_error("type", "setenv expects two strings"); + const char* n = ray_str_ptr(name); + const char* v = ray_str_ptr(val); + if (!n || !v) return ray_error("domain", NULL); +#if defined(RAY_OS_WINDOWS) + _putenv_s(n, v); +#else + setenv(n, v, 1); +#endif + return val; +} + +/* ══════════════════════════════════════════ + * Quote, return, args, rc, diverse, get, remove, + * timer, env, internals, memstat, sysinfo + * ══════════════════════════════════════════ */ + +/* (quote expr) -- special form, returns argument unevaluated */ +ray_t* ray_quote_fn(ray_t** args, int64_t n) { + if (n < 1) return ray_error("domain", "quote expects 1 argument"); + ray_retain(args[0]); + return args[0]; +} + +/* (return x) -- early return from function (identity in Rayfall) */ +ray_t* ray_return_fn(ray_t* x) { + ray_retain(x); + return x; +} + +/* (args) -- return command-line arguments as a list of strings */ +ray_t* ray_args_fn(ray_t* x) { + (void)x; + /* Return empty list -- CLI args not wired into eval 
context */ + ray_t* list = ray_list_new(0); + if (!list) return ray_error("oom", NULL); + return list; +} + +/* (rc x) -- return reference count of object */ +ray_t* ray_rc_fn(ray_t* x) { + if (!x || RAY_IS_ERR(x)) return make_i64(0); + return make_i64((int64_t)x->rc); +} + +/* (diverse x) -- check if all elements in a collection are unique */ +ray_t* ray_diverse_fn(ray_t* x) { + if (ray_is_atom(x)) return make_bool(1); + if (!is_collection(x)) return ray_error("type", "diverse expects a collection"); + + int64_t n = ray_len(x); + if (n <= 1) return make_bool(1); + + ray_t* d = ray_distinct_fn(x); + if (RAY_IS_ERR(d)) return d; + int64_t dn = ray_len(d); + ray_release(d); + return make_bool(dn == n ? 1 : 0); +} + +/* (get dict key) -- dictionary/table lookup (alias for at) */ +ray_t* ray_get_fn(ray_t* dict, ray_t* key) { + return ray_at_fn(dict, key); +} + +/* (remove dict key) -- remove key from dict, return new dict */ +ray_t* ray_remove_fn(ray_t* dict, ray_t* key) { + if (!dict || dict->type != RAY_DICT) + return ray_error("type", "remove expects a dict"); + ray_retain(dict); + return ray_dict_remove(dict, key); +} + +/* (timer) -- return high-res timestamp in nanoseconds for benchmarking */ +ray_t* ray_timer_fn(ray_t* x) { + (void)x; + clock_t t = clock(); + int64_t nanos = (int64_t)((double)t / (double)CLOCKS_PER_SEC * 1e9); + return make_i64(nanos); +} + +/* (env) -- return dict of all global environment bindings */ +ray_t* ray_env_fn(ray_t* x) { + (void)x; + int64_t sym_ids[1024]; + ray_t* vals_buf[1024]; + int32_t count = ray_env_list(sym_ids, vals_buf, 1024); + + ray_t* keys = ray_sym_vec_new(RAY_SYM_W64, count); + if (RAY_IS_ERR(keys)) return keys; + ray_t* vals = ray_list_new(count); + if (RAY_IS_ERR(vals)) { ray_release(keys); return vals; } + + for (int32_t i = 0; i < count; i++) { + keys = ray_vec_append(keys, &sym_ids[i]); + if (RAY_IS_ERR(keys)) { ray_release(vals); return keys; } + vals = ray_list_append(vals, vals_buf[i]); + if (RAY_IS_ERR(vals)) 
{ ray_release(keys); return vals; } + } + return ray_dict_new(keys, vals); +} + +/* (.sys.build) -- return dict with internal build information */ +ray_t* ray_internals_fn(ray_t** args, int64_t n) { + (void)args; (void)n; + ray_t* keys = ray_sym_vec_new(RAY_SYM_W64, 2); + if (RAY_IS_ERR(keys)) return keys; + ray_t* vals = ray_list_new(2); + if (RAY_IS_ERR(vals)) { ray_release(keys); return vals; } + + int64_t ver_sym = ray_sym_intern("version", 7); + keys = ray_vec_append(keys, &ver_sym); +#ifdef RAYFORCE_VERSION + ray_t* v1 = ray_str(RAYFORCE_VERSION, strlen(RAYFORCE_VERSION)); +#else + ray_t* v1 = ray_str("unknown", 7); +#endif + vals = ray_list_append(vals, v1); ray_release(v1); + + int64_t date_sym = ray_sym_intern("build-date", 10); + keys = ray_vec_append(keys, &date_sym); +#ifdef RAYFORCE_BUILD_DATE + ray_t* v2 = ray_str(RAYFORCE_BUILD_DATE, strlen(RAYFORCE_BUILD_DATE)); +#else + ray_t* v2 = ray_str("unknown", 7); +#endif + vals = ray_list_append(vals, v2); ray_release(v2); + + return ray_dict_new(keys, vals); +} + +/* (.sys.mem) -- return dict with memory allocator statistics */ +ray_t* ray_memstat_fn(ray_t** args, int64_t n) { + (void)args; (void)n; + ray_mem_stats_t st; + ray_mem_stats(&st); + + ray_t* keys = ray_sym_vec_new(RAY_SYM_W64, 5); + if (RAY_IS_ERR(keys)) return keys; + ray_t* vals = ray_list_new(5); + if (RAY_IS_ERR(vals)) { ray_release(keys); return vals; } + + struct { const char* name; size_t nlen; int64_t v; } rows[] = { + { "alloc-count", 11, (int64_t)st.alloc_count }, + { "bytes-allocated", 15, (int64_t)st.bytes_allocated }, + { "peak-bytes", 10, (int64_t)st.peak_bytes }, + { "slab-hits", 9, (int64_t)st.slab_hits }, + { "sys-current", 11, (int64_t)st.sys_current }, + }; + for (size_t i = 0; i < sizeof(rows)/sizeof(rows[0]); i++) { + int64_t s = ray_sym_intern(rows[i].name, rows[i].nlen); + keys = ray_vec_append(keys, &s); + ray_t* v = make_i64(rows[i].v); + vals = ray_list_append(vals, v); ray_release(v); + } + + return ray_dict_new(keys, 
vals); +} + +ray_t* ray_sysinfo_fn(ray_t** args, int64_t n) { + (void)args; (void)n; + ray_t* keys = ray_sym_vec_new(RAY_SYM_W64, 3); + if (RAY_IS_ERR(keys)) return keys; + ray_t* vals = ray_list_new(3); + if (RAY_IS_ERR(vals)) { ray_release(keys); return vals; } + +#if !defined(RAY_OS_WINDOWS) + int64_t s1 = ray_sym_intern("cores", 5); + keys = ray_vec_append(keys, &s1); + ray_t* v1 = make_i64(sysconf(_SC_NPROCESSORS_ONLN)); + vals = ray_list_append(vals, v1); ray_release(v1); + + int64_t s2 = ray_sym_intern("page-size", 9); + keys = ray_vec_append(keys, &s2); + ray_t* v2 = make_i64(sysconf(_SC_PAGESIZE)); + vals = ray_list_append(vals, v2); ray_release(v2); + + long pages = sysconf(_SC_PHYS_PAGES); + long psize = sysconf(_SC_PAGESIZE); + int64_t s3 = ray_sym_intern("total-mem", 9); + keys = ray_vec_append(keys, &s3); + ray_t* v3 = make_i64((int64_t)pages * (int64_t)psize); + vals = ray_list_append(vals, v3); ray_release(v3); +#else + int64_t s1 = ray_sym_intern("cores", 5); + keys = ray_vec_append(keys, &s1); + ray_t* v1 = make_i64(1); + vals = ray_list_append(vals, v1); ray_release(v1); +#endif + + return ray_dict_new(keys, vals); +} + +/* ══════════════════════════════════════════ + * IPC builtins + * ══════════════════════════════════════════ */ + +/* (hopen "host:port[:user:password]") → i64 handle */ +ray_t* ray_hopen_fn(ray_t* x) { + if (!ray_is_atom(x) || x->type != -RAY_STR) + return ray_error("type", NULL); + + const char* s = ray_str_ptr(x); + size_t slen = ray_str_len(x); + + /* Split on colons */ + const char* parts[4] = {0}; + size_t part_lens[4] = {0}; + int n_parts = 0; + const char* start = s; + for (size_t i = 0; i <= slen && n_parts < 4; i++) { + if (i == slen || s[i] == ':') { + parts[n_parts] = start; + part_lens[n_parts] = (size_t)(&s[i] - start); + n_parts++; + start = &s[i + 1]; + } + } + if (n_parts < 2) return ray_error("domain", NULL); + + char host[256]; + if (part_lens[0] >= sizeof(host)) return ray_error("domain", NULL); + 
memcpy(host, parts[0], part_lens[0]); + host[part_lens[0]] = '\0'; + + char port_str[8]; + if (part_lens[1] >= sizeof(port_str)) return ray_error("domain", NULL); + memcpy(port_str, parts[1], part_lens[1]); + port_str[part_lens[1]] = '\0'; + int port = atoi(port_str); + if (port <= 0 || port > 65535) return ray_error("domain", NULL); + + char user[128] = ""; + char password[128] = ""; + if (n_parts >= 4) { + if (part_lens[2] < sizeof(user)) { + memcpy(user, parts[2], part_lens[2]); + user[part_lens[2]] = '\0'; + } + if (part_lens[3] < sizeof(password)) { + memcpy(password, parts[3], part_lens[3]); + password[part_lens[3]] = '\0'; + } + } + + const char* pw_ptr = (n_parts >= 4) ? password : NULL; + const char* us_ptr = (n_parts >= 4) ? user : NULL; + + int64_t h = ray_ipc_connect(host, (uint16_t)port, us_ptr, pw_ptr); + if (h == -2) return ray_error("access", "server requires authentication"); + if (h == -3) return ray_error("access", "authentication failed"); + if (h < 0) return ray_error("io", "connection refused: %s:%d", host, port); + + return make_i64(h); +} + +/* (hclose handle) → null */ +ray_t* ray_hclose_fn(ray_t* x) { + if (!ray_is_atom(x) || (x->type != -RAY_I64 && x->type != -RAY_I32)) + return ray_error("type", NULL); + int64_t h = (x->type == -RAY_I64) ? x->i64 : x->i32; + ray_ipc_close(h); + return RAY_NULL_OBJ; +} + +/* (hsend handle msg) → result */ +ray_t* ray_hsend_fn(ray_t* handle, ray_t* msg) { + if (!ray_is_atom(handle) || (handle->type != -RAY_I64 && handle->type != -RAY_I32)) + return ray_error("type", NULL); + int64_t h = (handle->type == -RAY_I64) ? handle->i64 : handle->i32; + /* Validate message is serializable (reject builtins, etc.) 
*/ + if (ray_serde_size(msg) <= 0) + return ray_error("type", "message not serializable"); + return ray_ipc_send(h, msg); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/tblop.c b/crates/rayforce-sys/vendor/rayforce/src/ops/tblop.c new file mode 100644 index 0000000..688abde --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/tblop.c @@ -0,0 +1,948 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Table builtins — extracted from eval.c */ + +#include "lang/internal.h" +#include "lang/env.h" +#include "ops/ops.h" +#include "ops/internal.h" +#include "ops/hash.h" +#include "ops/idxop.h" +#include "table/sym.h" +#include "mem/heap.h" +#include +#include + +/* ══════════════════════════════════════════ + * pivot_fn_to_agg_op + * ══════════════════════════════════════════ */ + +/* Map a RAY_UNARY agg function pointer to a DAG opcode. 
+ * Returns 0 if the function is not a known aggregation builtin. */ +uint16_t pivot_fn_to_agg_op(ray_t* fn) { + if (fn->type != RAY_UNARY) return 0; + ray_unary_fn f = (ray_unary_fn)(uintptr_t)fn->i64; + if (f == ray_sum_fn) return OP_SUM; + if (f == ray_avg_fn) return OP_AVG; + if (f == ray_min_fn) return OP_MIN; + if (f == ray_max_fn) return OP_MAX; + if (f == ray_count_fn) return OP_COUNT; + if (f == ray_first_fn) return OP_FIRST; + if (f == ray_last_fn) return OP_LAST; + return 0; +} + +/* ══════════════════════════════════════════ + * pivot + * ══════════════════════════════════════════ */ + +/* (pivot table index_col pivot_col value_col agg_fn) — pivot table */ +ray_t* ray_pivot_fn(ray_t** args, int64_t n) { + if (n != 5) return ray_error("arity", "pivot expects 5 arguments: table, index, pivot-col, value-col, agg-fn"); + ray_t* tbl = args[0]; + ray_t* index_arg = args[1]; /* sym atom or list of syms */ + ray_t* pivot_col_name = args[2]; /* sym atom */ + ray_t* value_col_name = args[3]; /* sym atom */ + ray_t* agg_fn = args[4]; /* function */ + + if (tbl->type != RAY_TABLE) + return ray_error("type", "pivot: first argument must be a table"); + if (pivot_col_name->type != -RAY_SYM) + return ray_error("type", "pivot: pivot-col must be a symbol"); + if (value_col_name->type != -RAY_SYM) + return ray_error("type", "pivot: value-col must be a symbol"); + if (agg_fn->type != RAY_UNARY && agg_fn->type != RAY_LAMBDA && + agg_fn->type != RAY_VARY) + return ray_error("type", "pivot: agg-fn must be a function"); + + /* Determine index columns */ + int64_t idx_syms[16]; + int64_t n_idx = 0; + if (index_arg->type == -RAY_SYM) { + idx_syms[0] = index_arg->i64; + n_idx = 1; + } else if (index_arg->type == RAY_LIST || ray_is_vec(index_arg)) { + int64_t len = ray_len(index_arg); + if (len > 16) return ray_error("limit", "pivot: too many index columns"); + for (int64_t i = 0; i < len; i++) { + int alloc = 0; + ray_t* elem = collection_elem(index_arg, i, &alloc); + if 
(RAY_IS_ERR(elem)) return elem; + if (elem->type != -RAY_SYM) { + if (alloc) ray_release(elem); + return ray_error("type", "pivot: index columns must be symbols"); + } + idx_syms[i] = elem->i64; + if (alloc) ray_release(elem); + } + n_idx = len; + } else { + return ray_error("type", "pivot: index must be a symbol or list of symbols"); + } + + /* Get pivot column, value column */ + ray_t* pcol = ray_table_get_col(tbl, pivot_col_name->i64); + if (!pcol) return ray_error("domain", "pivot: pivot column not found"); + ray_t* vcol = ray_table_get_col(tbl, value_col_name->i64); + if (!vcol) return ray_error("domain", "pivot: value column not found"); + + /* Get index columns */ + ray_t* icols[16]; + for (int64_t i = 0; i < n_idx; i++) { + icols[i] = ray_table_get_col(tbl, idx_syms[i]); + if (!icols[i]) return ray_error("domain", "pivot: index column not found"); + } + + int64_t nrows = ray_table_nrows(tbl); + if (nrows == 0) return ray_table_new(0); + + /* DAG fast path: known agg builtins on hashable columns → OP_PIVOT */ + uint16_t agg_op = pivot_fn_to_agg_op(agg_fn); + bool dag_ok = (agg_op != 0 && pcol->type != RAY_STR && vcol->type != RAY_STR); + for (int64_t i = 0; i < n_idx && dag_ok; i++) + if (icols[i]->type == RAY_STR) dag_ok = false; + + if (dag_ok) { + ray_graph_t* g = ray_graph_new(tbl); + if (!g) return ray_error("oom", NULL); + ray_op_t* idx_ops[16]; + bool ok = true; + for (int64_t i = 0; i < n_idx && ok; i++) { + ray_t* s = ray_sym_str(idx_syms[i]); + idx_ops[i] = s ? ray_scan(g, ray_str_ptr(s)) : NULL; + if (!idx_ops[i]) ok = false; + } + ray_t* ps = ray_sym_str(pivot_col_name->i64); + ray_t* vs = ray_sym_str(value_col_name->i64); + ray_op_t* p_op = (ps && ok) ? ray_scan(g, ray_str_ptr(ps)) : NULL; + ray_op_t* v_op = (vs && p_op) ? 
ray_scan(g, ray_str_ptr(vs)) : NULL; + if (v_op) { + ray_op_t* root = ray_pivot_op(g, idx_ops, (uint8_t)n_idx, p_op, v_op, agg_op); + if (root) { + ray_t* result = ray_execute(g, root); + ray_graph_free(g); + return result; + } + } + ray_graph_free(g); + } + + /* Generic fallback: use OP_GROUP DAG to group by (index_cols, pivot_col), + * then apply agg_fn per group and unstack. Single O(n) hash pass. */ + + /* Build GROUP BY (idx0, ..., idxN-1, pivot_col) with COUNT agg via DAG */ + ray_graph_t* g = ray_graph_new(tbl); + if (!g) return ray_error("oom", NULL); + + uint8_t n_keys = (uint8_t)(n_idx + 1); + ray_op_t* key_ops[16]; + bool ok = true; + for (int64_t i = 0; i < n_idx && ok; i++) { + ray_t* s = ray_sym_str(idx_syms[i]); + key_ops[i] = s ? ray_scan(g, ray_str_ptr(s)) : NULL; + if (!key_ops[i]) ok = false; + } + { + ray_t* ps = ray_sym_str(pivot_col_name->i64); + key_ops[n_idx] = (ps && ok) ? ray_scan(g, ray_str_ptr(ps)) : NULL; + if (!key_ops[n_idx]) ok = false; + } + /* Value column scan for COUNT (just need a column ref for group) */ + ray_t* vs = ray_sym_str(value_col_name->i64); + ray_op_t* val_scan = (vs && ok) ? ray_scan(g, ray_str_ptr(vs)) : NULL; + if (!val_scan) { ray_graph_free(g); return ray_error("domain", "pivot: failed to build DAG"); } + + uint16_t grp_agg_ops[1] = { OP_COUNT }; + ray_op_t* grp_agg_ins[1] = { val_scan }; + ray_op_t* grp_root = ray_group(g, key_ops, n_keys, grp_agg_ops, grp_agg_ins, 1); + if (!grp_root) { ray_graph_free(g); return ray_error("oom", NULL); } + + ray_t* grouped = ray_execute(g, grp_root); + ray_graph_free(g); + if (!grouped || RAY_IS_ERR(grouped)) return grouped; + + /* `grouped` is a table: (idx0, ..., idxN-1, pivot_col, _count). + * Each row is one (index, pivot) combination. + * Now for each group, gather the value column subset and apply agg_fn. 
*/ + int64_t n_grps = ray_table_nrows(grouped); + + /* Get grouped columns */ + ray_t* g_icols[16]; + for (int64_t i = 0; i < n_idx; i++) + g_icols[i] = ray_table_get_col(grouped, idx_syms[i]); + ray_t* g_pcol = ray_table_get_col(grouped, pivot_col_name->i64); + + /* Collect distinct pivot values and index keys from grouped table */ + ray_retain(g_pcol); + ray_t* dvals = ray_distinct_fn(g_pcol); + ray_release(g_pcol); + if (RAY_IS_ERR(dvals)) { ray_release(grouped); return dvals; } + int64_t n_pv = ray_len(dvals); + + /* Re-scan original table to assign a grouped-row index to each + * input row. Previously this was an O(nrows * n_grps) nested loop + * that hung on any large pivot that took the generic fallback. + * Replaced with an open-addressed hash table keyed by a cheap row + * hash of (idx_cols..., pivot_col), giving O(nrows + n_grps) in the + * common case. Hash collisions re-verify via atom_eq so unhashable + * cells (strings, guids) still match correctly. + * + * Hash helper: produces the same value when called on two rows with + * equal cell values for numeric/sym/temporal columns; for strings + * and guids we under-hash (returning a type-independent constant) + * and rely entirely on atom_eq for equality. */ + #define FB_ROW_HASH(cols, ncols, pv, rid) \ + ({ \ + uint64_t _h = 0; \ + for (int64_t _k = 0; _k < (ncols); _k++) { \ + ray_t* _c = (cols)[_k]; \ + uint64_t _kh; \ + if (ray_vec_is_null(_c, (rid))) \ + _kh = 0x9E3779B97F4A7C15ULL ^ (uint64_t)(rid); \ + else if (_c->type == RAY_F64) \ + _kh = ray_hash_f64(((double*)ray_data(_c))[(rid)]); \ + else if (_c->type == RAY_STR || _c->type == RAY_GUID) \ + _kh = 0xDEADBEEFCAFEBABEULL; \ + else \ + _kh = ray_hash_i64(read_col_i64(ray_data(_c), (rid), \ + _c->type, _c->attrs)); \ + _h = (_k == 0) ? 
_kh : ray_hash_combine(_h, _kh); \ + } \ + ray_t* _pc = (pv); \ + uint64_t _ph; \ + if (ray_vec_is_null(_pc, (rid))) \ + _ph = 0x165667B19E3779F9ULL ^ (uint64_t)(rid); \ + else if (_pc->type == RAY_F64) \ + _ph = ray_hash_f64(((double*)ray_data(_pc))[(rid)]); \ + else if (_pc->type == RAY_STR || _pc->type == RAY_GUID) \ + _ph = 0xFEEDFACE12345678ULL; \ + else \ + _ph = ray_hash_i64(read_col_i64(ray_data(_pc), (rid), \ + _pc->type, _pc->attrs)); \ + ray_hash_combine(_h, _ph); \ + }) + + uint32_t gid_cap = 256; + while (gid_cap < (uint32_t)n_grps * 2 && gid_cap < (1u << 30)) gid_cap <<= 1; + ray_t* gid_ht_hdr = ray_alloc((size_t)gid_cap * sizeof(uint32_t)); + if (!gid_ht_hdr) { ray_release(dvals); ray_release(grouped); return ray_error("oom", NULL); } + uint32_t* gid_ht = (uint32_t*)ray_data(gid_ht_hdr); + memset(gid_ht, 0xFF, gid_cap * sizeof(uint32_t)); + uint32_t gid_mask = gid_cap - 1; + + /* Insert each grouped row into the HT (grouped rows are already + * distinct by construction — no equality check needed on insert). */ + for (int64_t gi = 0; gi < n_grps; gi++) { + uint64_t h = FB_ROW_HASH(g_icols, n_idx, g_pcol, gi); + uint32_t slot = (uint32_t)(h & gid_mask); + while (gid_ht[slot] != UINT32_MAX) slot = (slot + 1) & gid_mask; + gid_ht[slot] = (uint32_t)gi; + } + + ray_t* gid_vec = ray_vec_new(RAY_I64, nrows); + if (!gid_vec || RAY_IS_ERR(gid_vec)) { + ray_free(gid_ht_hdr); ray_release(dvals); ray_release(grouped); + return ray_error("oom", NULL); + } + gid_vec->len = nrows; + int64_t* gids = (int64_t*)ray_data(gid_vec); + + /* Probe HT for each input row; on collision fall through to atom_eq. 
*/ + for (int64_t r = 0; r < nrows; r++) { + uint64_t h = FB_ROW_HASH(icols, n_idx, pcol, r); + uint32_t slot = (uint32_t)(h & gid_mask); + int64_t found = -1; + while (gid_ht[slot] != UINT32_MAX) { + int64_t gi = gid_ht[slot]; + bool match = true; + for (int64_t ci = 0; ci < n_idx && match; ci++) { + int a1 = 0, a2 = 0; + ray_t* v1 = collection_elem(icols[ci], r, &a1); + ray_t* v2 = collection_elem(g_icols[ci], gi, &a2); + if (!atom_eq(v1, v2)) match = false; + if (a1) ray_release(v1); + if (a2) ray_release(v2); + } + if (match) { + int a1 = 0, a2 = 0; + ray_t* v1 = collection_elem(pcol, r, &a1); + ray_t* v2 = collection_elem(g_pcol, gi, &a2); + if (!atom_eq(v1, v2)) match = false; + if (a1) ray_release(v1); + if (a2) ray_release(v2); + } + if (match) { found = gi; break; } + slot = (slot + 1) & gid_mask; + } + gids[r] = found; + } + ray_free(gid_ht_hdr); + + /* For each group, gather the value column subset and apply agg_fn */ + ray_t* agg_results = ray_alloc(n_grps * sizeof(ray_t*)); + if (!agg_results) { ray_release(gid_vec); ray_release(dvals); ray_release(grouped); return ray_error("oom", NULL); } + agg_results->type = RAY_LIST; + agg_results->len = n_grps; + ray_t** ar = (ray_t**)ray_data(agg_results); + + /* Counting-sort rows by gid: O(nrows + n_grps) vs the previous + * O(nrows * n_grps) double-scan per group. 
*/ + ray_t* off_hdr = ray_alloc((size_t)(n_grps + 1) * sizeof(int64_t)); + if (!off_hdr) { + ray_free(agg_results); ray_release(gid_vec); ray_release(dvals); ray_release(grouped); + return ray_error("oom", NULL); + } + int64_t* offs = (int64_t*)ray_data(off_hdr); + memset(offs, 0, (size_t)(n_grps + 1) * sizeof(int64_t)); + for (int64_t r = 0; r < nrows; r++) { + int64_t g = gids[r]; + if (g >= 0) offs[g + 1]++; + } + for (int64_t gi = 0; gi < n_grps; gi++) offs[gi + 1] += offs[gi]; + + ray_t* sorted_hdr = ray_alloc((size_t)nrows * sizeof(int64_t)); + if (!sorted_hdr) { + ray_free(off_hdr); + ray_free(agg_results); ray_release(gid_vec); ray_release(dvals); ray_release(grouped); + return ray_error("oom", NULL); + } + int64_t* sorted = (int64_t*)ray_data(sorted_hdr); + /* Write-cursor array derived from offs. */ + ray_t* wcur_hdr = ray_alloc((size_t)n_grps * sizeof(int64_t)); + if (!wcur_hdr) { + ray_free(sorted_hdr); ray_free(off_hdr); + ray_free(agg_results); ray_release(gid_vec); ray_release(dvals); ray_release(grouped); + return ray_error("oom", NULL); + } + int64_t* wcur = (int64_t*)ray_data(wcur_hdr); + memcpy(wcur, offs, (size_t)n_grps * sizeof(int64_t)); + for (int64_t r = 0; r < nrows; r++) { + int64_t g = gids[r]; + if (g >= 0) sorted[wcur[g]++] = r; + } + ray_free(wcur_hdr); + + for (int64_t gi = 0; gi < n_grps; gi++) { + int64_t cnt = offs[gi + 1] - offs[gi]; + ray_t* subset = gather_by_idx(vcol, sorted + offs[gi], cnt); + if (RAY_IS_ERR(subset)) { + for (int64_t j = 0; j < gi; j++) ray_release(ar[j]); + ray_free(sorted_hdr); ray_free(off_hdr); + ray_free(agg_results); ray_release(gid_vec); ray_release(dvals); ray_release(grouped); + return subset; + } + ray_t* agg_val = call_fn1(agg_fn, subset); + ray_release(subset); + if (RAY_IS_ERR(agg_val)) { + for (int64_t j = 0; j < gi; j++) ray_release(ar[j]); + ray_free(sorted_hdr); ray_free(off_hdr); + ray_free(agg_results); ray_release(gid_vec); ray_release(dvals); ray_release(grouped); + return agg_val; + } + 
ar[gi] = agg_val; + } + ray_free(sorted_hdr); + ray_free(off_hdr); + ray_release(gid_vec); + + /* Unstack: collect distinct index keys, build wide result. + * Map each group to (ix_idx, pv_idx). */ + ray_t* ix_list = ray_list_new(16); + ray_t* gmap = ray_alloc(n_grps * 2 * sizeof(int64_t)); + int64_t* gm_ix = (int64_t*)ray_data(gmap); + int64_t* gm_pv = gm_ix + n_grps; + + for (int64_t gi = 0; gi < n_grps; gi++) { + /* Find pivot index */ + int a1 = 0; + ray_t* pv = collection_elem(g_pcol, gi, &a1); + gm_pv[gi] = -1; + for (int64_t p = 0; p < n_pv; p++) { + int a2 = 0; + ray_t* dv = collection_elem(dvals, p, &a2); + bool eq = atom_eq(pv, dv); + if (a2) ray_release(dv); + if (eq) { gm_pv[gi] = p; break; } + } + if (a1) ray_release(pv); + + /* Find or insert index key */ + gm_ix[gi] = -1; + int64_t n_ix = ray_len(ix_list); + ray_t** ix_items = (ray_t**)ray_data(ix_list); + for (int64_t j = 0; j < n_ix; j++) { + ray_t** ex = (ray_t**)ray_data(ix_items[j]); + bool match = true; + for (int64_t ci = 0; ci < n_idx && match; ci++) { + int a2 = 0; + ray_t* v = collection_elem(g_icols[ci], gi, &a2); + if (!atom_eq(ex[ci], v)) match = false; + if (a2) ray_release(v); + } + if (match) { gm_ix[gi] = j; break; } + } + if (gm_ix[gi] < 0) { + gm_ix[gi] = ray_len(ix_list); + ray_t* tup = ray_list_new((int32_t)n_idx); + for (int64_t ci = 0; ci < n_idx; ci++) { + int a2 = 0; + ray_t* v = collection_elem(g_icols[ci], gi, &a2); + if (!a2) ray_retain(v); + tup = ray_list_append(tup, v); + ray_release(v); + } + ix_list = ray_list_append(ix_list, tup); + ray_release(tup); + } + } + + int64_t n_ix = ray_len(ix_list); + + /* Build result table */ + ray_t* result = ray_table_new(n_idx + n_pv); + if (RAY_IS_ERR(result)) goto fb_cleanup; + + /* Index columns */ + { ray_t** ix_items = (ray_t**)ray_data(ix_list); + for (int64_t ci = 0; ci < n_idx; ci++) { + ray_t* col_vals = ray_list_new((int32_t)n_ix); + for (int64_t r = 0; r < n_ix; r++) { + ray_t* v = ((ray_t**)ray_data(ix_items[r]))[ci]; + 
ray_retain(v); + col_vals = ray_list_append(col_vals, v); + ray_release(v); + } + ray_t* col_vec = list_to_typed_vec(col_vals, icols[ci]->type); + if (RAY_IS_ERR(col_vec)) { ray_release(result); result = col_vec; goto fb_cleanup; } + result = ray_table_add_col(result, idx_syms[ci], col_vec); + ray_release(col_vec); + if (RAY_IS_ERR(result)) goto fb_cleanup; + } + } + + /* Value columns */ + for (int64_t p = 0; p < n_pv; p++) { + ray_t* col_vals = ray_list_new((int32_t)n_ix); + for (int64_t r = 0; r < n_ix; r++) { + ray_t* zero = ray_i64(0); + col_vals = ray_list_append(col_vals, zero); + ray_release(zero); + } + + for (int64_t gi = 0; gi < n_grps; gi++) { + if (gm_pv[gi] != p) continue; + ray_t** cv = (ray_t**)ray_data(col_vals); + ray_release(cv[gm_ix[gi]]); + ray_retain(ar[gi]); + cv[gm_ix[gi]] = ar[gi]; + } + + int8_t agg_type = RAY_I64; + { ray_t** cv = (ray_t**)ray_data(col_vals); + for (int64_t r = 0; r < n_ix; r++) + if (cv[r]->type == -RAY_F64) { agg_type = RAY_F64; break; } + } + ray_t* agg_vec = list_to_typed_vec(col_vals, agg_type); + if (RAY_IS_ERR(agg_vec)) { ray_release(result); result = agg_vec; goto fb_cleanup; } + + /* Column name */ + int a1 = 0; + ray_t* pval = collection_elem(dvals, p, &a1); + int64_t col_sym; + if (pval->type == -RAY_SYM) { + col_sym = pval->i64; + } else if (pval->type == -RAY_I64) { + char buf[64]; int len = snprintf(buf, sizeof(buf), "%ld", (long)pval->i64); + col_sym = ray_sym_intern(buf, (size_t)len); + } else if (pval->type == -RAY_F64) { + double fv = pval->f64; if (fv == 0.0 && signbit(fv)) fv = 0.0; + char buf[64]; int len = snprintf(buf, sizeof(buf), "%g", fv); + col_sym = ray_sym_intern(buf, (size_t)len); + } else if (pval->type == -RAY_BOOL) { + col_sym = ray_sym_intern(pval->b8 ? "true" : "false", pval->b8 ? 
4 : 5); + } else { + char buf[64]; int len = snprintf(buf, sizeof(buf), "col%ld", (long)pval->i64); + col_sym = ray_sym_intern(buf, (size_t)len); + } + if (a1) ray_release(pval); + + result = ray_table_add_col(result, col_sym, agg_vec); + ray_release(agg_vec); + if (RAY_IS_ERR(result)) goto fb_cleanup; + } + +fb_cleanup: + ray_free(gmap); + ray_release(ix_list); + for (int64_t gi = 0; gi < n_grps; gi++) ray_release(ar[gi]); + ray_free(agg_results); + ray_release(dvals); + ray_release(grouped); + return result; +} + +/* ══════════════════════════════════════════ + * modify + * ══════════════════════════════════════════ */ + +/* (modify tbl col_name fn) — apply fn to the named column, return new table */ +ray_t* ray_modify_fn(ray_t** args, int64_t n) { + if (n < 3) return ray_error("arity", "modify expects 3 arguments: table, column, function"); + ray_t* tbl = args[0]; + ray_t* col_name = args[1]; + ray_t* fn = args[2]; + + if (tbl->type != RAY_TABLE) + return ray_error("type", "modify: first arg must be a table"); + if (col_name->type != -RAY_SYM) + return ray_error("type", "modify: second arg must be a symbol"); + + int64_t target_sym = col_name->i64; + ray_t* col = ray_table_get_col(tbl, target_sym); + if (!col) return ray_error("domain", "modify: column not found"); + + /* Apply fn to the entire column vector (atomic fns will map element-wise) */ + ray_t* new_col = call_fn1(fn, col); + if (RAY_IS_ERR(new_col)) return new_col; + + /* Build new table: copy all columns, replacing the target */ + int64_t ncols = ray_table_ncols(tbl); + ray_t* result = ray_table_new(ncols); + if (RAY_IS_ERR(result)) { ray_release(new_col); return result; } + + for (int64_t i = 0; i < ncols; i++) { + int64_t cname = ray_table_col_name(tbl, i); + ray_t* cvec = (cname == target_sym) ? 
new_col : ray_table_get_col_idx(tbl, i); + result = ray_table_add_col(result, cname, cvec); + if (RAY_IS_ERR(result)) { ray_release(new_col); return result; } + } + ray_release(new_col); + return result; +} + +/* ══════════════════════════════════════════ + * alter + * ══════════════════════════════════════════ */ + +/* Cleanup helper for alter set's ray_cow failure paths. Releases the + * caller's retain on the original vec plus the eval'd args, then returns + * an owning RAY_ERROR — substituting "oom" when ray_cow itself returned + * NULL (RAY_IS_ERR(NULL) is false, but the caller still owes us a + * structured error to propagate). + * + * Exposed (non-static) so test code can pin the contract directly: + * NULL cow_result must produce an "oom" error, RAY_ERROR cow_result + * passes through, and `original_var` is released exactly once. */ +ray_t* ray_alter_set_cow_fail(ray_t* original_var, ray_t* cow_result, + ray_t* idx, ray_t* val, ray_t* name_sym) { + ray_release(original_var); + if (idx) ray_release(idx); + if (val) ray_release(val); + if (name_sym) ray_release(name_sym); + return cow_result ? cow_result : ray_error("oom", NULL); +} + +ray_t* ray_alter_fn(ray_t** args, int64_t n) { + if (n < 3) return ray_error("domain", NULL); + /* First arg: evaluate to get the symbol */ + ray_t* name_sym = ray_eval(args[0]); + if (!name_sym || RAY_IS_ERR(name_sym)) return name_sym ? 
name_sym : ray_error("type", NULL); + if (name_sym->type != -RAY_SYM) { ray_release(name_sym); return ray_error("type", NULL); } + + /* Resolve the variable */ + ray_t* var = ray_env_get(name_sym->i64); + if (!var) { ray_release(name_sym); return ray_error("name", NULL); } + + /* Second arg: operation name (unevaluated, must be a name) */ + ray_t* op = args[1]; + if (!op || op->type != -RAY_SYM) { ray_release(name_sym); return ray_error("type", NULL); } + ray_t* op_name = ray_sym_str(op->i64); + if (!op_name) { ray_release(name_sym); return ray_error("domain", NULL); } + const char* oname = ray_str_ptr(op_name); + size_t olen = ray_str_len(op_name); + + if (olen == 3 && memcmp(oname, "set", 3) == 0) { + /* (alter 'v set idx val) — idx can be scalar or vector of indices */ + ray_release(op_name); + if (n < 4) { ray_release(name_sym); return ray_error("domain", NULL); } + ray_t* idx = ray_eval(args[2]); + if (!idx || RAY_IS_ERR(idx)) { ray_release(name_sym); return idx ? idx : ray_error("type", NULL); } + ray_t* val = ray_eval(args[3]); + if (!val || RAY_IS_ERR(val)) { ray_release(idx); ray_release(name_sym); return val ? 
val : ray_error("type", NULL); } + if (!ray_is_vec(var) && var->type != RAY_LIST) { ray_release(idx); ray_release(val); ray_release(name_sym); return ray_error("type", NULL); } + + /* For LIST types, build a new list with replaced elements */ + if (var->type == RAY_LIST) { + int64_t vlen = ray_len(var); + ray_t** elems = (ray_t**)ray_data(var); + ray_t* new_list = ray_alloc(vlen * sizeof(ray_t*)); + if (!new_list) { ray_release(idx); ray_release(val); ray_release(name_sym); return ray_error("oom", NULL); } + new_list->type = RAY_LIST; + new_list->len = vlen; + ray_t** out = (ray_t**)ray_data(new_list); + for (int64_t i = 0; i < vlen; i++) { ray_retain(elems[i]); out[i] = elems[i]; } + + if (ray_is_atom(idx) && is_numeric(idx)) { + int64_t i = as_i64(idx); + if (i >= 0 && i < vlen) { ray_release(out[i]); ray_retain(val); out[i] = val; } + } else if (ray_is_vec(idx)) { + int64_t nidx = idx->len; + for (int64_t k = 0; k < nidx; k++) { + int alloc = 0; + ray_t* ie = collection_elem(idx, k, &alloc); + int64_t i = as_i64(ie); + if (alloc) ray_release(ie); + if (i >= 0 && i < vlen) { ray_release(out[i]); ray_retain(val); out[i] = val; } + } + } + ray_release(idx); ray_release(val); + ray_env_set(name_sym->i64, new_list); + ray_release(name_sym); + ray_retain(new_list); + return new_list; + } + + /* `var` came from ray_env_get as a BORROWED ref. ray_cow's + * contract is "I take your owning ref; I give you back a ref" + * — so calling it on a borrow over-decrements the env's + * binding when the rc>1 copy path fires (releasing v drops + * env's count from N to N-1; if some other env binding also + * pointed at v, that binding now sees an extra under-retain + * and risks UAF when later replaced). + * + * Retain up-front so the ref we hand to ray_cow is genuinely + * ours. Track the original pointer so the cow-OOM path + * (alloc_copy fails before ray_cow's release would have run) + * can still release the retain — without that, OOM leaks the + * extra ref. 
*/ + ray_t* original_var = var; + ray_retain(var); + ray_t* cow_result = ray_cow(var); + /* ray_cow returns NULL when ray_alloc_copy returned NULL (heap + * exhaustion past RAY_HEAP_MAX_ORDER) and a RAY_ERROR pointer + * when alloc_copy hit its own len-overflow guard. Both leave + * the input ref untouched, so the cleanup helper releases + * `original_var` and either propagates the error pointer or + * synthesizes an "oom" RAY_ERROR for the NULL case (test code + * pins both branches). */ + if (!cow_result || RAY_IS_ERR(cow_result)) { + return ray_alter_set_cow_fail(original_var, cow_result, idx, val, name_sym); + } + var = cow_result; + + /* Validate idx shape + (for the atom case) bounds BEFORE we + * touch any state. The accelerator-index drop below would + * otherwise outlive a failed write. */ + bool idx_is_atom_num = ray_is_atom(idx) && is_numeric(idx); + bool idx_is_vec = ray_is_vec(idx); + if (!idx_is_atom_num && !idx_is_vec) { + ray_release(var); + ray_release(idx); ray_release(val); ray_release(name_sym); + return ray_error("type", NULL); + } + if (idx_is_atom_num) { + int64_t i_check = as_i64(idx); + if (i_check < 0 || i_check >= var->len) { + ray_release(var); + ray_release(idx); ray_release(val); ray_release(name_sym); + return ray_error("index", NULL); + } + } + + /* alter's set path writes via store_typed_elem, which bypasses + * ray_vec_set's mutation guard. Now that we know the write + * will reach the data array, drop any attached accelerator + * index so it can't outlive the mutation. */ + if (var->attrs & RAY_ATTR_HAS_INDEX) { + ray_t* drop_r = ray_index_drop(&var); + if (RAY_IS_ERR(drop_r)) { + ray_release(var); + ray_release(idx); ray_release(val); ray_release(name_sym); + return drop_r; + } + } + + if (idx_is_atom_num) { + /* Single index — bounds already validated above. */ + int64_t i = as_i64(idx); + ray_release(idx); + store_typed_elem(var, i, val); + } else { + /* Vector of indices — set each to val. 
+ * If val is a vector of same length, set pairwise. + * If val is scalar or shorter, broadcast. */ + int64_t nidx = idx->len; + int val_is_vec = ray_is_vec(val) && val->len == nidx; + for (int64_t k = 0; k < nidx; k++) { + int alloc = 0; + ray_t* ie = collection_elem(idx, k, &alloc); + int64_t i = as_i64(ie); + if (alloc) ray_release(ie); + if (i < 0 || i >= var->len) continue; + if (val_is_vec) { + int va = 0; + ray_t* ve = collection_elem(val, k, &va); + store_typed_elem(var, i, ve); + if (va) ray_release(ve); + } else { + store_typed_elem(var, i, val); + } + } + ray_release(idx); + } + ray_release(val); + ray_env_set(name_sym->i64, var); + ray_release(name_sym); + /* The retain-first at the top of the set path gave us an owning + * ref to var. ray_env_set already retained for the env binding; + * transferring our existing ref to the caller via return is + * correct. No additional ray_retain here. */ + return var; + } + if (olen == 6 && memcmp(oname, "concat", 6) == 0) { + /* (alter 'v concat val) */ + ray_release(op_name); + if (n < 3) { ray_release(name_sym); return ray_error("domain", NULL); } + ray_t* val = ray_eval(args[2]); + if (!val || RAY_IS_ERR(val)) { ray_release(name_sym); return val ? val : ray_error("type", NULL); } + ray_t* new_vec = ray_concat_fn(var, val); + ray_release(val); + if (RAY_IS_ERR(new_vec)) { ray_release(name_sym); return new_vec; } + ray_env_set(name_sym->i64, new_vec); + ray_release(name_sym); + ray_retain(new_vec); + return new_vec; + } + if (olen == 6 && memcmp(oname, "remove", 6) == 0) { + /* (alter 'v remove idx) — remove element(s) at index/indices */ + ray_release(op_name); + if (n < 3) { ray_release(name_sym); return ray_error("domain", NULL); } + ray_t* idx = ray_eval(args[2]); + if (!idx || RAY_IS_ERR(idx)) { ray_release(name_sym); return idx ? 
idx : ray_error("type", NULL); } + + if (!var || var->type != RAY_LIST) { + ray_release(idx); ray_release(name_sym); + return ray_error("type", NULL); + } + + int64_t vlen = ray_len(var); + ray_t** elems = (ray_t**)ray_data(var); + + /* Build a set of indices to remove */ + int64_t remove_idx[256]; + int64_t nremove = 0; + if (ray_is_atom(idx) && is_numeric(idx)) { + remove_idx[0] = as_i64(idx); + nremove = 1; + } else if (ray_is_vec(idx)) { + nremove = idx->len; + if (nremove > 256) { ray_release(idx); ray_release(name_sym); return ray_error("limit", NULL); } + for (int64_t i = 0; i < nremove; i++) { + int alloc = 0; + ray_t* e = collection_elem(idx, i, &alloc); + remove_idx[i] = as_i64(e); + if (alloc) ray_release(e); + } + } else { + ray_release(idx); ray_release(name_sym); + return ray_error("type", NULL); + } + ray_release(idx); + + /* Build new list without the removed indices */ + int64_t new_len = vlen; + for (int64_t i = 0; i < nremove; i++) + if (remove_idx[i] >= 0 && remove_idx[i] < vlen) new_len--; + + ray_t* new_list = ray_alloc(new_len * sizeof(ray_t*)); + if (!new_list) { ray_release(name_sym); return ray_error("oom", NULL); } + new_list->type = RAY_LIST; + new_list->len = new_len; + ray_t** out = (ray_t**)ray_data(new_list); + int64_t j = 0; + for (int64_t i = 0; i < vlen; i++) { + int skip = 0; + for (int64_t k = 0; k < nremove; k++) + if (remove_idx[k] == i) { skip = 1; break; } + if (!skip) { + ray_retain(elems[i]); + out[j++] = elems[i]; + } + } + new_list->len = j; + ray_env_set(name_sym->i64, new_list); + ray_release(name_sym); + ray_retain(new_list); + return new_list; + } + ray_release(op_name); + ray_release(name_sym); + return ray_error("domain", NULL); +} + +/* ══════════════════════════════════════════ + * del + * ══════════════════════════════════════════ */ + +/* (del name) — delete variable from environment (special form, unevaluated arg) */ +ray_t* ray_del_fn(ray_t** args, int64_t n) { + if (n < 1) return ray_error("arity", "del 
expects 1 argument"); + ray_t* name = args[0]; + if (name->type != -RAY_SYM) + return ray_error("type", "del expects a symbol"); + /* Propagate ray_env_set's failure: silently ignoring the return + * value would let `(del .sys.gc)` appear to succeed while leaving + * the builtin intact — a confusing lie. Emit a precise message + * per error code rather than blaming every failure on the + * reserved-namespace guard (OOM on dotted-path upsert, for + * example, is not a reserve error). */ + ray_err_t err = ray_env_set(name->i64, NULL); + if (err == RAY_OK) return ray_i64(0); + const char* nm = ray_str_ptr(ray_sym_str(name->i64)); + if (err == RAY_ERR_RESERVED) + return ray_error("reserve", + "cannot delete reserved binding '%s'", nm); + return ray_error(ray_err_code_str(err), + "del '%s' failed", nm); +} + +/* ══════════════════════════════════════════ + * row + * ══════════════════════════════════════════ */ + +/* (row table idx) — extract a single row from a table as a dict */ +ray_t* ray_row_fn(ray_t* tbl, ray_t* idx) { + if (tbl->type != RAY_TABLE) return ray_error("type", "row expects a table"); + if (!is_numeric(idx)) return ray_error("type", "row index must be integer"); + /* Delegate to at — it already handles table integer indexing */ + return ray_at_fn(tbl, idx); +} + +/* ══════════════════════════════════════════ + * union-all + * ══════════════════════════════════════════ */ + +/* (union-all t1 t2) — concatenate two tables row-wise (same schema) */ +ray_t* ray_union_all_fn(ray_t* t1, ray_t* t2) { + if (t1->type != RAY_TABLE) + return ray_error("type", "union-all: first arg must be a table"); + if (t2->type != RAY_TABLE) + return ray_error("type", "union-all: second arg must be a table"); + + int64_t ncols = ray_table_ncols(t1); + if (ncols != ray_table_ncols(t2)) + return ray_error("type", "union-all: tables must have same number of columns"); + + /* Validate matching column names */ + for (int64_t c = 0; c < ncols; c++) { + if (ray_table_col_name(t1, c) 
!= ray_table_col_name(t2, c)) + return ray_error("type", "union-all: column names must match"); + } + + ray_t* result = ray_table_new(ncols); + if (!result || RAY_IS_ERR(result)) return result; + + for (int64_t c = 0; c < ncols; c++) { + int64_t name_id = ray_table_col_name(t1, c); + ray_t* col1 = ray_table_get_col_idx(t1, c); + ray_t* col2 = ray_table_get_col_idx(t2, c); + + if (!col1 || !col2) { + ray_release(result); + return ray_error("type", "union-all: missing column"); + } + + ray_t* combined = ray_vec_concat(col1, col2); + if (!combined || RAY_IS_ERR(combined)) { + ray_release(result); + return combined ? combined : ray_error("oom", NULL); + } + + result = ray_table_add_col(result, name_id, combined); + ray_release(combined); + if (!result || RAY_IS_ERR(result)) return result; + } + + return result; +} + +/* ══════════════════════════════════════════ + * table-distinct + * ══════════════════════════════════════════ */ + +/* (table-distinct t) — remove duplicate rows via DAG group-by */ +ray_t* ray_table_distinct_fn(ray_t* tbl) { + if (tbl->type != RAY_TABLE) + return ray_error("type", "table-distinct expects a table"); + + int64_t ncols = ray_table_ncols(tbl); + if (ncols == 0) { ray_retain(tbl); return tbl; } + + ray_graph_t* g = ray_graph_new(tbl); + if (!g) return ray_error("oom", NULL); + + ray_op_t* keys[256]; + if (ncols > 256) { ray_graph_free(g); return ray_error("range", "too many columns"); } + + for (int64_t c = 0; c < ncols; c++) { + int64_t name_id = ray_table_col_name(tbl, c); + ray_t* name_str = ray_sym_str(name_id); + if (!name_str) { ray_graph_free(g); return ray_error("type", "bad column name"); } + keys[c] = ray_scan(g, ray_str_ptr(name_str)); + if (!keys[c]) { ray_graph_free(g); return ray_error("oom", NULL); } + } + + ray_op_t* root = ray_distinct(g, keys, (uint8_t)ncols); + if (!root) { ray_graph_free(g); return ray_error("oom", NULL); } + + ray_t* result = ray_execute(g, root); + ray_graph_free(g); + return result; +} + +/* 
══════════════════════════════════════════ + * unify + * ══════════════════════════════════════════ */ + +/* (unify a b) — return list of two vectors promoted to a common type */ +ray_t* ray_unify_fn(ray_t* a, ray_t* b) { + /* Build a 2-element list containing both values */ + ray_t* result = ray_list_new(2); + if (RAY_IS_ERR(result)) return result; + + if (a->type == b->type || ray_is_atom(a) || ray_is_atom(b)) { + /* Same type or atoms: return as-is */ + ray_retain(a); ray_retain(b); + result = ray_list_append(result, a); ray_release(a); + result = ray_list_append(result, b); ray_release(b); + return result; + } + + /* Different vector types: attempt numeric promotion */ + /* For now: wrap both without conversion */ + ray_retain(a); ray_retain(b); + result = ray_list_append(result, a); ray_release(a); + result = ray_list_append(result, b); ray_release(b); + return result; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/temporal.c b/crates/rayforce-sys/vendor/rayforce/src/ops/temporal.c new file mode 100644 index 0000000..9f6065a --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/temporal.c @@ -0,0 +1,665 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ops/internal.h" +#include "lang/internal.h" +#include "ops/temporal.h" +#include + +/* ============================================================================ + * ray_temporal_extract — standalone extract, usable outside the DAG. + * + * Mirrors exec_extract's scalar decomposition kernel but takes a ray_t* + * input directly. Vector input → RAY_I64 vector; atom input → RAY_I64 + * atom. Returned ref is caller-owned. Called from the env dotted-path + * resolver so `date.dd` / `ts.hh` etc. work at runtime without building + * a DAG. + * ============================================================================ */ + +#define RTE_USEC_PER_SEC 1000000LL +#define RTE_USEC_PER_MIN (60LL * RTE_USEC_PER_SEC) +#define RTE_USEC_PER_HOUR (3600LL * RTE_USEC_PER_SEC) +#define RTE_USEC_PER_DAY (86400LL * RTE_USEC_PER_SEC) + +/* Decompose a single 'microseconds since 2000-01-01' value into a field. 
*/ +static int64_t rte_extract_one(int64_t us, int field) { + if (field == RAY_EXTRACT_EPOCH) return us; + if (field == RAY_EXTRACT_HOUR) { + int64_t day_us = us % RTE_USEC_PER_DAY; + if (day_us < 0) day_us += RTE_USEC_PER_DAY; + return day_us / RTE_USEC_PER_HOUR; + } + if (field == RAY_EXTRACT_MINUTE) { + int64_t day_us = us % RTE_USEC_PER_DAY; + if (day_us < 0) day_us += RTE_USEC_PER_DAY; + return (day_us % RTE_USEC_PER_HOUR) / RTE_USEC_PER_MIN; + } + if (field == RAY_EXTRACT_SECOND) { + int64_t day_us = us % RTE_USEC_PER_DAY; + if (day_us < 0) day_us += RTE_USEC_PER_DAY; + return (day_us % RTE_USEC_PER_MIN) / RTE_USEC_PER_SEC; + } + + /* Calendar fields: Hinnant civil_from_days. */ + int64_t days_since_2000 = us / RTE_USEC_PER_DAY; + if (us < 0 && us % RTE_USEC_PER_DAY != 0) days_since_2000--; + int64_t z = days_since_2000 + 10957 + 719468; + int64_t era = (z >= 0 ? z : z - 146096) / 146097; + uint64_t doe = (uint64_t)(z - era * 146097); + uint64_t yoe = (doe - doe/1460 + doe/36524 - doe/146096) / 365; + int64_t y = (int64_t)yoe + era * 400; + uint64_t doy_mar = doe - (365*yoe + yoe/4 - yoe/100); + uint64_t mp = (5*doy_mar + 2) / 153; + uint64_t d = doy_mar - (153*mp + 2) / 5 + 1; + uint64_t mo = mp < 10 ? 
mp + 3 : mp - 9; + y += (mo <= 2); + + if (field == RAY_EXTRACT_YEAR) return y; + if (field == RAY_EXTRACT_MONTH) return (int64_t)mo; + if (field == RAY_EXTRACT_DAY) return (int64_t)d; + if (field == RAY_EXTRACT_DOW) { + return ((days_since_2000 % 7) + 7 + 5) % 7 + 1; + } + if (field == RAY_EXTRACT_DOY) { + static const int dbm[13] = { + 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 + }; + if (mo < 1 || mo > 12) return 0; + int leap = (y % 4 == 0 && (y % 100 != 0 || y % 400 == 0)); + int64_t doy_jan = dbm[mo] + (int64_t)d; + if (mo > 2 && leap) doy_jan++; + return doy_jan; + } + return 0; +} + +/* Convert a raw slot value from the respective temporal type into + * microseconds-since-2000 — the internal unit used by rte_extract_one's + * Hinnant math. DATE is stored as int32 days, TIME as int32 ms, + * TIMESTAMP as int64 *nanoseconds* (matching io/csv.c's parse and the + * rest of the runtime). The previous version of this helper treated + * TIMESTAMP as µs, which made (yyyy ts) decode to absurd years (26204 + * on 2024-03-15) — a 1000× unit mismatch. */ +static inline int64_t rte_to_us(int8_t type, int64_t raw) { + if (type == RAY_DATE || type == -RAY_DATE) return raw * RTE_USEC_PER_DAY; + if (type == RAY_TIME || type == -RAY_TIME) return raw * 1000LL; + /* RAY_TIMESTAMP / -RAY_TIMESTAMP: ns → µs (floor toward -inf). */ + return raw >= 0 ? raw / 1000LL + : -(((-raw) + 999LL) / 1000LL); +} + +/* Inverse of rte_to_us for TIMESTAMP output paths (truncate). */ +static inline int64_t rte_us_to_ts_raw(int64_t us) { return us * 1000LL; } + +ray_t* ray_temporal_extract(ray_t* input, int field) { + if (!input || RAY_IS_ERR(input)) return input; + + /* Atom input — extract single value as RAY_I64 atom. A null input + * atom produces a typed null output (0Nl): a garbage year/month/etc. + * extracted from the null-sentinel bit pattern would be deeply + * confusing when mixed into downstream arithmetic. 
*/ + if (input->type < 0) { + int8_t t = input->type; + if (t != -RAY_DATE && t != -RAY_TIME && t != -RAY_TIMESTAMP) + return ray_error("type", NULL); + if (RAY_ATOM_IS_NULL(input)) return ray_typed_null(-RAY_I64); + int64_t raw = input->i64; + int64_t us = rte_to_us(t, raw); + return ray_i64(rte_extract_one(us, field)); + } + + /* Vector input. */ + int8_t t = input->type; + if (t != RAY_DATE && t != RAY_TIME && t != RAY_TIMESTAMP) + return ray_error("type", NULL); + + int64_t len = input->len; + ray_t* result = ray_vec_new(RAY_I64, len); + if (!result || RAY_IS_ERR(result)) return result; + result->len = len; + int64_t* out = (int64_t*)ray_data(result); + + /* Null-aware decomposition: any row flagged null in the source + * becomes 0 in the data buffer and carries the null bit on the + * output, so downstream ops treat it as 0Nl rather than the bogus + * year/month/etc that would fall out of decomposing the null + * sentinel's bit pattern. */ + /* Slice-aware HAS_NULLS check: slices don't carry HAS_NULLS on + * themselves, so inspect the parent when input is a slice. */ + bool src_has_nulls = + (input->attrs & RAY_ATTR_HAS_NULLS) || + ((input->attrs & RAY_ATTR_SLICE) && input->slice_parent && + (input->slice_parent->attrs & RAY_ATTR_HAS_NULLS)); + const char* base = (const char*)ray_data(input); + for (int64_t i = 0; i < len; i++) { + if (src_has_nulls && ray_vec_is_null(input, i)) { + out[i] = 0; + ray_vec_set_null(result, i, true); + continue; + } + int64_t raw; + if (t == RAY_DATE) raw = (int64_t)((const int32_t*)base)[i]; + else if (t == RAY_TIME) raw = (int64_t)((const int32_t*)base)[i]; + else raw = ((const int64_t*)base)[i]; + out[i] = rte_extract_one(rte_to_us(t, raw), field); + } + return result; +} + +/* Sym name → RAY_EXTRACT_* field code. Resolves by reading the interned + * name string and matching against the documented segment names. Used + * by the env dotted-path resolver so `date_col.dd` works without a DAG. 
*/ +int ray_temporal_field_from_sym(int64_t sym_id) { + /* Documented segment names and the RAY_EXTRACT_* code each one selects. */ + static const struct { const char* name; size_t len; int field; } segments[] = { + { "yyyy", 4, RAY_EXTRACT_YEAR }, + { "mm", 2, RAY_EXTRACT_MONTH }, + { "dd", 2, RAY_EXTRACT_DAY }, + { "hh", 2, RAY_EXTRACT_HOUR }, + { "minute", 6, RAY_EXTRACT_MINUTE }, + { "ss", 2, RAY_EXTRACT_SECOND }, + { "dow", 3, RAY_EXTRACT_DOW }, + { "doy", 3, RAY_EXTRACT_DOY }, + }; + + ray_t* sym = ray_sym_str(sym_id); + if (!sym) return -1; + const char* text = ray_str_ptr(sym); + size_t text_len = ray_str_len(sym); + if (!text) return -1; + + /* Exact match only: lengths must agree before the bytes are compared, + * so a longer name that merely starts with a segment does not match. */ + for (size_t i = 0; i < sizeof segments / sizeof segments[0]; i++) { + if (text_len == segments[i].len && + memcmp(text, segments[i].name, segments[i].len) == 0) + return segments[i].field; + } + + /* Not a temporal field name. */ + return -1; +} + +/* Eval-level unary builtins. Each one is a thin wrapper around + * ray_temporal_extract with the field bound, so they participate in the + * regular function-call machinery: `(ss ts)`, `(yyyy d)`, etc. behave + * like any other unary builtin and `ts.ss`, `d.yyyy` resolve through + * env_resolve's standard container-then-callable dispatch.
*/ +ray_t* ray_extract_ss_fn(ray_t* x) { return ray_temporal_extract(x, RAY_EXTRACT_SECOND); } +ray_t* ray_extract_hh_fn(ray_t* x) { return ray_temporal_extract(x, RAY_EXTRACT_HOUR); } +ray_t* ray_extract_minute_fn(ray_t* x) { return ray_temporal_extract(x, RAY_EXTRACT_MINUTE); } +ray_t* ray_extract_yyyy_fn(ray_t* x) { return ray_temporal_extract(x, RAY_EXTRACT_YEAR); } +ray_t* ray_extract_mm_fn(ray_t* x) { return ray_temporal_extract(x, RAY_EXTRACT_MONTH); } +ray_t* ray_extract_dd_fn(ray_t* x) { return ray_temporal_extract(x, RAY_EXTRACT_DAY); } +ray_t* ray_extract_dow_fn(ray_t* x) { return ray_temporal_extract(x, RAY_EXTRACT_DOW); } +ray_t* ray_extract_doy_fn(ray_t* x) { return ray_temporal_extract(x, RAY_EXTRACT_DOY); } + +int ray_temporal_trunc_from_sym(int64_t sym_id) { + ray_t* s = ray_sym_str(sym_id); + if (!s) return -1; + const char* p = ray_str_ptr(s); + size_t n = ray_str_len(s); + if (!p) return -1; + if (n == 4 && memcmp(p, "date", 4) == 0) return RAY_EXTRACT_DAY; + if (n == 4 && memcmp(p, "time", 4) == 0) return RAY_EXTRACT_SECOND; + return -1; +} + +ray_t* ray_temporal_truncate(ray_t* input, int kind) { + if (!input || RAY_IS_ERR(input)) return input; + + /* Atom input — produce a RAY_TIMESTAMP atom. Null input → 0Np. */ + if (input->type < 0) { + int8_t t = input->type; + if (t != -RAY_DATE && t != -RAY_TIME && t != -RAY_TIMESTAMP) + return ray_error("type", NULL); + if (RAY_ATOM_IS_NULL(input)) return ray_typed_null(-RAY_TIMESTAMP); + int64_t us = rte_to_us(t, input->i64); + int64_t bucket = (kind == RAY_EXTRACT_DAY) + ? RTE_USEC_PER_DAY + : RTE_USEC_PER_SEC; + int64_t r = us % bucket; + int64_t out_us = us - r - (r < 0 ? bucket : 0); + return ray_timestamp(rte_us_to_ts_raw(out_us)); + } + + /* Vector input. 
*/ + int8_t t = input->type; + if (t != RAY_DATE && t != RAY_TIME && t != RAY_TIMESTAMP) + return ray_error("type", NULL); + + int64_t len = input->len; + ray_t* result = ray_vec_new(RAY_TIMESTAMP, len); + if (!result || RAY_IS_ERR(result)) return result; + result->len = len; + int64_t* out = (int64_t*)ray_data(result); + + /* Slice-aware HAS_NULLS check: slices don't carry HAS_NULLS on + * themselves, so inspect the parent when input is a slice. */ + bool src_has_nulls = + (input->attrs & RAY_ATTR_HAS_NULLS) || + ((input->attrs & RAY_ATTR_SLICE) && input->slice_parent && + (input->slice_parent->attrs & RAY_ATTR_HAS_NULLS)); + const char* base = (const char*)ray_data(input); + int64_t bucket = (kind == RAY_EXTRACT_DAY) + ? RTE_USEC_PER_DAY + : RTE_USEC_PER_SEC; + + for (int64_t i = 0; i < len; i++) { + if (src_has_nulls && ray_vec_is_null(input, i)) { + out[i] = 0; + ray_vec_set_null(result, i, true); + continue; + } + int64_t raw; + if (t == RAY_DATE) raw = (int64_t)((const int32_t*)base)[i]; + else if (t == RAY_TIME) raw = (int64_t)((const int32_t*)base)[i]; + else raw = ((const int64_t*)base)[i]; + int64_t us = rte_to_us(t, raw); + int64_t r = us % bucket; + out[i] = rte_us_to_ts_raw(us - r - (r < 0 ? bucket : 0)); + } + return result; +} + +/* ============================================================================ + * EXTRACT — date/time component extraction from temporal columns + * + * Input: RAY_TIMESTAMP (i64 us since 2000-01-01), RAY_DATE (i32 days since + * 2000-01-01), or RAY_TIME (i32 ms since midnight). + * Output: i64 vector of extracted field values. + * + * Uses Howard Hinnant's civil_from_days algorithm (public domain) for + * Gregorian calendar decomposition. 
+ * ============================================================================ */ + +ray_t* exec_extract(ray_graph_t* g, ray_op_t* op) { + ray_t* input = exec_node(g, op->inputs[0]); + if (!input || RAY_IS_ERR(input)) return input; + + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) { ray_release(input); return ray_error("nyi", NULL); } + + int64_t field = ext->sym; + int64_t len = input->len; + int8_t in_type = input->type; + + ray_t* result = ray_vec_new(RAY_I64, len); + if (!result || RAY_IS_ERR(result)) { ray_release(input); return result; } + result->len = len; + + int64_t* out = (int64_t*)ray_data(result); + + #undef USEC_PER_SEC + #define USEC_PER_SEC 1000000LL + #define USEC_PER_MIN (60LL * USEC_PER_SEC) + #define USEC_PER_HOUR (3600LL * USEC_PER_SEC) + #define USEC_PER_DAY (86400LL * USEC_PER_SEC) + + /* Slice-aware HAS_NULLS check: slices don't carry HAS_NULLS on + * themselves, so inspect the parent when input is a slice. */ + bool src_has_nulls = + (input->attrs & RAY_ATTR_HAS_NULLS) || + ((input->attrs & RAY_ATTR_SLICE) && input->slice_parent && + (input->slice_parent->attrs & RAY_ATTR_HAS_NULLS)); + + ray_morsel_t m; + ray_morsel_init(&m, input); + int64_t off = 0; + + while (ray_morsel_next(&m)) { + int64_t n = m.morsel_len; + + for (int64_t i = 0; i < n; i++) { + /* Propagate nulls: decomposing a null-sentinel's raw bytes + * would emit a bogus year / month / hour, so we zero the + * output slot and set its null bit instead. 
*/ + if (src_has_nulls && ray_vec_is_null(input, off + i)) { + out[off + i] = 0; + ray_vec_set_null(result, off + i, true); + continue; + } + int64_t us; + if (in_type == RAY_DATE) { + /* int32 days since 2000-01-01 -> microseconds */ + int32_t d = ((const int32_t*)m.morsel_ptr)[i]; + us = (int64_t)d * USEC_PER_DAY; + } else if (in_type == RAY_TIME) { + /* int32 milliseconds since midnight -> microseconds */ + int32_t ms = ((const int32_t*)m.morsel_ptr)[i]; + us = (int64_t)ms * 1000LL; + } else { + /* RAY_TIMESTAMP: int64 *nanoseconds* since 2000 (matches + * io/csv parse and the rest of the runtime). Convert to + * µs for the calendar/time decomposition below. RAY_I64 + * inputs flow through the same path; anything higher- + * resolution than µs loses its low three digits, which + * doesn't matter for calendar or clock field extraction. */ + int64_t ns = ((const int64_t*)m.morsel_ptr)[i]; + us = ns >= 0 ? ns / 1000LL + : -(((-ns) + 999LL) / 1000LL); + } + + if (field == RAY_EXTRACT_EPOCH) { + out[off + i] = us; + } else if (field == RAY_EXTRACT_HOUR) { + int64_t day_us = us % USEC_PER_DAY; + if (day_us < 0) day_us += USEC_PER_DAY; + out[off + i] = day_us / USEC_PER_HOUR; + } else if (field == RAY_EXTRACT_MINUTE) { + int64_t day_us = us % USEC_PER_DAY; + if (day_us < 0) day_us += USEC_PER_DAY; + out[off + i] = (day_us % USEC_PER_HOUR) / USEC_PER_MIN; + } else if (field == RAY_EXTRACT_SECOND) { + int64_t day_us = us % USEC_PER_DAY; + if (day_us < 0) day_us += USEC_PER_DAY; + out[off + i] = (day_us % USEC_PER_MIN) / USEC_PER_SEC; + } else { + /* Calendar fields: YEAR, MONTH, DAY, DOW, DOY */ + /* Floor-divide microseconds to get day count */ + int64_t days_since_2000 = us / USEC_PER_DAY; + if (us < 0 && us % USEC_PER_DAY != 0) days_since_2000--; + + /* Hinnant civil_from_days: shift to 0000-03-01 era-based epoch */ + int64_t z = days_since_2000 + 10957 + 719468; + int64_t era = (z >= 0 ? 
z : z - 146096) / 146097; + uint64_t doe = (uint64_t)(z - era * 146097); + uint64_t yoe = (doe - doe/1460 + doe/36524 - doe/146096) / 365; + int64_t y = (int64_t)yoe + era * 400; + uint64_t doy_mar = doe - (365*yoe + yoe/4 - yoe/100); + uint64_t mp = (5*doy_mar + 2) / 153; + uint64_t d = doy_mar - (153*mp + 2) / 5 + 1; + uint64_t mo = mp < 10 ? mp + 3 : mp - 9; + y += (mo <= 2); + + if (field == RAY_EXTRACT_YEAR) { + out[off + i] = y; + } else if (field == RAY_EXTRACT_MONTH) { + out[off + i] = (int64_t)mo; + } else if (field == RAY_EXTRACT_DAY) { + out[off + i] = (int64_t)d; + } else if (field == RAY_EXTRACT_DOW) { + /* ISO day of week: Mon=1 .. Sun=7 + * 2000-01-01 was Saturday (ISO 6). + * Formula: ((days%7)+7+5)%7 + 1 */ + out[off + i] = ((days_since_2000 % 7) + 7 + 5) % 7 + 1; + } else if (field == RAY_EXTRACT_DOY) { + /* Day of year [1..366], January-based */ + static const int dbm[13] = { + 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 + }; + if (mo < 1 || mo > 12) { out[off + i] = 0; continue; } + int leap = (y % 4 == 0 && (y % 100 != 0 || y % 400 == 0)); + int64_t doy_jan = dbm[mo] + (int64_t)d; + if (mo > 2 && leap) doy_jan++; + out[off + i] = doy_jan; + } else { + out[off + i] = 0; + } + } + } + off += n; + } + + #undef USEC_PER_SEC + #undef USEC_PER_MIN + #undef USEC_PER_HOUR + #undef USEC_PER_DAY + + ray_release(input); + return result; +} + +/* ============================================================================ + * DATE_TRUNC — truncate temporal value to specified precision + * + * Input: RAY_TIMESTAMP (i64 us since 2000-01-01), RAY_DATE (i32 days since + * 2000-01-01), or RAY_TIME (i32 ms since midnight). + * Output: RAY_TIMESTAMP (i64 us) — always returns microseconds since 2000-01-01. + * Sub-day: modular arithmetic. Month/year: calendar decompose + recompose. 
+ * ============================================================================ */ + +/* Convert (year, month, day) to days since 2000-01-01 using the inverse of + * Hinnant's civil_from_days. */ +static int64_t days_from_civil(int64_t y, int64_t m, int64_t d) { + y -= (m <= 2); + int64_t era = (y >= 0 ? y : y - 399) / 400; + uint64_t yoe = (uint64_t)(y - era * 400); + uint64_t doy = (153 * (m > 2 ? (uint64_t)m - 3 : (uint64_t)m + 9) + 2) / 5 + (uint64_t)d - 1; + uint64_t doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; + return era * 146097 + (int64_t)doe - 719468 - 10957; +} + +ray_t* exec_date_trunc(ray_graph_t* g, ray_op_t* op) { + ray_t* input = exec_node(g, op->inputs[0]); + if (!input || RAY_IS_ERR(input)) return input; + + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) { ray_release(input); return ray_error("nyi", NULL); } + + int64_t field = ext->sym; + int64_t len = input->len; + int8_t in_type = input->type; + + ray_t* result = ray_vec_new(RAY_TIMESTAMP, len); + if (!result || RAY_IS_ERR(result)) { ray_release(input); return result; } + result->len = len; + + int64_t* out = (int64_t*)ray_data(result); + + #define DT_USEC_PER_SEC 1000000LL + #define DT_USEC_PER_MIN (60LL * DT_USEC_PER_SEC) + #define DT_USEC_PER_HOUR (3600LL * DT_USEC_PER_SEC) + #define DT_USEC_PER_DAY (86400LL * DT_USEC_PER_SEC) + + /* Slice-aware HAS_NULLS check: slices don't carry HAS_NULLS on + * themselves, so inspect the parent when input is a slice. */ + bool src_has_nulls = + (input->attrs & RAY_ATTR_HAS_NULLS) || + ((input->attrs & RAY_ATTR_SLICE) && input->slice_parent && + (input->slice_parent->attrs & RAY_ATTR_HAS_NULLS)); + + ray_morsel_t m; + ray_morsel_init(&m, input); + int64_t off = 0; + + while (ray_morsel_next(&m)) { + int64_t n = m.morsel_len; + + for (int64_t i = 0; i < n; i++) { + /* Null sentinels decode to garbage times; propagate the + * null bit instead of emitting a bogus truncated value. 
*/ + if (src_has_nulls && ray_vec_is_null(input, off + i)) { + out[off + i] = 0; + ray_vec_set_null(result, off + i, true); + continue; + } + + int64_t us; + if (in_type == RAY_DATE) { + int32_t d = ((const int32_t*)m.morsel_ptr)[i]; + us = (int64_t)d * DT_USEC_PER_DAY; + } else if (in_type == RAY_TIME) { + int32_t ms = ((const int32_t*)m.morsel_ptr)[i]; + us = (int64_t)ms * 1000LL; + } else { + /* RAY_TIMESTAMP: nanoseconds since 2000 → microseconds. + * Sub-microsecond precision is intentionally dropped — + * every DATE_TRUNC field truncates at second boundary + * or coarser. */ + int64_t ns = ((const int64_t*)m.morsel_ptr)[i]; + us = ns >= 0 ? ns / 1000LL + : -(((-ns) + 999LL) / 1000LL); + } + + /* Truncation math below happens in µs; the final value is + * scaled back to ns before storing, because the result + * vector is RAY_TIMESTAMP and the rest of the runtime + * expects ns. */ + int64_t out_us; + switch (field) { + case RAY_EXTRACT_SECOND: { + int64_t r = us % DT_USEC_PER_SEC; + out_us = us - r - (r < 0 ? DT_USEC_PER_SEC : 0); + break; + } + case RAY_EXTRACT_MINUTE: { + int64_t r = us % DT_USEC_PER_MIN; + out_us = us - r - (r < 0 ? DT_USEC_PER_MIN : 0); + break; + } + case RAY_EXTRACT_HOUR: { + int64_t r = us % DT_USEC_PER_HOUR; + out_us = us - r - (r < 0 ? DT_USEC_PER_HOUR : 0); + break; + } + case RAY_EXTRACT_DAY: { + int64_t r = us % DT_USEC_PER_DAY; + out_us = us - r - (r < 0 ? DT_USEC_PER_DAY : 0); + break; + } + case RAY_EXTRACT_MONTH: { + int64_t days2k = us / DT_USEC_PER_DAY; + if (us < 0 && us % DT_USEC_PER_DAY != 0) days2k--; + int64_t z = days2k + 10957 + 719468; + int64_t era = (z >= 0 ? z : z - 146096) / 146097; + uint64_t doe = (uint64_t)(z - era * 146097); + uint64_t yoe = (doe - doe/1460 + doe/36524 - doe/146096) / 365; + int64_t y = (int64_t)yoe + era * 400; + uint64_t doy_mar = doe - (365*yoe + yoe/4 - yoe/100); + uint64_t mp = (5*doy_mar + 2) / 153; + uint64_t mo = mp < 10 ? 
mp + 3 : mp - 9; + y += (mo <= 2); + out_us = days_from_civil(y, (int64_t)mo, 1) * DT_USEC_PER_DAY; + break; + } + case RAY_EXTRACT_YEAR: { + int64_t days2k = us / DT_USEC_PER_DAY; + if (us < 0 && us % DT_USEC_PER_DAY != 0) days2k--; + int64_t z = days2k + 10957 + 719468; + int64_t era = (z >= 0 ? z : z - 146096) / 146097; + uint64_t doe = (uint64_t)(z - era * 146097); + uint64_t yoe = (doe - doe/1460 + doe/36524 - doe/146096) / 365; + int64_t y = (int64_t)yoe + era * 400; + uint64_t doy_mar = doe - (365*yoe + yoe/4 - yoe/100); + uint64_t mp = (5*doy_mar + 2) / 153; + uint64_t mo = mp < 10 ? mp + 3 : mp - 9; + y += (mo <= 2); + out_us = days_from_civil(y, 1, 1) * DT_USEC_PER_DAY; + break; + } + default: + out_us = us; + break; + } + out[off + i] = out_us * 1000LL; /* µs → ns for RAY_TIMESTAMP */ + } + off += n; + } + + #undef DT_USEC_PER_SEC + #undef DT_USEC_PER_MIN + #undef DT_USEC_PER_HOUR + #undef DT_USEC_PER_DAY + + ray_release(input); + return result; +} + +/* ── Builtins ── */ + +/* Helper: is the argument the symbol 'global? */ +static bool is_global_arg(ray_t* arg) { + if (arg && arg->type == -RAY_SYM) { + ray_t* s = ray_sym_str(arg->i64); + if (s && ray_str_len(s) == 6 && memcmp(ray_str_ptr(s), "global", 6) == 0) + return true; + } + return false; +} + +/* Compute seconds since 2000.01.01 00:00:00 UTC (the rayforce epoch) */ +static time_t ray_epoch_offset(void) { + /* 2000-01-01 00:00:00 UTC = 946684800 seconds after 1970 epoch */ + return (time_t)946684800; +} + +/* (date 'local) or (date 'global) — returns current date as DATE atom + * (days since 2000-01-01). Any argument other than the symbol 'global, + * including none, selects local time. + * Overloaded: if arg is a DATE / TIME / TIMESTAMP value or vector, + * returns `arg` truncated to the day boundary (RAY_TIMESTAMP result). + * This lets `(date ts)` and `ts.date` both flow through the registered + * unary builtin with no special-case detour. 
+ * Returns a "domain" error when the current time cannot be broken down. */ +ray_t* ray_date_clock_fn(ray_t* arg) { + if (arg) { + int8_t t = arg->type < 0 ? 
(int8_t)-arg->type : arg->type; + if (t == RAY_DATE || t == RAY_TIME || t == RAY_TIMESTAMP) + return ray_temporal_truncate(arg, RAY_EXTRACT_DAY); + } + bool local = !is_global_arg(arg); + time_t now = time(NULL); + struct tm* t = local ? localtime(&now) : gmtime(&now); + if (!t) return ray_error("domain", "date: failed to get current time"); + + /* Count days straight from the broken-down calendar fields. Going + * through mktime() on local midnight yields a UTC instant, and + * dividing that by 86400 lands on the previous day in every zone + * east of UTC. The civil-date route does not depend on the zone, + * so one expression serves both the localtime and gmtime cases. */ + int64_t days = days_from_civil((int64_t)t->tm_year + 1900, + (int64_t)t->tm_mon + 1, + (int64_t)t->tm_mday); + return ray_date(days); +} + +/* (time 'local) or (time 'global) — returns current time as TIME atom. + * Overloaded same way as ray_date_clock_fn: temporal argument ⇒ + * truncate to second boundary (RAY_TIMESTAMP); symbol / default ⇒ clock. */ +ray_t* ray_time_clock_fn(ray_t* arg) { + if (arg) { + int8_t t = arg->type < 0 ? (int8_t)-arg->type : arg->type; + if (t == RAY_DATE || t == RAY_TIME || t == RAY_TIMESTAMP) + return ray_temporal_truncate(arg, RAY_EXTRACT_SECOND); + } + bool local = !is_global_arg(arg); + time_t now = time(NULL); + struct tm* t = local ? localtime(&now) : gmtime(&now); + if (!t) return ray_error("domain", "time: failed to get current time"); + + int32_t ms = t->tm_hour * 3600000 + t->tm_min * 60000 + t->tm_sec * 1000; + return ray_time((int64_t)ms); +} + +/* (timestamp 'local) or (timestamp 'global) — returns current timestamp (ns since 2000.01.01). + * 'global counts from the UTC instant; anything else counts from the + * local wall-clock reading, in line with what (date 'local) and + * (time 'local) report. Resolution is whole seconds. + * Returns a "domain" error when the current time cannot be broken down. 
*/ +ray_t* ray_timestamp_clock_fn(ray_t* arg) { + bool local = !is_global_arg(arg); + time_t now = time(NULL); + struct tm* t = local ? 
localtime(&now) : gmtime(&now); + if (!t) return ray_error("domain", "timestamp: failed to get current time"); + + int64_t secs; + if (!local) { + secs = now - ray_epoch_offset(); + } else { + /* Rebuild the seconds count from the local calendar fields. + * mktime() applied to a localtime() result just returns `now` + * again, which would make 'local indistinguishable from 'global. */ + int64_t days = days_from_civil((int64_t)t->tm_year + 1900, + (int64_t)t->tm_mon + 1, + (int64_t)t->tm_mday); + secs = days * 86400LL + t->tm_hour * 3600LL + t->tm_min * 60LL + t->tm_sec; + } + + int64_t nanos = secs * 1000000000LL; + return ray_timestamp(nanos); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/temporal.h b/crates/rayforce-sys/vendor/rayforce/src/ops/temporal.h new file mode 100644 index 0000000..91016c2 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/temporal.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#ifndef RAY_OPS_TEMPORAL_H +#define RAY_OPS_TEMPORAL_H + +#include +#include "ops/ops.h" + +/* Extract a calendar / clock field from a RAY_DATE, RAY_TIME, or + * RAY_TIMESTAMP input (vector or atom). `field` is one of the + * RAY_EXTRACT_* codes from ops/ops.h. + * + * Vector input → RAY_I64 vector of the same length, each slot holding the + * extracted value. Atom input (type < 0) → RAY_I64 atom. Returns an + * error ray_t* if the input isn't a supported temporal type. The + * returned value is caller-owned (rc >= 1); caller must ray_release. + * Does NOT consume the input's refcount. */ +ray_t* ray_temporal_extract(ray_t* input, int field); + +/* Map a sym_id (used as a dotted-name segment, e.g. `.dd`, `.yyyy`, `.mm`) + * to a RAY_EXTRACT_* field code. Returns -1 if the sym isn't a known + * temporal field name. Recognised segments: + * yyyy → RAY_EXTRACT_YEAR + * mm → RAY_EXTRACT_MONTH + * dd → RAY_EXTRACT_DAY + * hh → RAY_EXTRACT_HOUR + * minute → RAY_EXTRACT_MINUTE + * ss → RAY_EXTRACT_SECOND + * dow → RAY_EXTRACT_DOW (ISO day-of-week 1..7, Mon=1) + * doy → RAY_EXTRACT_DOY (day-of-year 1..366) + * + * `mm` is unambiguously MONTH — MINUTE spelling stays long-form + * because a two-letter token can't serve both meanings in a uniform + * dotted walk that has no container-type-dependent dispatch. */ +int ray_temporal_field_from_sym(int64_t sym_id); + +/* Truncate a temporal value/vector down to the start of its day + * (`kind == RAY_EXTRACT_DAY`, used for `.date`) or to the start of its + * second (any other `kind`; `.time` passes RAY_EXTRACT_SECOND). + * Truncation floors, so values before 2000-01-01 move further into the + * past rather than toward zero. + * Returns a freshly-allocated RAY_TIMESTAMP-typed ray_t* (caller-owned) + * holding nanoseconds since 2000-01-01; + * nulls in the input propagate to nulls in the output. The + * RAY_EXTRACT_* codes are reused for `kind` so + * the set of codes stays consistent with exec_date_trunc. 
A NULL or + * error `input` is handed back unchanged. Returns + * ray_error("type", ...) if input isn't a supported temporal type. */ +ray_t* ray_temporal_truncate(ray_t* input, int kind); + +/* Dotted-segment sym → truncate kind (see above).
Returns -1 if the + * segment isn't one of the truncate-flavoured names (`date` / `time`). */ +int ray_temporal_trunc_from_sym(int64_t sym_id); + +/* Unary builtins: thin wrappers over ray_temporal_extract with the + * field pre-bound. Exposed so eval.c can register them alongside the + * rest of the language's unary functions — `(ss ts)` and `ts.ss` then + * dispatch through the normal call machinery. */ +ray_t* ray_extract_ss_fn(ray_t* x); +ray_t* ray_extract_hh_fn(ray_t* x); +ray_t* ray_extract_minute_fn(ray_t* x); +ray_t* ray_extract_yyyy_fn(ray_t* x); +ray_t* ray_extract_mm_fn(ray_t* x); +ray_t* ray_extract_dd_fn(ray_t* x); +ray_t* ray_extract_dow_fn(ray_t* x); +ray_t* ray_extract_doy_fn(ray_t* x); + +#endif /* RAY_OPS_TEMPORAL_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/traverse.c b/crates/rayforce-sys/vendor/rayforce/src/ops/traverse.c new file mode 100644 index 0000000..3e12201 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/traverse.c @@ -0,0 +1,2641 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ops/internal.h" + +/* ============================================================================ + * Graph execution functions + * ============================================================================ */ + +/* exec_expand_factorized: emit factorized output for expand+group fusion. + * Returns a table with _src (unique sources) and _count (degree per source). + * This avoids materializing the full (src, dst) cross-product. */ +static ray_t* exec_expand_factorized(ray_rel_t* rel, uint8_t direction, ray_t* src_vec) { + int64_t n_src = src_vec->len; + int64_t* src_data = (int64_t*)ray_data(src_vec); + + /* Compute degrees for each source node */ + ray_t* out_src = ray_vec_new(RAY_I64, n_src > 0 ? n_src : 1); + ray_t* out_cnt = ray_vec_new(RAY_I64, n_src > 0 ? 
n_src : 1); + if (!out_src || RAY_IS_ERR(out_src) || !out_cnt || RAY_IS_ERR(out_cnt)) { + if (out_src && !RAY_IS_ERR(out_src)) ray_release(out_src); + if (out_cnt && !RAY_IS_ERR(out_cnt)) ray_release(out_cnt); + return ray_error("oom", NULL); + } + + int64_t* sd = (int64_t*)ray_data(out_src); + int64_t* cd = (int64_t*)ray_data(out_cnt); + int64_t out_len = 0; + + for (int64_t i = 0; i < n_src; i++) { + int64_t node = src_data[i]; + int64_t deg = 0; + if (direction == 0 || direction == 2) { + if (node >= 0 && node < rel->fwd.n_nodes) + deg += ray_csr_degree(&rel->fwd, node); + } + if (direction == 1 || direction == 2) { + if (node >= 0 && node < rel->rev.n_nodes) + deg += ray_csr_degree(&rel->rev, node); + } + if (deg > 0) { + sd[out_len] = node; + cd[out_len] = deg; + out_len++; + } + } + out_src->len = out_len; + out_cnt->len = out_len; + + int64_t src_sym = ray_sym_intern("_src", 4); + int64_t cnt_sym = ray_sym_intern("_count", 6); + ray_t* result = ray_table_new(2); + if (!result || RAY_IS_ERR(result)) { + ray_release(out_src); ray_release(out_cnt); + return ray_error("oom", NULL); + } + ray_t* tmp = ray_table_add_col(result, src_sym, out_src); + if (!tmp || RAY_IS_ERR(tmp)) { ray_release(out_src); ray_release(out_cnt); ray_release(result); return ray_error("oom", NULL); } + result = tmp; + tmp = ray_table_add_col(result, cnt_sym, out_cnt); + if (!tmp || RAY_IS_ERR(tmp)) { ray_release(out_src); ray_release(out_cnt); ray_release(result); return ray_error("oom", NULL); } + result = tmp; + ray_release(out_src); ray_release(out_cnt); + return result; +} + +/* exec_expand: 1-hop CSR neighbor expansion. + * Count-then-fill pattern (same as exec_join). 
*/ +ray_t* exec_expand(ray_graph_t* g, ray_op_t* op, ray_t* src_vec) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + + ray_rel_t* rel = (ray_rel_t*)ext->graph.rel; + if (!rel) return ray_error("schema", NULL); + + /* Factorized mode: emit pre-aggregated degree counts */ + if (ext->graph.factorized) + return exec_expand_factorized(rel, ext->graph.direction, src_vec); + + uint8_t direction = ext->graph.direction; + int64_t n_src = src_vec->len; + int64_t* src_data = (int64_t*)ray_data(src_vec); + + /* SIP runtime: check for source-side selection bitmap stored on the + * expand ext node (set by optimizer sip_pass or manually for testing). + * + * If sip_sel is not pre-built but the optimizer left a filter hint in + * pad[2..3], build a source-side bitmap by marking all source nodes + * that have degree > 0 in the active CSR direction. */ + uint64_t* src_sel_bits = NULL; + int64_t src_sel_len = 0; + ray_t* sip_sel = (ray_t*)ext->graph.sip_sel; + if (!sip_sel) { + uint8_t filter_hint = ext->base.pad[2]; + if (filter_hint > 0 && n_src > 64) { + /* Build SIP bitmap: mark source nodes with degree > 0. + * For direction==2 (both), check both fwd and rev CSRs. 
*/ + int64_t nn = rel->fwd.n_nodes; + if (rel->rev.n_nodes > nn) nn = rel->rev.n_nodes; + ray_t* built_sel = ray_sel_new(nn); + if (built_sel && !RAY_IS_ERR(built_sel)) { + uint64_t* bits = ray_sel_bits(built_sel); + if (direction == 0 || direction == 2) { + for (int64_t nd = 0; nd < rel->fwd.n_nodes; nd++) + if (ray_csr_degree(&rel->fwd, nd) > 0) + RAY_SEL_BIT_SET(bits, nd); + } + if (direction == 1 || direction == 2) { + for (int64_t nd = 0; nd < rel->rev.n_nodes; nd++) + if (ray_csr_degree(&rel->rev, nd) > 0) + RAY_SEL_BIT_SET(bits, nd); + } + ext->graph.sip_sel = built_sel; + sip_sel = built_sel; + } + } + } + if (sip_sel && !RAY_IS_ERR(sip_sel) && sip_sel->type == RAY_SEL) { + src_sel_bits = ray_sel_bits(sip_sel); + src_sel_len = sip_sel->len; + } + + /* Helper to expand one CSR direction */ + #define EXPAND_DIR(csr_ptr) do { \ + ray_csr_t* csr = (csr_ptr); \ + /* Phase 1: count total output pairs */ \ + int64_t total = 0; \ + for (int64_t i = 0; i < n_src; i++) { \ + int64_t node = src_data[i]; \ + /* SIP skip: if source node not in selection, skip */ \ + if (src_sel_bits && node >= 0 && node < src_sel_len \ + && !RAY_SEL_BIT_TEST(src_sel_bits, node)) continue; \ + if (node >= 0 && node < csr->n_nodes) \ + total += ray_csr_degree(csr, node); \ + } \ + /* Phase 2: fill */ \ + ray_t* d_src = ray_vec_new(RAY_I64, total > 0 ? total : 1); \ + ray_t* d_dst = ray_vec_new(RAY_I64, total > 0 ? 
total : 1); \ + if (!d_src || RAY_IS_ERR(d_src) || !d_dst || RAY_IS_ERR(d_dst)) { \ + if (d_src && !RAY_IS_ERR(d_src)) ray_release(d_src); \ + if (d_dst && !RAY_IS_ERR(d_dst)) ray_release(d_dst); \ + return ray_error("oom", NULL); \ + } \ + d_src->len = total; d_dst->len = total; \ + int64_t* sd = (int64_t*)ray_data(d_src); \ + int64_t* dd = (int64_t*)ray_data(d_dst); \ + int64_t pos = 0; \ + for (int64_t i = 0; i < n_src; i++) { \ + int64_t node = src_data[i]; \ + if (node < 0 || node >= csr->n_nodes) continue; \ + /* SIP skip: must match count phase */ \ + if (src_sel_bits && node < src_sel_len \ + && !RAY_SEL_BIT_TEST(src_sel_bits, node)) continue; \ + int64_t cnt; \ + int64_t* nbrs = ray_csr_neighbors(csr, node, &cnt); \ + for (int64_t j = 0; j < cnt; j++) { \ + sd[pos] = node; \ + dd[pos] = nbrs[j]; \ + pos++; \ + } \ + } \ + /* Build result table */ \ + int64_t src_sym = ray_sym_intern("_src", 4); \ + int64_t dst_sym = ray_sym_intern("_dst", 4); \ + ray_t* result = ray_table_new(2); \ + if (!result || RAY_IS_ERR(result)) { \ + ray_release(d_src); ray_release(d_dst); \ + return ray_error("oom", NULL); \ + } \ + ray_t* _tmp = ray_table_add_col(result, src_sym, d_src); \ + if (!_tmp || RAY_IS_ERR(_tmp)) { ray_release(d_src); ray_release(d_dst); ray_release(result); return ray_error("oom", NULL); } \ + result = _tmp; \ + _tmp = ray_table_add_col(result, dst_sym, d_dst); \ + if (!_tmp || RAY_IS_ERR(_tmp)) { ray_release(d_src); ray_release(d_dst); ray_release(result); return ray_error("oom", NULL); } \ + result = _tmp; \ + ray_release(d_src); ray_release(d_dst); \ + return result; \ + } while (0) + + if (direction == 0) { + EXPAND_DIR(&rel->fwd); + } else if (direction == 1) { + EXPAND_DIR(&rel->rev); + } else { + /* direction == 2: both — expand fwd, then rev, concat */ + ray_csr_t* fwd = &rel->fwd; + ray_csr_t* rev = &rel->rev; + + /* Count forward */ + int64_t fwd_total = 0; + for (int64_t i = 0; i < n_src; i++) { + int64_t node = src_data[i]; + if (src_sel_bits 
&& node >= 0 && node < src_sel_len + && !RAY_SEL_BIT_TEST(src_sel_bits, node)) continue; + if (node >= 0 && node < fwd->n_nodes) + fwd_total += ray_csr_degree(fwd, node); + } + /* Count reverse */ + int64_t rev_total = 0; + for (int64_t i = 0; i < n_src; i++) { + int64_t node = src_data[i]; + if (src_sel_bits && node >= 0 && node < src_sel_len + && !RAY_SEL_BIT_TEST(src_sel_bits, node)) continue; + if (node >= 0 && node < rev->n_nodes) + rev_total += ray_csr_degree(rev, node); + } + + int64_t total = fwd_total + rev_total; + ray_t* d_src = ray_vec_new(RAY_I64, total > 0 ? total : 1); + ray_t* d_dst = ray_vec_new(RAY_I64, total > 0 ? total : 1); + if (!d_src || RAY_IS_ERR(d_src) || !d_dst || RAY_IS_ERR(d_dst)) { + if (d_src && !RAY_IS_ERR(d_src)) ray_release(d_src); + if (d_dst && !RAY_IS_ERR(d_dst)) ray_release(d_dst); + return ray_error("oom", NULL); + } + d_src->len = total; d_dst->len = total; + int64_t* sd = (int64_t*)ray_data(d_src); + int64_t* dd = (int64_t*)ray_data(d_dst); + int64_t pos = 0; + + /* Fill forward */ + for (int64_t i = 0; i < n_src; i++) { + int64_t node = src_data[i]; + if (node < 0 || node >= fwd->n_nodes) continue; + if (src_sel_bits && node < src_sel_len + && !RAY_SEL_BIT_TEST(src_sel_bits, node)) continue; + int64_t cnt; + int64_t* nbrs = ray_csr_neighbors(fwd, node, &cnt); + for (int64_t j = 0; j < cnt; j++) { + sd[pos] = node; dd[pos] = nbrs[j]; pos++; + } + } + /* Fill reverse */ + for (int64_t i = 0; i < n_src; i++) { + int64_t node = src_data[i]; + if (node < 0 || node >= rev->n_nodes) continue; + if (src_sel_bits && node < src_sel_len + && !RAY_SEL_BIT_TEST(src_sel_bits, node)) continue; + int64_t cnt; + int64_t* nbrs = ray_csr_neighbors(rev, node, &cnt); + for (int64_t j = 0; j < cnt; j++) { + sd[pos] = node; dd[pos] = nbrs[j]; pos++; + } + } + + int64_t src_sym = ray_sym_intern("_src", 4); + int64_t dst_sym = ray_sym_intern("_dst", 4); + ray_t* result = ray_table_new(2); + if (!result || RAY_IS_ERR(result)) { + ray_release(d_src); 
ray_release(d_dst); + return ray_error("oom", NULL); + } + ray_t* tmp = ray_table_add_col(result, src_sym, d_src); + if (!tmp || RAY_IS_ERR(tmp)) { ray_release(d_src); ray_release(d_dst); ray_release(result); return ray_error("oom", NULL); } + result = tmp; + tmp = ray_table_add_col(result, dst_sym, d_dst); + if (!tmp || RAY_IS_ERR(tmp)) { ray_release(d_src); ray_release(d_dst); ray_release(result); return ray_error("oom", NULL); } + result = tmp; + ray_release(d_src); ray_release(d_dst); + return result; + } + #undef EXPAND_DIR +} + +/* exec_var_expand: iterative BFS with depth limit and cycle detection */ +ray_t* exec_var_expand(ray_graph_t* g, ray_op_t* op, ray_t* start_vec) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + + ray_rel_t* rel = (ray_rel_t*)ext->graph.rel; + if (!rel) return ray_error("schema", NULL); + + uint8_t direction = ext->graph.direction; + uint8_t min_depth = ext->graph.min_depth; + uint8_t max_depth = ext->graph.max_depth; + ray_csr_t* csr_fwd = &rel->fwd; + ray_csr_t* csr_rev = &rel->rev; + /* For direction==2 (both), use fwd for n_nodes bound but expand both */ + ray_csr_t* csr = (direction == 1) ? 
csr_rev : csr_fwd; + + int64_t n_start = start_vec->len; + int64_t* start_data = (int64_t*)ray_data(start_vec); + + /* Pre-allocate output buffers (grow as needed) */ + int64_t out_cap = 1024; + ray_t *start_hdr, *end_hdr, *depth_hdr; + int64_t* out_start = (int64_t*)scratch_alloc(&start_hdr, (size_t)out_cap * sizeof(int64_t)); + int64_t* out_end = (int64_t*)scratch_alloc(&end_hdr, (size_t)out_cap * sizeof(int64_t)); + int64_t* out_depth = (int64_t*)scratch_alloc(&depth_hdr, (size_t)out_cap * sizeof(int64_t)); + if (!out_start || !out_end || !out_depth) { + scratch_free(start_hdr); scratch_free(end_hdr); scratch_free(depth_hdr); + return ray_error("oom", NULL); + } + int64_t out_count = 0; + + /* For direction==2, use the larger n_nodes bound */ + int64_t bfs_n_nodes = csr->n_nodes; + if (direction == 2 && csr_rev->n_nodes > bfs_n_nodes) + bfs_n_nodes = csr_rev->n_nodes; + + /* BFS per start node */ + for (int64_t s = 0; s < n_start; s++) { + int64_t start_node = start_data[s]; + if (start_node < 0 || start_node >= bfs_n_nodes) continue; + + /* Visited bitmap via RAY_SEL */ + ray_t* visited_sel = ray_sel_new(bfs_n_nodes); + if (!visited_sel || RAY_IS_ERR(visited_sel)) continue; + uint64_t* visited = ray_sel_bits(visited_sel); + RAY_SEL_BIT_SET(visited, start_node); + + /* Frontier */ + ray_t* front_hdr; + int64_t front_cap = 256; + int64_t* frontier = (int64_t*)scratch_alloc(&front_hdr, (size_t)front_cap * sizeof(int64_t)); + if (!frontier) { ray_release(visited_sel); continue; } + frontier[0] = start_node; + int64_t front_len = 1; + + for (uint8_t depth = 1; depth <= max_depth && front_len > 0; depth++) { + ray_t* next_hdr; + int64_t next_cap = (front_len > INT64_MAX / 4) ? 
INT64_MAX : front_len * 4; + if (next_cap < 64) next_cap = 64; + int64_t* next_front = (int64_t*)scratch_alloc(&next_hdr, (size_t)next_cap * sizeof(int64_t)); + if (!next_front) { scratch_free(front_hdr); ray_release(visited_sel); goto cleanup; } + int64_t next_len = 0; + + for (int64_t f = 0; f < front_len; f++) { + int64_t node = frontier[f]; + /* Expand neighbors from active CSR(s). + * For direction==2 (both), expand fwd then rev. */ + int n_csrs = (direction == 2) ? 2 : 1; + ray_csr_t* csrs[2] = { csr, csr_rev }; + for (int ci = 0; ci < n_csrs; ci++) { + ray_csr_t* cur_csr = csrs[ci]; + if (node < 0 || node >= cur_csr->n_nodes) continue; + int64_t cnt; + int64_t* nbrs = ray_csr_neighbors(cur_csr, node, &cnt); + for (int64_t j = 0; j < cnt; j++) { + int64_t nbr = nbrs[j]; + if (nbr < 0 || nbr >= bfs_n_nodes) continue; + if (RAY_SEL_BIT_TEST(visited, nbr)) continue; + RAY_SEL_BIT_SET(visited, nbr); + + /* Grow next_front if needed */ + if (next_len >= next_cap) { + if (next_cap > INT64_MAX / 2) break; + int64_t new_cap = next_cap * 2; + int64_t* new_nf = (int64_t*)scratch_realloc(&next_hdr, + (size_t)next_cap * sizeof(int64_t), + (size_t)new_cap * sizeof(int64_t)); + if (!new_nf) break; + next_front = new_nf; + next_cap = new_cap; + } + next_front[next_len++] = nbr; + + /* Emit if within depth range */ + if (depth >= min_depth) { + if (out_count >= out_cap) { + if (out_cap > INT64_MAX / 2) break; + int64_t new_oc = out_cap * 2; + /* Grow all three buffers atomically — alloc new + * copies first, commit only if all succeed. 
*/ + ray_t *ns_h = NULL, *ne_h = NULL, *nd_h = NULL; + size_t old_sz = (size_t)out_cap * sizeof(int64_t); + size_t new_sz = (size_t)new_oc * sizeof(int64_t); + int64_t* ns = (int64_t*)scratch_alloc(&ns_h, new_sz); + int64_t* ne = (int64_t*)scratch_alloc(&ne_h, new_sz); + int64_t* nd_buf = (int64_t*)scratch_alloc(&nd_h, new_sz); + if (!ns || !ne || !nd_buf) { + scratch_free(ns_h); scratch_free(ne_h); scratch_free(nd_h); + break; + } + memcpy(ns, out_start, old_sz); + memcpy(ne, out_end, old_sz); + memcpy(nd_buf, out_depth, old_sz); + scratch_free(start_hdr); scratch_free(end_hdr); scratch_free(depth_hdr); + start_hdr = ns_h; end_hdr = ne_h; depth_hdr = nd_h; + out_start = ns; out_end = ne; out_depth = nd_buf; + out_cap = new_oc; + } + out_start[out_count] = start_node; + out_end[out_count] = nbr; + out_depth[out_count] = depth; + out_count++; + } + } + } /* end for ci (CSR directions) */ + } + + scratch_free(front_hdr); + front_hdr = next_hdr; + frontier = next_front; + front_len = next_len; + } + + scratch_free(front_hdr); + ray_release(visited_sel); + } + +cleanup:; + /* Build output table */ + ray_t* v_start = ray_vec_from_raw(RAY_I64, out_start, out_count); + ray_t* v_end = ray_vec_from_raw(RAY_I64, out_end, out_count); + ray_t* v_depth = ray_vec_from_raw(RAY_I64, out_depth, out_count); + scratch_free(start_hdr); scratch_free(end_hdr); scratch_free(depth_hdr); + + if (!v_start || RAY_IS_ERR(v_start) || !v_end || RAY_IS_ERR(v_end) || + !v_depth || RAY_IS_ERR(v_depth)) { + if (v_start && !RAY_IS_ERR(v_start)) ray_release(v_start); + if (v_end && !RAY_IS_ERR(v_end)) ray_release(v_end); + if (v_depth && !RAY_IS_ERR(v_depth)) ray_release(v_depth); + return ray_error("oom", NULL); + } + + int64_t start_sym = ray_sym_intern("_start", 6); + int64_t end_sym = ray_sym_intern("_end", 4); + int64_t depth_sym = ray_sym_intern("_depth", 6); + + ray_t* result = ray_table_new(3); + if (!result || RAY_IS_ERR(result)) { + ray_release(v_start); ray_release(v_end); 
ray_release(v_depth); + return ray_error("oom", NULL); + } + ray_t* tmp = ray_table_add_col(result, start_sym, v_start); + if (!tmp || RAY_IS_ERR(tmp)) { ray_release(v_start); ray_release(v_end); ray_release(v_depth); ray_release(result); return ray_error("oom", NULL); } + result = tmp; + tmp = ray_table_add_col(result, end_sym, v_end); + if (!tmp || RAY_IS_ERR(tmp)) { ray_release(v_start); ray_release(v_end); ray_release(v_depth); ray_release(result); return ray_error("oom", NULL); } + result = tmp; + tmp = ray_table_add_col(result, depth_sym, v_depth); + if (!tmp || RAY_IS_ERR(tmp)) { ray_release(v_start); ray_release(v_end); ray_release(v_depth); ray_release(result); return ray_error("oom", NULL); } + result = tmp; + ray_release(v_start); ray_release(v_end); ray_release(v_depth); + return result; +} + +/* exec_shortest_path: BFS from src to dst with parent tracking */ +ray_t* exec_shortest_path(ray_graph_t* g, ray_op_t* op, + ray_t* src_val, ray_t* dst_val) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + + ray_rel_t* rel = (ray_rel_t*)ext->graph.rel; + if (!rel) return ray_error("schema", NULL); + uint8_t direction = ext->graph.direction; + ray_csr_t* csr = (direction == 1) ? &rel->rev : &rel->fwd; + ray_csr_t* csr_rev = &rel->rev; + int n_csrs = (direction == 2) ? 
2 : 1; + ray_csr_t* csrs[2] = { csr, csr_rev }; + int64_t bfs_n_nodes = csr->n_nodes; + if (direction == 2 && csr_rev->n_nodes > bfs_n_nodes) + bfs_n_nodes = csr_rev->n_nodes; + uint8_t max_depth = ext->graph.max_depth; + + /* Extract single I64 values */ + int64_t src_node, dst_node; + if (ray_is_atom(src_val)) { + src_node = src_val->i64; + } else { + if (src_val->len == 0) return ray_error("range", NULL); + src_node = ((int64_t*)ray_data(src_val))[0]; + } + if (ray_is_atom(dst_val)) { + dst_node = dst_val->i64; + } else { + if (dst_val->len == 0) return ray_error("range", NULL); + dst_node = ((int64_t*)ray_data(dst_val))[0]; + } + + if (src_node < 0 || src_node >= bfs_n_nodes || + dst_node < 0 || dst_node >= bfs_n_nodes) + return ray_error("range", NULL); + + /* Special case: src == dst */ + if (src_node == dst_node) { + ray_t* v_node = ray_vec_from_raw(RAY_I64, &src_node, 1); + int64_t zero = 0; + ray_t* v_depth = ray_vec_from_raw(RAY_I64, &zero, 1); + if (!v_node || RAY_IS_ERR(v_node) || !v_depth || RAY_IS_ERR(v_depth)) { + if (v_node && !RAY_IS_ERR(v_node)) ray_release(v_node); + if (v_depth && !RAY_IS_ERR(v_depth)) ray_release(v_depth); + return ray_error("oom", NULL); + } + ray_t* result = ray_table_new(2); + if (!result || RAY_IS_ERR(result)) { ray_release(v_node); ray_release(v_depth); return ray_error("oom", NULL); } + ray_t* tmp = ray_table_add_col(result, sym_intern_safe("_node", 5), v_node); + if (!tmp || RAY_IS_ERR(tmp)) { ray_release(v_node); ray_release(v_depth); ray_release(result); return ray_error("oom", NULL); } + result = tmp; + tmp = ray_table_add_col(result, sym_intern_safe("_depth", 6), v_depth); + if (!tmp || RAY_IS_ERR(tmp)) { ray_release(v_node); ray_release(v_depth); ray_release(result); return ray_error("oom", NULL); } + result = tmp; + ray_release(v_node); ray_release(v_depth); + return result; + } + + /* Allocate parent array (-1 = unvisited) */ + ray_t* parent_hdr; + int64_t* parent = (int64_t*)scratch_alloc(&parent_hdr, + 
(size_t)bfs_n_nodes * sizeof(int64_t)); + if (!parent) return ray_error("oom", NULL); + memset(parent, 0xFF, (size_t)bfs_n_nodes * sizeof(int64_t)); /* -1 */ + parent[src_node] = src_node; + + /* BFS queue */ + ray_t* queue_hdr; + int64_t q_cap = 1024; + int64_t* queue = (int64_t*)scratch_alloc(&queue_hdr, (size_t)q_cap * sizeof(int64_t)); + if (!queue) { scratch_free(parent_hdr); return ray_error("oom", NULL); } + queue[0] = src_node; + int64_t q_start = 0, q_end = 1; + bool found = false; + + for (uint8_t depth = 1; depth <= max_depth && !found; depth++) { + int64_t level_end = q_end; + for (int64_t qi = q_start; qi < level_end && !found; qi++) { + int64_t node = queue[qi]; + for (int ci = 0; ci < n_csrs && !found; ci++) { + ray_csr_t* cur_csr = csrs[ci]; + if (node < 0 || node >= cur_csr->n_nodes) continue; + int64_t cnt; + int64_t* nbrs = ray_csr_neighbors(cur_csr, node, &cnt); + for (int64_t j = 0; j < cnt; j++) { + int64_t nbr = nbrs[j]; + if (nbr < 0 || nbr >= bfs_n_nodes) continue; + if (parent[nbr] != -1) continue; + parent[nbr] = node; + + if (nbr == dst_node) { found = true; break; } + + /* Grow queue if needed */ + if (q_end >= q_cap) { + if (q_cap > INT64_MAX / 2) { found = false; goto bfs_done; } + int64_t new_cap = q_cap * 2; + int64_t* new_q = (int64_t*)scratch_realloc(&queue_hdr, + (size_t)q_cap * sizeof(int64_t), + (size_t)new_cap * sizeof(int64_t)); + if (!new_q) { found = false; goto bfs_done; } + queue = new_q; + q_cap = new_cap; + } + queue[q_end++] = nbr; + } + } /* end for ci (CSR directions) */ + } + q_start = level_end; + } + +bfs_done: + scratch_free(queue_hdr); + + if (!found) { + scratch_free(parent_hdr); + return ray_error("range", NULL); + } + + /* Reconstruct path */ + int64_t path_buf[256]; + int64_t path_len = 0; + int64_t cur = dst_node; + while (cur != src_node && path_len < 255) { + path_buf[path_len++] = cur; + cur = parent[cur]; + } + if (cur != src_node) { + scratch_free(parent_hdr); + return ray_error("range", "path exceeds 
254 hops"); + } + path_buf[path_len++] = src_node; + scratch_free(parent_hdr); + + /* Reverse path */ + for (int64_t i = 0; i < path_len / 2; i++) { + int64_t tmp = path_buf[i]; + path_buf[i] = path_buf[path_len - 1 - i]; + path_buf[path_len - 1 - i] = tmp; + } + + /* Build output table */ + ray_t* v_node = ray_vec_from_raw(RAY_I64, path_buf, path_len); + ray_t* v_depth = ray_vec_new(RAY_I64, path_len); + if (!v_node || RAY_IS_ERR(v_node) || !v_depth || RAY_IS_ERR(v_depth)) { + if (v_node && !RAY_IS_ERR(v_node)) ray_release(v_node); + if (v_depth && !RAY_IS_ERR(v_depth)) ray_release(v_depth); + return ray_error("oom", NULL); + } + v_depth->len = path_len; + int64_t* dep_data = (int64_t*)ray_data(v_depth); + for (int64_t i = 0; i < path_len; i++) dep_data[i] = i; + + int64_t node_sym = ray_sym_intern("_node", 5); + int64_t depth_sym = ray_sym_intern("_depth", 6); + ray_t* result = ray_table_new(2); + if (!result || RAY_IS_ERR(result)) { ray_release(v_node); ray_release(v_depth); return ray_error("oom", NULL); } + ray_t* tmp = ray_table_add_col(result, node_sym, v_node); + if (!tmp || RAY_IS_ERR(tmp)) { ray_release(v_node); ray_release(v_depth); ray_release(result); return ray_error("oom", NULL); } + result = tmp; + tmp = ray_table_add_col(result, depth_sym, v_depth); + if (!tmp || RAY_IS_ERR(tmp)) { ray_release(v_node); ray_release(v_depth); ray_release(result); return ray_error("oom", NULL); } + result = tmp; + ray_release(v_node); ray_release(v_depth); + return result; +} + +/* -------------------------------------------------------------------------- + * exec_pagerank: iterative PageRank over CSR adjacency. + * + * rank[v] = (1 - d)/N + d * SUM(rank[u] / out_degree[u]) for u in in_neighbors(v) + * + * Uses reverse CSR for in-neighbors, forward CSR for out-degree. 
+ * -------------------------------------------------------------------------- */ +ray_t* exec_pagerank(ray_graph_t* g, ray_op_t* op) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + + ray_rel_t* rel = (ray_rel_t*)ext->graph.rel; + if (!rel) return ray_error("schema", NULL); + + int64_t n = rel->fwd.n_nodes; + uint16_t iters = ext->graph.max_iter; + double damping = ext->graph.damping; + + if (n <= 0) return ray_error("length", NULL); + + /* Arena for all scratch memory — freed in one shot */ + ray_scratch_arena_t arena; + ray_scratch_arena_init(&arena); + + double* rank = (double*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(double)); + double* rank_new = (double*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(double)); + if (!rank || !rank_new) { + ray_scratch_arena_reset(&arena); + return ray_error("oom", NULL); + } + + double init = 1.0 / (double)n; + for (int64_t i = 0; i < n; i++) rank[i] = init; + + /* Get raw CSR arrays for direct access */ + int64_t* fwd_off = (int64_t*)ray_data(rel->fwd.offsets); + int64_t* rev_off = (int64_t*)ray_data(rel->rev.offsets); + int64_t* rev_tgt = (int64_t*)ray_data(rel->rev.targets); + + double base = (1.0 - damping) / (double)n; + + for (uint16_t iter = 0; iter < iters; iter++) { + /* Dangling node correction: redistribute rank of zero-out-degree nodes */ + double dangling_sum = 0.0; + for (int64_t u = 0; u < n; u++) { + if (fwd_off[u + 1] == fwd_off[u]) dangling_sum += rank[u]; + } + double adjusted_base = base + damping * dangling_sum / (double)n; + + for (int64_t v = 0; v < n; v++) { + double sum = 0.0; + /* Iterate over in-neighbors of v using reverse CSR */ + int64_t rev_start = rev_off[v]; + int64_t rev_end = rev_off[v + 1]; + for (int64_t j = rev_start; j < rev_end; j++) { + int64_t u = rev_tgt[j]; + /* out_degree of u from forward CSR */ + int64_t out_deg = fwd_off[u + 1] - fwd_off[u]; + if (out_deg > 0) { + sum += rank[u] / (double)out_deg; + } + } + rank_new[v] = 
adjusted_base + damping * sum; + } + /* Swap */ + double* tmp = rank; + rank = rank_new; + rank_new = tmp; + } + + /* Build output table: _node (I64), _rank (F64) */ + ray_t* node_vec = ray_vec_new(RAY_I64, n); + ray_t* rank_vec = ray_vec_new(RAY_F64, n); + if (!node_vec || RAY_IS_ERR(node_vec) || !rank_vec || RAY_IS_ERR(rank_vec)) { + ray_scratch_arena_reset(&arena); + if (node_vec && !RAY_IS_ERR(node_vec)) ray_release(node_vec); + if (rank_vec && !RAY_IS_ERR(rank_vec)) ray_release(rank_vec); + return ray_error("oom", NULL); + } + + int64_t* ndata = (int64_t*)ray_data(node_vec); + double* rdata = (double*)ray_data(rank_vec); + for (int64_t i = 0; i < n; i++) { + ndata[i] = i; + rdata[i] = rank[i]; + } + node_vec->len = n; + rank_vec->len = n; + + ray_scratch_arena_reset(&arena); + + /* Package as table with named columns */ + ray_t* result = ray_table_new(2); + if (!result || RAY_IS_ERR(result)) { + ray_release(node_vec); + ray_release(rank_vec); + return ray_error("oom", NULL); + } + result = ray_table_add_col(result, sym_intern_safe("_node", 5), node_vec); + ray_release(node_vec); + result = ray_table_add_col(result, sym_intern_safe("_rank", 5), rank_vec); + ray_release(rank_vec); + + return result; +} + +/* -------------------------------------------------------------------------- + * exec_connected_comp: connected components via label propagation. + * Treats graph as undirected (uses both forward and reverse CSR). + * O(diameter * |E|) time. 
+ * -------------------------------------------------------------------------- */ +ray_t* exec_connected_comp(ray_graph_t* g, ray_op_t* op) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + + ray_rel_t* rel = (ray_rel_t*)ext->graph.rel; + if (!rel) return ray_error("schema", NULL); + + int64_t n = rel->fwd.n_nodes; + if (n <= 0) return ray_error("length", NULL); + + /* Arena for all scratch memory — freed in one shot */ + ray_scratch_arena_t arena; + ray_scratch_arena_init(&arena); + + int64_t* label = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + if (!label) { + ray_scratch_arena_reset(&arena); + return ray_error("oom", NULL); + } + + /* Initialize: each node is its own component */ + for (int64_t i = 0; i < n; i++) label[i] = i; + + int64_t* fwd_off = (int64_t*)ray_data(rel->fwd.offsets); + int64_t* fwd_tgt = (int64_t*)ray_data(rel->fwd.targets); + int64_t* rev_off = (int64_t*)ray_data(rel->rev.offsets); + int64_t* rev_tgt = (int64_t*)ray_data(rel->rev.targets); + + /* Iterate until convergence */ + bool changed = true; + while (changed) { + changed = false; + for (int64_t v = 0; v < n; v++) { + int64_t min_label = label[v]; + /* Forward neighbors */ + for (int64_t j = fwd_off[v]; j < fwd_off[v + 1]; j++) { + int64_t u = fwd_tgt[j]; + if (label[u] < min_label) min_label = label[u]; + } + /* Reverse neighbors */ + for (int64_t j = rev_off[v]; j < rev_off[v + 1]; j++) { + int64_t u = rev_tgt[j]; + if (label[u] < min_label) min_label = label[u]; + } + if (min_label < label[v]) { + label[v] = min_label; + changed = true; + } + } + } + + /* Build output table */ + ray_t* node_vec = ray_vec_new(RAY_I64, n); + ray_t* comp_vec = ray_vec_new(RAY_I64, n); + if (!node_vec || RAY_IS_ERR(node_vec) || !comp_vec || RAY_IS_ERR(comp_vec)) { + ray_scratch_arena_reset(&arena); + if (node_vec && !RAY_IS_ERR(node_vec)) ray_release(node_vec); + if (comp_vec && !RAY_IS_ERR(comp_vec)) ray_release(comp_vec); + return 
ray_error("oom", NULL); + } + + int64_t* ndata = (int64_t*)ray_data(node_vec); + int64_t* cdata = (int64_t*)ray_data(comp_vec); + for (int64_t i = 0; i < n; i++) { + ndata[i] = i; + cdata[i] = label[i]; + } + node_vec->len = n; + comp_vec->len = n; + + ray_scratch_arena_reset(&arena); + + ray_t* result = ray_table_new(2); + if (!result || RAY_IS_ERR(result)) { + ray_release(node_vec); + ray_release(comp_vec); + return ray_error("oom", NULL); + } + result = ray_table_add_col(result, sym_intern_safe("_node", 5), node_vec); + ray_release(node_vec); + result = ray_table_add_col(result, sym_intern_safe("_component", 10), comp_vec); + ray_release(comp_vec); + + return result; +} + +/* -------------------------------------------------------------------------- + * exec_dijkstra: weighted shortest path via Dijkstra's algorithm. + * Uses a binary min-heap. Reads edge weights from CSR property table. + * Returns table with _node (I64), _dist (F64), _depth (I64). + * -------------------------------------------------------------------------- */ + +/* Min-heap entry for Dijkstra */ +typedef struct { + double dist; + int64_t node; +} dijk_entry_t; + +static void dijk_heap_push(dijk_entry_t* heap, int64_t* size, + double dist, int64_t node) { + int64_t i = (*size)++; + heap[i].dist = dist; + heap[i].node = node; + /* Sift up */ + while (i > 0) { + int64_t parent = (i - 1) / 2; + if (heap[parent].dist <= heap[i].dist) break; + dijk_entry_t tmp = heap[parent]; + heap[parent] = heap[i]; + heap[i] = tmp; + i = parent; + } +} + +static dijk_entry_t dijk_heap_pop(dijk_entry_t* heap, int64_t* size) { + dijk_entry_t top = heap[0]; + (*size)--; + if (*size > 0) { + heap[0] = heap[*size]; + /* Sift down */ + int64_t i = 0; + while (1) { + int64_t left = 2 * i + 1; + int64_t right = 2 * i + 2; + int64_t smallest = i; + if (left < *size && heap[left].dist < heap[smallest].dist) smallest = left; + if (right < *size && heap[right].dist < heap[smallest].dist) smallest = right; + if (smallest == 
i) break; + dijk_entry_t tmp = heap[i]; + heap[i] = heap[smallest]; + heap[smallest] = tmp; + i = smallest; + } + } + return top; +} + +/* Reusable Dijkstra with optional node/edge masks (for Yen's k-shortest) */ +static double dijkstra_masked( + int64_t* fwd_off, int64_t* fwd_tgt, int64_t* fwd_row, + double* weights, int64_t n, + int64_t src_id, int64_t dst_id, + bool* node_mask, /* NULL or bool[n]: true = blocked */ + bool* edge_mask, /* NULL or bool[m]: true = blocked */ + double* dist, /* pre-allocated double[n] */ + int64_t* parent, /* pre-allocated int64_t[n] */ + dijk_entry_t* heap, /* pre-allocated */ + bool* visited) /* pre-allocated bool[n] */ +{ + for (int64_t i = 0; i < n; i++) { + dist[i] = 1e308; + parent[i] = -1; + visited[i] = false; + } + + dist[src_id] = 0.0; + int64_t heap_size = 0; + dijk_heap_push(heap, &heap_size, 0.0, src_id); + + while (heap_size > 0) { + dijk_entry_t top = dijk_heap_pop(heap, &heap_size); + int64_t u = top.node; + if (visited[u]) continue; + visited[u] = true; + + if (u == dst_id) break; + + for (int64_t j = fwd_off[u]; j < fwd_off[u + 1]; j++) { + if (edge_mask && edge_mask[j]) continue; + int64_t v = fwd_tgt[j]; + if (node_mask && node_mask[v]) continue; + int64_t edge_row = fwd_row[j]; + double w = weights[edge_row]; + double new_dist = dist[u] + w; + if (new_dist < dist[v]) { + dist[v] = new_dist; + parent[v] = u; + dijk_heap_push(heap, &heap_size, new_dist, v); + } + } + } + + return dist[dst_id]; +} + +ray_t* exec_dijkstra(ray_graph_t* g, ray_op_t* op, + ray_t* src_val, ray_t* dst_val) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + + ray_rel_t* rel = (ray_rel_t*)ext->graph.rel; + if (!rel) return ray_error("schema", NULL); + if (!rel->fwd.props) return ray_error("schema", NULL); /* need edge properties */ + + int64_t n = rel->fwd.n_nodes; + int64_t m = rel->fwd.n_edges; + int64_t src_id = ray_is_atom(src_val) ? 
src_val->i64 : ((int64_t*)ray_data(src_val))[0]; + int64_t dst_id = !dst_val ? -1 : ray_is_atom(dst_val) ? dst_val->i64 : ((int64_t*)ray_data(dst_val))[0]; + + if (src_id < 0 || src_id >= n) return ray_error("range", NULL); + if (dst_id != -1 && (dst_id < 0 || dst_id >= n)) return ray_error("range", NULL); + + /* Find weight column in edge properties */ + int64_t weight_sym = ext->graph.weight_col_sym; + ray_t* props = rel->fwd.props; + ray_t* weight_vec = ray_table_get_col(props, weight_sym); + if (!weight_vec || RAY_IS_ERR(weight_vec)) return ray_error("schema", NULL); + if (weight_vec->type != RAY_F64) return ray_error("schema", NULL); + double* weights = (double*)ray_data(weight_vec); + + /* Dijkstra requires non-negative edge weights */ + for (int64_t i = 0; i < m; i++) { + if (weights[i] < 0.0) + return ray_error("domain", "Dijkstra requires non-negative edge weights"); + } + + /* Allocate working arrays. + * Heap capacity = max(n, m) + 1: each edge relaxation can push one entry, + * and with lazy deletion (visited check on pop) the heap can grow up to m. */ + int64_t heap_cap = (m > n ? 
m : n) + 1; + + /* Arena for all scratch memory — freed in one shot */ + ray_scratch_arena_t arena; + ray_scratch_arena_init(&arena); + + double* dist = (double*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(double)); + bool* visited = (bool*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(bool)); + int64_t* depth = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + dijk_entry_t* heap = (dijk_entry_t*)ray_scratch_arena_push(&arena, + (size_t)heap_cap * sizeof(dijk_entry_t)); + if (!dist || !visited || !depth || !heap) { + ray_scratch_arena_reset(&arena); + return ray_error("oom", NULL); + } + memset(visited, 0, (size_t)n * sizeof(bool)); + memset(depth, 0, (size_t)n * sizeof(int64_t)); + + for (int64_t i = 0; i < n; i++) { + dist[i] = 1e308; /* infinity */ + } + dist[src_id] = 0.0; + + int64_t heap_size = 0; + dijk_heap_push(heap, &heap_size, 0.0, src_id); + + int64_t* fwd_off = (int64_t*)ray_data(rel->fwd.offsets); + int64_t* fwd_tgt = (int64_t*)ray_data(rel->fwd.targets); + int64_t* fwd_row = (int64_t*)ray_data(rel->fwd.rowmap); + + while (heap_size > 0) { + dijk_entry_t top = dijk_heap_pop(heap, &heap_size); + int64_t u = top.node; + if (visited[u]) continue; + visited[u] = true; + + if (u == dst_id) break; /* early exit if destination reached */ + + for (int64_t j = fwd_off[u]; j < fwd_off[u + 1]; j++) { + int64_t v = fwd_tgt[j]; + int64_t edge_row = fwd_row[j]; + double w = weights[edge_row]; + double new_dist = dist[u] + w; + if (new_dist < dist[v]) { + dist[v] = new_dist; + depth[v] = depth[u] + 1; + dijk_heap_push(heap, &heap_size, new_dist, v); + } + } + } + + /* Collect reachable nodes */ + int64_t count = 0; + for (int64_t i = 0; i < n; i++) { + if (dist[i] < 1e308) count++; + } + + ray_t* node_vec = ray_vec_new(RAY_I64, count); + ray_t* dist_vec = ray_vec_new(RAY_F64, count); + ray_t* depth_vec = ray_vec_new(RAY_I64, count); + if (!node_vec || RAY_IS_ERR(node_vec) || + !dist_vec || RAY_IS_ERR(dist_vec) || + !depth_vec || 
RAY_IS_ERR(depth_vec)) {
+        ray_scratch_arena_reset(&arena);
+        if (node_vec && !RAY_IS_ERR(node_vec)) ray_release(node_vec);
+        if (dist_vec && !RAY_IS_ERR(dist_vec)) ray_release(dist_vec);
+        if (depth_vec && !RAY_IS_ERR(depth_vec)) ray_release(depth_vec);
+        return ray_error("oom", NULL);
+    }
+
+    /* Copy every node whose distance dropped below the 1e308 sentinel into
+     * the three output columns: node id, distance, and hop count. */
+    int64_t* ndata = (int64_t*)ray_data(node_vec);
+    double* ddata = (double*)ray_data(dist_vec);
+    int64_t* hdata = (int64_t*)ray_data(depth_vec);
+    int64_t idx = 0;
+    for (int64_t i = 0; i < n; i++) {
+        if (dist[i] < 1e308) {
+            ndata[idx] = i;
+            ddata[idx] = dist[i];
+            hdata[idx] = depth[i];
+            idx++;
+        }
+    }
+    node_vec->len = count;
+    dist_vec->len = count;
+    depth_vec->len = count;
+
+    ray_scratch_arena_reset(&arena);
+
+    ray_t* result = ray_table_new(3);
+    if (!result || RAY_IS_ERR(result)) {
+        ray_release(node_vec);
+        ray_release(dist_vec);
+        ray_release(depth_vec);
+        return ray_error("oom", NULL);
+    }
+
+    /* Attach the columns one at a time and check every ray_table_add_col
+     * result, as exec_betweenness does. On failure release the table built
+     * so far plus any column not yet handed over, rather than overwriting
+     * `result` with the failed value and passing it to the next call. */
+    ray_t* tmp = ray_table_add_col(result, sym_intern_safe("_node", 5), node_vec);
+    ray_release(node_vec);
+    if (!tmp || RAY_IS_ERR(tmp)) {
+        ray_release(result);
+        ray_release(dist_vec);
+        ray_release(depth_vec);
+        return ray_error("oom", NULL);
+    }
+    result = tmp;
+    tmp = ray_table_add_col(result, sym_intern_safe("_dist", 5), dist_vec);
+    ray_release(dist_vec);
+    if (!tmp || RAY_IS_ERR(tmp)) {
+        ray_release(result);
+        ray_release(depth_vec);
+        return ray_error("oom", NULL);
+    }
+    result = tmp;
+    tmp = ray_table_add_col(result, sym_intern_safe("_depth", 6), depth_vec);
+    ray_release(depth_vec);
+    if (!tmp || RAY_IS_ERR(tmp)) {
+        ray_release(result);
+        return ray_error("oom", NULL);
+    }
+    result = tmp;
+
+    return result;
+}
+
+/* exec_wco_join: Worst-Case Optimal Join via general Leapfrog Triejoin */
+ray_t* exec_wco_join(ray_graph_t* g, ray_op_t* op) {
+    ray_op_ext_t* ext = find_ext(g, op->id);
+    if (!ext) return ray_error("nyi", NULL);
+
+    ray_rel_t** rels = (ray_rel_t**)ext->wco.rels;
+    uint8_t n_rels = ext->wco.n_rels;
+    uint8_t n_vars = ext->wco.n_vars;
+
+    if (!rels || n_rels == 0) return ray_error("schema", NULL);
+    if (n_vars > LFTJ_MAX_VARS) return ray_error("nyi", NULL);
+
+    /* Validate sorted CSR (both fwd and rev, since LFTJ may use either) */
+    for (uint8_t r = 0; r < n_rels; r++) {
+        if (!rels[r] || !rels[r]->fwd.sorted || !rels[r]->rev.sorted)
+            return ray_error("domain", NULL);
+    }
+
+    /* Build binding plan */
+    lftj_enum_ctx_t ctx;
+    memset(&ctx, 0,
sizeof(ctx)); + if (!lftj_build_default_plan(&ctx, rels, n_rels, n_vars)) + return ray_error("nyi", NULL); + + /* Allocate output buffers */ + int64_t out_cap = 4096; + ray_t* col_data_block; + int64_t** col_data = (int64_t**)scratch_alloc(&col_data_block, + (size_t)n_vars * sizeof(int64_t*)); + if (!col_data) { + scratch_free(col_data_block); + return ray_error("oom", NULL); + } + + for (uint8_t v = 0; v < n_vars; v++) { + ray_t* h = ray_alloc((size_t)out_cap * sizeof(int64_t)); + if (!h) { + for (uint8_t j = 0; j < v; j++) ray_free(ctx.buf_hdrs[j]); + scratch_free(col_data_block); + return ray_error("oom", NULL); + } + ctx.buf_hdrs[v] = h; + col_data[v] = (int64_t*)ray_data(h); + } + + ctx.col_data = col_data; + ctx.out_count = 0; + ctx.out_cap = out_cap; + ctx.oom = false; + + /* Run general LFTJ enumeration */ + lftj_enumerate(&ctx, 0); + + if (ctx.oom) { + for (uint8_t v = 0; v < n_vars; v++) ray_free(ctx.buf_hdrs[v]); + scratch_free(col_data_block); + return ray_error("oom", NULL); + } + + /* Build output table */ + ray_t* result = ray_table_new(n_vars); + if (!result || RAY_IS_ERR(result)) { + for (uint8_t v = 0; v < n_vars; v++) ray_free(ctx.buf_hdrs[v]); + scratch_free(col_data_block); + return ray_error("oom", NULL); + } + + for (uint8_t v = 0; v < n_vars; v++) { + ray_t* vec = ray_vec_from_raw(RAY_I64, ctx.col_data[v], ctx.out_count); + ray_free(ctx.buf_hdrs[v]); + if (!vec || RAY_IS_ERR(vec)) { + for (uint8_t j = v + 1; j < n_vars; j++) ray_free(ctx.buf_hdrs[j]); + scratch_free(col_data_block); + ray_release(result); + return ray_error("oom", NULL); + } + char name_buf[12]; + int n = snprintf(name_buf, sizeof(name_buf), "_v%d", v); + int64_t name_id = ray_sym_intern(name_buf, (size_t)n); + ray_t* new_result = ray_table_add_col(result, name_id, vec); + ray_release(vec); + if (!new_result || RAY_IS_ERR(new_result)) { + for (uint8_t j = v + 1; j < n_vars; j++) ray_free(ctx.buf_hdrs[j]); + scratch_free(col_data_block); + ray_release(result); + return 
ray_error("oom", NULL); + } + result = new_result; + } + + scratch_free(col_data_block); + return result; +} + +/* -------------------------------------------------------------------------- + * exec_louvain: community detection via Louvain modularity optimization. + * Phase 1 only (no graph contraction). + * Maximizes modularity Q = (1/2m) * SUM[(A_ij - k_i*k_j/2m) * delta(c_i, c_j)] + * Treats graph as undirected. Uses forward+reverse CSR. + * -------------------------------------------------------------------------- */ +ray_t* exec_louvain(ray_graph_t* g, ray_op_t* op) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + + ray_rel_t* rel = (ray_rel_t*)ext->graph.rel; + if (!rel) return ray_error("schema", NULL); + + int64_t n = rel->fwd.n_nodes; + int64_t m = rel->fwd.n_edges; + uint16_t max_iter = ext->graph.max_iter; + + if (n <= 0) return ray_error("length", NULL); + + /* Arena for all scratch memory — freed in one shot */ + ray_scratch_arena_t arena; + ray_scratch_arena_init(&arena); + + int64_t* community = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + int64_t* degree = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + int64_t* comm_tot = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + if (!community || !degree || !comm_tot) { + ray_scratch_arena_reset(&arena); + return ray_error("oom", NULL); + } + + int64_t* fwd_off = (int64_t*)ray_data(rel->fwd.offsets); + int64_t* fwd_tgt = (int64_t*)ray_data(rel->fwd.targets); + int64_t* rev_off = (int64_t*)ray_data(rel->rev.offsets); + int64_t* rev_tgt = (int64_t*)ray_data(rel->rev.targets); + + /* Initialize: each node in its own community */ + for (int64_t i = 0; i < n; i++) { + community[i] = i; + degree[i] = (fwd_off[i+1] - fwd_off[i]) + (rev_off[i+1] - rev_off[i]); + comm_tot[i] = degree[i]; + } + + double two_m = (double)(2 * m); + if (two_m == 0) two_m = 1; + + /* Scratch space for per-community edge 
counts (reused across iterations). + * k_i_in[c] = number of edges from node v to community c. */ + int64_t* k_i_in = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + /* Track which communities were touched so we can reset k_i_in efficiently */ + int64_t* touched = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + if (!k_i_in || !touched) { + ray_scratch_arena_reset(&arena); + return ray_error("oom", NULL); + } + memset(k_i_in, 0, (size_t)n * sizeof(int64_t)); + + for (uint16_t iter = 0; iter < max_iter; iter++) { + bool moved = false; + for (int64_t v = 0; v < n; v++) { + int64_t old_comm = community[v]; + int64_t n_touched = 0; + + /* Aggregate edges per neighbor community (forward + reverse) */ + for (int64_t j = fwd_off[v]; j < fwd_off[v + 1]; j++) { + int64_t c = community[fwd_tgt[j]]; + if (c == old_comm) continue; + if (k_i_in[c] == 0) touched[n_touched++] = c; + k_i_in[c]++; + } + for (int64_t j = rev_off[v]; j < rev_off[v + 1]; j++) { + int64_t c = community[rev_tgt[j]]; + if (c == old_comm) continue; + if (k_i_in[c] == 0) touched[n_touched++] = c; + k_i_in[c]++; + } + + /* Evaluate modularity gain for each candidate community. 
+ * delta_Q = k_i_in[c] / two_m - (sigma_tot[c] * k_v) / (two_m * two_m) */ + int64_t best_comm = old_comm; + double best_gain = 0.0; + double k_v = (double)degree[v]; + + for (int64_t t = 0; t < n_touched; t++) { + int64_t c = touched[t]; + double sigma_tot = (double)comm_tot[c]; + double gain = (double)k_i_in[c] / two_m + - (sigma_tot * k_v) / (two_m * two_m); + if (gain > best_gain) { + best_gain = gain; + best_comm = c; + } + } + + /* Reset k_i_in for touched communities */ + for (int64_t t = 0; t < n_touched; t++) { + k_i_in[touched[t]] = 0; + } + + if (best_comm != old_comm) { + comm_tot[old_comm] -= degree[v]; + comm_tot[best_comm] += degree[v]; + community[v] = best_comm; + moved = true; + } + } + if (!moved) break; + } + + /* Normalize community IDs to 0..k-1 */ + int64_t* remap = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + if (!remap) { + ray_scratch_arena_reset(&arena); + return ray_error("oom", NULL); + } + for (int64_t i = 0; i < n; i++) remap[i] = -1; + int64_t next_id = 0; + for (int64_t i = 0; i < n; i++) { + int64_t c = community[i]; + if (remap[c] < 0) remap[c] = next_id++; + community[i] = remap[c]; + } + + /* Build output table */ + ray_t* node_vec = ray_vec_new(RAY_I64, n); + ray_t* comm_vec = ray_vec_new(RAY_I64, n); + if (!node_vec || RAY_IS_ERR(node_vec) || !comm_vec || RAY_IS_ERR(comm_vec)) { + ray_scratch_arena_reset(&arena); + if (node_vec && !RAY_IS_ERR(node_vec)) ray_release(node_vec); + if (comm_vec && !RAY_IS_ERR(comm_vec)) ray_release(comm_vec); + return ray_error("oom", NULL); + } + + int64_t* ndata = (int64_t*)ray_data(node_vec); + int64_t* cdata = (int64_t*)ray_data(comm_vec); + for (int64_t i = 0; i < n; i++) { + ndata[i] = i; + cdata[i] = community[i]; + } + node_vec->len = n; + comm_vec->len = n; + + ray_scratch_arena_reset(&arena); + + ray_t* result = ray_table_new(2); + if (!result || RAY_IS_ERR(result)) { + ray_release(node_vec); + ray_release(comm_vec); + return ray_error("oom", NULL); + } + 
/* Attach the columns one at a time and check every ray_table_add_col
+     * result, as exec_betweenness does. On failure release the table built
+     * so far plus the column not yet handed over, rather than overwriting
+     * `result` with the failed value and passing it to the next call. */
+    ray_t* tmp = ray_table_add_col(result, sym_intern_safe("_node", 5), node_vec);
+    ray_release(node_vec);
+    if (!tmp || RAY_IS_ERR(tmp)) {
+        ray_release(result);
+        ray_release(comm_vec);
+        return ray_error("oom", NULL);
+    }
+    result = tmp;
+    tmp = ray_table_add_col(result, sym_intern_safe("_community", 10), comm_vec);
+    ray_release(comm_vec);
+    if (!tmp || RAY_IS_ERR(tmp)) {
+        ray_release(result);
+        return ray_error("oom", NULL);
+    }
+    result = tmp;
+
+    return result;
+}
+
+/* --------------------------------------------------------------------------
+ * exec_degree_cent: in/out/total degree from CSR offsets. O(n).
+ * -------------------------------------------------------------------------- */
+ray_t* exec_degree_cent(ray_graph_t* g, ray_op_t* op) {
+    ray_op_ext_t* ext = find_ext(g, op->id);
+    if (!ext) return ray_error("nyi", NULL);
+
+    ray_rel_t* rel = (ray_rel_t*)ext->graph.rel;
+    if (!rel) return ray_error("schema", NULL);
+
+    int64_t n = rel->fwd.n_nodes;
+    if (n <= 0) return ray_error("length", NULL);
+
+    int64_t* fwd_off = (int64_t*)ray_data(rel->fwd.offsets);
+    int64_t* rev_off = (int64_t*)ray_data(rel->rev.offsets);
+
+    ray_t* node_vec = ray_vec_new(RAY_I64, n);
+    ray_t* in_vec = ray_vec_new(RAY_I64, n);
+    ray_t* out_vec = ray_vec_new(RAY_I64, n);
+    ray_t* deg_vec = ray_vec_new(RAY_I64, n);
+    if (!node_vec || RAY_IS_ERR(node_vec) ||
+        !in_vec || RAY_IS_ERR(in_vec) ||
+        !out_vec || RAY_IS_ERR(out_vec) ||
+        !deg_vec || RAY_IS_ERR(deg_vec)) {
+        if (node_vec && !RAY_IS_ERR(node_vec)) ray_release(node_vec);
+        if (in_vec && !RAY_IS_ERR(in_vec)) ray_release(in_vec);
+        if (out_vec && !RAY_IS_ERR(out_vec)) ray_release(out_vec);
+        if (deg_vec && !RAY_IS_ERR(deg_vec)) ray_release(deg_vec);
+        return ray_error("oom", NULL);
+    }
+
+    int64_t* ndata = (int64_t*)ray_data(node_vec);
+    int64_t* in_data = (int64_t*)ray_data(in_vec);
+    int64_t* out_data= (int64_t*)ray_data(out_vec);
+    int64_t* deg_data= (int64_t*)ray_data(deg_vec);
+
+    for (int64_t i = 0; i < n; i++) {
+        ndata[i] = i;
+        out_data[i] = fwd_off[i + 1] - fwd_off[i];
+        in_data[i] = rev_off[i + 1] - rev_off[i];
+        deg_data[i] = out_data[i] + in_data[i];
+    }
+    node_vec->len = n;
+    in_vec->len = n;
+    out_vec->len = n;
+    deg_vec->len = n;
+
+    ray_t* result =
ray_table_new(4);
+    if (!result || RAY_IS_ERR(result)) {
+        ray_release(node_vec); ray_release(in_vec);
+        ray_release(out_vec); ray_release(deg_vec);
+        return ray_error("oom", NULL);
+    }
+
+    /* Attach the columns one at a time and check every ray_table_add_col
+     * result, as exec_betweenness does. On failure release the table built
+     * so far plus every column not yet handed over, rather than overwriting
+     * `result` with the failed value and passing it to the next call. */
+    ray_t* tmp = ray_table_add_col(result, sym_intern_safe("_node", 5), node_vec);
+    ray_release(node_vec);
+    if (!tmp || RAY_IS_ERR(tmp)) {
+        ray_release(result);
+        ray_release(in_vec); ray_release(out_vec); ray_release(deg_vec);
+        return ray_error("oom", NULL);
+    }
+    result = tmp;
+    tmp = ray_table_add_col(result, sym_intern_safe("_in_degree", 10), in_vec);
+    ray_release(in_vec);
+    if (!tmp || RAY_IS_ERR(tmp)) {
+        ray_release(result);
+        ray_release(out_vec); ray_release(deg_vec);
+        return ray_error("oom", NULL);
+    }
+    result = tmp;
+    tmp = ray_table_add_col(result, sym_intern_safe("_out_degree", 11), out_vec);
+    ray_release(out_vec);
+    if (!tmp || RAY_IS_ERR(tmp)) {
+        ray_release(result);
+        ray_release(deg_vec);
+        return ray_error("oom", NULL);
+    }
+    result = tmp;
+    tmp = ray_table_add_col(result, sym_intern_safe("_degree", 7), deg_vec);
+    ray_release(deg_vec);
+    if (!tmp || RAY_IS_ERR(tmp)) {
+        ray_release(result);
+        return ray_error("oom", NULL);
+    }
+    result = tmp;
+
+    return result;
+}
+
+/* --------------------------------------------------------------------------
+ * exec_topsort: topological sort via Kahn's algorithm. O(n+m).
+ * Returns error if graph contains a cycle.
+ * -------------------------------------------------------------------------- */
+ray_t* exec_topsort(ray_graph_t* g, ray_op_t* op) {
+    ray_op_ext_t* ext = find_ext(g, op->id);
+    if (!ext) return ray_error("nyi", NULL);
+
+    ray_rel_t* rel = (ray_rel_t*)ext->graph.rel;
+    if (!rel) return ray_error("schema", NULL);
+
+    int64_t n = rel->fwd.n_nodes;
+    if (n <= 0) return ray_error("length", NULL);
+
+    int64_t* fwd_off = (int64_t*)ray_data(rel->fwd.offsets);
+    int64_t* fwd_tgt = (int64_t*)ray_data(rel->fwd.targets);
+    int64_t* rev_off = (int64_t*)ray_data(rel->rev.offsets);
+
+    ray_scratch_arena_t arena;
+    ray_scratch_arena_init(&arena);
+
+    int64_t* in_deg = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t));
+    int64_t* queue = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t));
+    int64_t* order = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t));
+    if (!in_deg || !queue || !order) {
+        ray_scratch_arena_reset(&arena);
+        return ray_error("oom", NULL);
+    }
+
+    /* Compute in-degrees from reverse CSR */
+    for (int64_t i = 0; i < n; i++)
+        in_deg[i] = rev_off[i + 1] - rev_off[i];
+
+    /* Enqueue zero-degree nodes */
+
int64_t head = 0, tail = 0;
+    for (int64_t i = 0; i < n; i++) {
+        if (in_deg[i] == 0) queue[tail++] = i;
+    }
+
+    /* BFS — decrement in-degrees, enqueue new zeros */
+    int64_t count = 0;
+    while (head < tail) {
+        int64_t v = queue[head++];
+        order[v] = count++;
+
+        int64_t start = fwd_off[v];
+        int64_t end = fwd_off[v + 1];
+        for (int64_t j = start; j < end; j++) {
+            int64_t u = fwd_tgt[j];
+            if (--in_deg[u] == 0) queue[tail++] = u;
+        }
+    }
+
+    /* Cycle detection: not all nodes processed */
+    if (count < n) {
+        ray_scratch_arena_reset(&arena);
+        return ray_error("domain", NULL); /* cycle detected */
+    }
+
+    /* Build result */
+    ray_t* node_vec = ray_vec_new(RAY_I64, n);
+    ray_t* order_vec = ray_vec_new(RAY_I64, n);
+    if (!node_vec || RAY_IS_ERR(node_vec) || !order_vec || RAY_IS_ERR(order_vec)) {
+        ray_scratch_arena_reset(&arena);
+        if (node_vec && !RAY_IS_ERR(node_vec)) ray_release(node_vec);
+        if (order_vec && !RAY_IS_ERR(order_vec)) ray_release(order_vec);
+        return ray_error("oom", NULL);
+    }
+
+    int64_t* ndata = (int64_t*)ray_data(node_vec);
+    int64_t* odata = (int64_t*)ray_data(order_vec);
+    for (int64_t i = 0; i < n; i++) {
+        ndata[i] = i;
+        odata[i] = order[i];
+    }
+    node_vec->len = n;
+    order_vec->len = n;
+
+    ray_scratch_arena_reset(&arena);
+
+    ray_t* result = ray_table_new(2);
+    if (!result || RAY_IS_ERR(result)) {
+        ray_release(node_vec); ray_release(order_vec);
+        return ray_error("oom", NULL);
+    }
+
+    /* Attach the columns one at a time and check every ray_table_add_col
+     * result, as exec_betweenness does. On failure release the table built
+     * so far plus the column not yet handed over, rather than overwriting
+     * `result` with the failed value and passing it to the next call. */
+    ray_t* tmp = ray_table_add_col(result, sym_intern_safe("_node", 5), node_vec);
+    ray_release(node_vec);
+    if (!tmp || RAY_IS_ERR(tmp)) {
+        ray_release(result);
+        ray_release(order_vec);
+        return ray_error("oom", NULL);
+    }
+    result = tmp;
+    tmp = ray_table_add_col(result, sym_intern_safe("_order", 6), order_vec);
+    ray_release(order_vec);
+    if (!tmp || RAY_IS_ERR(tmp)) {
+        ray_release(result);
+        return ray_error("oom", NULL);
+    }
+    result = tmp;
+
+    return result;
+}
+
+/* --------------------------------------------------------------------------
+ * exec_cluster_coeff: clustering coefficient via triangle counting. O(n*d^2).
+ * For each node v, count triangles among undirected neighbors using a
+ * byte-per-node membership array.
+ * -------------------------------------------------------------------------- */ +ray_t* exec_cluster_coeff(ray_graph_t* g, ray_op_t* op) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + + ray_rel_t* rel = (ray_rel_t*)ext->graph.rel; + if (!rel) return ray_error("schema", NULL); + + int64_t n = rel->fwd.n_nodes; + if (n <= 0) return ray_error("length", NULL); + + ray_scratch_arena_t arena; + ray_scratch_arena_init(&arena); + + /* Scratch: merged neighbor list per node (max possible size = n) */ + int64_t* nbrs = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + /* Scratch: quick-lookup set for neighbor checking */ + uint8_t* in_nbr = (uint8_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(uint8_t)); + if (!nbrs || !in_nbr) { + ray_scratch_arena_reset(&arena); + return ray_error("oom", NULL); + } + memset(in_nbr, 0, (size_t)n * sizeof(uint8_t)); + + int64_t* fwd_off = (int64_t*)ray_data(rel->fwd.offsets); + int64_t* fwd_tgt = (int64_t*)ray_data(rel->fwd.targets); + int64_t* rev_off = (int64_t*)ray_data(rel->rev.offsets); + int64_t* rev_tgt = (int64_t*)ray_data(rel->rev.targets); + + /* Allocate result vectors */ + ray_t* node_vec = ray_vec_new(RAY_I64, n); + ray_t* lcc_vec = ray_vec_new(RAY_F64, n); + if (!node_vec || RAY_IS_ERR(node_vec) || !lcc_vec || RAY_IS_ERR(lcc_vec)) { + ray_scratch_arena_reset(&arena); + if (node_vec && !RAY_IS_ERR(node_vec)) ray_release(node_vec); + if (lcc_vec && !RAY_IS_ERR(lcc_vec)) ray_release(lcc_vec); + return ray_error("oom", NULL); + } + + int64_t* ndata = (int64_t*)ray_data(node_vec); + double* ldata = (double*)ray_data(lcc_vec); + + for (int64_t v = 0; v < n; v++) { + ndata[v] = v; + + /* Merge forward and reverse neighbors into deduplicated list */ + int64_t deg = 0; + for (int64_t j = fwd_off[v]; j < fwd_off[v + 1]; j++) { + int64_t u = fwd_tgt[j]; + if (u >= 0 && u < n && !in_nbr[u]) { + in_nbr[u] = 1; + nbrs[deg++] = u; + } + } + for (int64_t j = rev_off[v]; 
j < rev_off[v + 1]; j++) {
+            int64_t u = rev_tgt[j];
+            if (u >= 0 && u < n && !in_nbr[u]) {
+                in_nbr[u] = 1;
+                nbrs[deg++] = u;
+            }
+        }
+
+        if (deg < 2) {
+            ldata[v] = 0.0;
+        } else {
+            /* Count directed fwd edges between neighbors of v */
+            int64_t triangles = 0;
+            for (int64_t i = 0; i < deg; i++) {
+                int64_t u = nbrs[i];
+                /* Check fwd edges of u against neighbor set. Apply the same
+                 * range guard as the neighbor-merge loops of this function
+                 * so an out-of-range target is never used to index in_nbr. */
+                for (int64_t j = fwd_off[u]; j < fwd_off[u + 1]; j++) {
+                    int64_t w = fwd_tgt[j];
+                    if (w >= 0 && w < n && in_nbr[w]) triangles++;
+                }
+            }
+            ldata[v] = (double)triangles / ((double)deg * (double)(deg - 1));
+        }
+
+        /* Reset in_nbr for next node */
+        for (int64_t i = 0; i < deg; i++) in_nbr[nbrs[i]] = 0;
+    }
+
+    node_vec->len = n;
+    lcc_vec->len = n;
+
+    ray_scratch_arena_reset(&arena);
+
+    ray_t* result = ray_table_new(2);
+    if (!result || RAY_IS_ERR(result)) {
+        ray_release(node_vec);
+        ray_release(lcc_vec);
+        return ray_error("oom", NULL);
+    }
+
+    /* Attach the columns one at a time and check every ray_table_add_col
+     * result, as exec_betweenness does. On failure release the table built
+     * so far plus the column not yet handed over, rather than overwriting
+     * `result` with the failed value and passing it to the next call. */
+    ray_t* tmp = ray_table_add_col(result, sym_intern_safe("_node", 5), node_vec);
+    ray_release(node_vec);
+    if (!tmp || RAY_IS_ERR(tmp)) {
+        ray_release(result);
+        ray_release(lcc_vec);
+        return ray_error("oom", NULL);
+    }
+    result = tmp;
+    tmp = ray_table_add_col(result, sym_intern_safe("_coefficient", 12), lcc_vec);
+    ray_release(lcc_vec);
+    if (!tmp || RAY_IS_ERR(tmp)) {
+        ray_release(result);
+        return ray_error("oom", NULL);
+    }
+    result = tmp;
+
+    return result;
+}
+
+/* --------------------------------------------------------------------------
+ * exec_betweenness: Brandes betweenness centrality. O(n*m) exact,
+ * O(sample*m) approximate when sample_size > 0.
+ * -------------------------------------------------------------------------- */
+ray_t* exec_betweenness(ray_graph_t* g, ray_op_t* op) {
+    ray_op_ext_t* ext = find_ext(g, op->id);
+    if (!ext) return ray_error("nyi", NULL);
+    ray_rel_t* rel = (ray_rel_t*)ext->graph.rel;
+    if (!rel) return ray_error("schema", NULL);
+
+    int64_t n = rel->fwd.n_nodes;
+    if (n <= 0) return ray_error("length", NULL);
+    uint16_t sample = ext->graph.max_iter;
+    int64_t n_sources = (sample > 0 && (int64_t)sample < n) ?
(int64_t)sample : n; + + int64_t* fwd_off = (int64_t*)ray_data(rel->fwd.offsets); + int64_t* fwd_tgt = (int64_t*)ray_data(rel->fwd.targets); + int64_t* rev_off = (int64_t*)ray_data(rel->rev.offsets); + int64_t* rev_tgt = (int64_t*)ray_data(rel->rev.targets); + + ray_scratch_arena_t arena; + ray_scratch_arena_init(&arena); + + double* cb = (double*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(double)); + double* sigma = (double*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(double)); + double* delta = (double*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(double)); + int64_t* dist = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + int64_t* queue = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + int64_t* stack = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + + /* Predecessor storage: flat CSR-style array with per-node offsets. + * Two-pass approach: BFS counts predecessors per node, prefix-sum builds + * offsets, then a second pass over the stack fills pred_data in grouped order. */ + int64_t m_total = rel->fwd.n_edges + rel->rev.n_edges; + if (m_total == 0) m_total = 1; + int64_t* pred_data = (int64_t*)ray_scratch_arena_push(&arena, (size_t)m_total * sizeof(int64_t)); + int64_t* pred_off = (int64_t*)ray_scratch_arena_push(&arena, (size_t)(n + 1) * sizeof(int64_t)); + int64_t* pred_cursor = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + /* Per-v dedup marker: tracks which neighbors were already counted via fwd edges + * to avoid double-counting sigma/predecessors for bidirectional edges. 
*/ + int64_t* seen_epoch = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + + if (!cb || !sigma || !delta || !dist || !queue || !stack || + !pred_data || !pred_off || !pred_cursor || !seen_epoch) { + ray_scratch_arena_reset(&arena); + return ray_error("oom", NULL); + } + + memset(cb, 0, (size_t)n * sizeof(double)); + + int64_t stride = (sample > 0 && (int64_t)sample < n) ? (n / n_sources) : 1; + + for (int64_t si = 0; si < n_sources; si++) { + int64_t s = (si * stride) % n; + + /* Initialize */ + for (int64_t i = 0; i < n; i++) { + sigma[i] = 0.0; + delta[i] = 0.0; + dist[i] = -1; + } + sigma[s] = 1.0; + dist[s] = 0; + memset(pred_off, 0, (size_t)(n + 1) * sizeof(int64_t)); + memset(seen_epoch, 0, (size_t)n * sizeof(int64_t)); + + /* BFS pass 1: discover nodes, compute sigma, count predecessors */ + int64_t q_head = 0, q_tail = 0; + int64_t stack_top = 0; + queue[q_tail++] = s; + + /* Use epoch counter to deduplicate: for each v popped from queue, + * mark forward neighbors with epoch, then skip reverse neighbors + * already marked (bidirectional edges). Epoch increments per v. 
*/ + int64_t epoch = 0; + while (q_head < q_tail) { + int64_t v = queue[q_head++]; + stack[stack_top++] = v; + epoch++; + + /* Forward neighbors */ + for (int64_t j = fwd_off[v]; j < fwd_off[v + 1]; j++) { + int64_t w = fwd_tgt[j]; + if (dist[w] < 0) { + dist[w] = dist[v] + 1; + queue[q_tail++] = w; + } + if (dist[w] == dist[v] + 1) { + sigma[w] += sigma[v]; + pred_off[w + 1]++; + seen_epoch[w] = epoch; /* mark w as counted for this v */ + } + } + /* Reverse neighbors (undirected), skip if already counted via fwd */ + for (int64_t j = rev_off[v]; j < rev_off[v + 1]; j++) { + int64_t w = rev_tgt[j]; + if (dist[w] < 0) { + dist[w] = dist[v] + 1; + queue[q_tail++] = w; + } + if (dist[w] == dist[v] + 1 && seen_epoch[w] != epoch) { + sigma[w] += sigma[v]; + pred_off[w + 1]++; + } + } + } + + /* Convert pred_off counts to cumulative offsets */ + for (int64_t i = 1; i <= n; i++) + pred_off[i] += pred_off[i - 1]; + + /* BFS pass 2: fill pred_data grouped by target node using write cursors. + * Same dedup logic as pass 1 to avoid duplicate predecessor entries. 
*/ + for (int64_t i = 0; i < n; i++) pred_cursor[i] = pred_off[i]; + epoch = 0; + for (int64_t si2 = 0; si2 < stack_top; si2++) { + int64_t v = stack[si2]; + epoch++; + for (int64_t j = fwd_off[v]; j < fwd_off[v + 1]; j++) { + int64_t w = fwd_tgt[j]; + if (dist[w] == dist[v] + 1) { + pred_data[pred_cursor[w]++] = v; + seen_epoch[w] = epoch; + } + } + for (int64_t j = rev_off[v]; j < rev_off[v + 1]; j++) { + int64_t w = rev_tgt[j]; + if (dist[w] == dist[v] + 1 && seen_epoch[w] != epoch) + pred_data[pred_cursor[w]++] = v; + } + } + + /* Back-propagation of dependencies */ + while (stack_top > 0) { + int64_t w = stack[--stack_top]; + for (int64_t pi = pred_off[w]; pi < pred_off[w + 1]; pi++) { + int64_t v = pred_data[pi]; + delta[v] += (sigma[v] / sigma[w]) * (1.0 + delta[w]); + } + if (w != s) cb[w] += delta[w]; + } + } + + /* Undirected normalization: BFS from each source counts every unordered + * pair {s,t} twice (once as source=s, once as source=t), so halve. */ + for (int64_t i = 0; i < n; i++) cb[i] /= 2.0; + + /* Normalize if sampled */ + if (sample > 0 && (int64_t)sample < n) { + double scale = (double)n / (double)sample; + for (int64_t i = 0; i < n; i++) cb[i] *= scale; + } + + /* Build result table */ + ray_t* node_vec = ray_vec_new(RAY_I64, n); + ray_t* cent_vec = ray_vec_new(RAY_F64, n); + if (!node_vec || RAY_IS_ERR(node_vec) || !cent_vec || RAY_IS_ERR(cent_vec)) { + ray_scratch_arena_reset(&arena); + if (node_vec && !RAY_IS_ERR(node_vec)) ray_release(node_vec); + if (cent_vec && !RAY_IS_ERR(cent_vec)) ray_release(cent_vec); + return ray_error("oom", NULL); + } + int64_t* ndata = (int64_t*)ray_data(node_vec); + double* cdata = (double*)ray_data(cent_vec); + for (int64_t i = 0; i < n; i++) { ndata[i] = i; cdata[i] = cb[i]; } + node_vec->len = n; + cent_vec->len = n; + ray_scratch_arena_reset(&arena); + + ray_t* result = ray_table_new(2); + if (!result || RAY_IS_ERR(result)) { + ray_release(node_vec); ray_release(cent_vec); + return ray_error("oom", NULL); 
+ } + ray_t* tmp = ray_table_add_col(result, sym_intern_safe("_node", 5), node_vec); + ray_release(node_vec); + if (!tmp || RAY_IS_ERR(tmp)) { ray_release(result); ray_release(cent_vec); return ray_error("oom", NULL); } + result = tmp; + tmp = ray_table_add_col(result, sym_intern_safe("_centrality", 11), cent_vec); + ray_release(cent_vec); + if (!tmp || RAY_IS_ERR(tmp)) { ray_release(result); return ray_error("oom", NULL); } + result = tmp; + return result; +} + +/* -------------------------------------------------------------------------- + * exec_closeness: closeness centrality via BFS distance sums. + * closeness[v] = reachable / sum_dist[v]. O(n*m) exact, + * O(sample*m) approximate when sample_size > 0. + * -------------------------------------------------------------------------- */ +ray_t* exec_closeness(ray_graph_t* g, ray_op_t* op) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + ray_rel_t* rel = (ray_rel_t*)ext->graph.rel; + if (!rel) return ray_error("schema", NULL); + + int64_t n = rel->fwd.n_nodes; + if (n <= 0) return ray_error("length", NULL); + uint16_t sample = ext->graph.max_iter; + int64_t n_sources = (sample > 0 && (int64_t)sample < n) ? 
(int64_t)sample : n; + + int64_t* fwd_off = (int64_t*)ray_data(rel->fwd.offsets); + int64_t* fwd_tgt = (int64_t*)ray_data(rel->fwd.targets); + int64_t* rev_off = (int64_t*)ray_data(rel->rev.offsets); + int64_t* rev_tgt = (int64_t*)ray_data(rel->rev.targets); + + ray_scratch_arena_t arena; + ray_scratch_arena_init(&arena); + + double* closeness = (double*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(double)); + int64_t* dist = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + int64_t* queue = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + + if (!closeness || !dist || !queue) { + ray_scratch_arena_reset(&arena); + return ray_error("oom", NULL); + } + + memset(closeness, 0, (size_t)n * sizeof(double)); + + int64_t stride = (sample > 0 && (int64_t)sample < n) ? (n / n_sources) : 1; + + for (int64_t si = 0; si < n_sources; si++) { + int64_t s = (si * stride) % n; + + /* Initialize distances */ + for (int64_t i = 0; i < n; i++) dist[i] = -1; + dist[s] = 0; + + /* BFS from s */ + int64_t q_head = 0, q_tail = 0; + queue[q_tail++] = s; + + while (q_head < q_tail) { + int64_t v = queue[q_head++]; + + /* Forward neighbors */ + for (int64_t j = fwd_off[v]; j < fwd_off[v + 1]; j++) { + int64_t w = fwd_tgt[j]; + if (dist[w] < 0) { + dist[w] = dist[v] + 1; + queue[q_tail++] = w; + } + } + /* Reverse neighbors (undirected) */ + for (int64_t j = rev_off[v]; j < rev_off[v + 1]; j++) { + int64_t w = rev_tgt[j]; + if (dist[w] < 0) { + dist[w] = dist[v] + 1; + queue[q_tail++] = w; + } + } + } + + /* Sum distances and count reachable nodes */ + int64_t sum_dist = 0; + int64_t reachable = 0; + for (int64_t i = 0; i < n; i++) { + if (dist[i] > 0) { + sum_dist += dist[i]; + reachable++; + } + } + + if (reachable > 0 && sum_dist > 0) { + closeness[s] = (double)reachable / (double)sum_dist; + } + } + + /* Build result table: when sampling, only emit computed nodes */ + int64_t n_out = n_sources; + ray_t* node_vec = ray_vec_new(RAY_I64, 
n_out); + ray_t* cent_vec = ray_vec_new(RAY_F64, n_out); + if (!node_vec || RAY_IS_ERR(node_vec) || !cent_vec || RAY_IS_ERR(cent_vec)) { + ray_scratch_arena_reset(&arena); + if (node_vec && !RAY_IS_ERR(node_vec)) ray_release(node_vec); + if (cent_vec && !RAY_IS_ERR(cent_vec)) ray_release(cent_vec); + return ray_error("oom", NULL); + } + int64_t* ndata = (int64_t*)ray_data(node_vec); + double* cdata = (double*)ray_data(cent_vec); + if (n_sources == n) { + for (int64_t i = 0; i < n; i++) { ndata[i] = i; cdata[i] = closeness[i]; } + } else { + for (int64_t si = 0; si < n_sources; si++) { + int64_t s = (si * stride) % n; + ndata[si] = s; + cdata[si] = closeness[s]; + } + } + node_vec->len = n_out; + cent_vec->len = n_out; + ray_scratch_arena_reset(&arena); + + ray_t* result = ray_table_new(2); + if (!result || RAY_IS_ERR(result)) { + ray_release(node_vec); ray_release(cent_vec); + return ray_error("oom", NULL); + } + ray_t* tmp = ray_table_add_col(result, sym_intern_safe("_node", 5), node_vec); + ray_release(node_vec); + if (!tmp || RAY_IS_ERR(tmp)) { ray_release(result); ray_release(cent_vec); return ray_error("oom", NULL); } + result = tmp; + tmp = ray_table_add_col(result, sym_intern_safe("_centrality", 11), cent_vec); + ray_release(cent_vec); + if (!tmp || RAY_IS_ERR(tmp)) { ray_release(result); return ray_error("oom", NULL); } + result = tmp; + return result; +} + +/* -------------------------------------------------------------------------- + * exec_mst: Minimum Spanning Tree / Forest via Kruskal's algorithm. + * Collects weighted edges from forward CSR, sorts by weight, builds MST + * using union-find with path compression and union by rank. 
+ * -------------------------------------------------------------------------- */
+/* One weighted edge read from the forward CSR: weight, source, target. */
+typedef struct { double w; int64_t src; int64_t dst; } mst_edge_t;
+
+/* qsort comparator: orders mst_edge_t records by ascending weight. */
+static int mst_edge_cmp(const void* a, const void* b) {
+    double da = ((const mst_edge_t*)a)->w;
+    double db = ((const mst_edge_t*)b)->w;
+    return (da > db) - (da < db);
+}
+
+/* Returns the root of x's set, re-pointing each visited node at its
+ * grandparent while climbing so later lookups are shorter. */
+static int64_t uf_find(int64_t* parent, int64_t x) {
+    while (parent[x] != x) { parent[x] = parent[parent[x]]; x = parent[x]; }
+    return x;
+}
+
+/* Merges the sets containing a and b, attaching the lower-rank root under
+ * the higher-rank one. Returns false when both were already in one set. */
+static bool uf_union(int64_t* parent, int64_t* rank_arr, int64_t a, int64_t b) {
+    a = uf_find(parent, a); b = uf_find(parent, b);
+    if (a == b) return false;
+    if (rank_arr[a] < rank_arr[b]) { int64_t tmp = a; a = b; b = tmp; }
+    parent[b] = a;
+    if (rank_arr[a] == rank_arr[b]) rank_arr[a]++;
+    return true;
+}
+
+/* Builds a table with columns _src, _dst, _weight holding the edges that
+ * Kruskal's algorithm selects from the forward CSR. Returns a "nyi",
+ * "schema", "length" or "oom" error value when a precondition or an
+ * allocation fails. */
+ray_t* exec_mst(ray_graph_t* g, ray_op_t* op) {
+    ray_op_ext_t* ext = find_ext(g, op->id);
+    if (!ext) return ray_error("nyi", NULL);
+    ray_rel_t* rel = (ray_rel_t*)ext->graph.rel;
+    if (!rel || !rel->fwd.props) return ray_error("schema", NULL);
+
+    int64_t n = rel->fwd.n_nodes;
+    int64_t m = rel->fwd.n_edges;
+    if (n <= 0) return ray_error("length", NULL);
+
+    int64_t weight_sym = ext->graph.weight_col_sym;
+    ray_t* weight_vec = ray_table_get_col(rel->fwd.props, weight_sym);
+    /* Reject an error value before reading ->type, matching the check
+     * exec_dijkstra applies to the same lookup. */
+    if (!weight_vec || RAY_IS_ERR(weight_vec)) return ray_error("schema", NULL);
+    if (weight_vec->type != RAY_F64) return ray_error("schema", NULL);
+    double* weights = (double*)ray_data(weight_vec);
+
+    int64_t* fwd_off = (int64_t*)ray_data(rel->fwd.offsets);
+    int64_t* fwd_tgt = (int64_t*)ray_data(rel->fwd.targets);
+    int64_t* fwd_row = (int64_t*)ray_data(rel->fwd.rowmap);
+
+    ray_scratch_arena_t arena;
+    ray_scratch_arena_init(&arena);
+
+    /* Reserve at least one edge slot so an edgeless graph does not request
+     * a zero-byte push (exec_betweenness clamps its edge buffer the same
+     * way); an empty request could otherwise be reported as "oom". */
+    int64_t edge_cap = m > 0 ? m : 1;
+    mst_edge_t* edges_arr = (mst_edge_t*)ray_scratch_arena_push(&arena,
+                                (size_t)edge_cap * sizeof(mst_edge_t));
+    int64_t* uf_parent = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t));
+    int64_t* uf_rank = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t));
+    if (!edges_arr || !uf_parent || !uf_rank) {
+        ray_scratch_arena_reset(&arena);
+        return ray_error("oom", NULL);
+    }
+
+    /* Fill edge array from forward CSR */
+    int64_t ei = 0;
+    for (int64_t u = 0; u < n; u++) {
+        for (int64_t j = fwd_off[u]; j < fwd_off[u + 1]; j++) {
+            edges_arr[ei].src = u;
+            edges_arr[ei].dst = fwd_tgt[j];
+            edges_arr[ei].w = weights[fwd_row[j]];
+            ei++;
+        }
+    }
+
+    /* Sort edges by weight */
+    qsort(edges_arr, (size_t)ei, sizeof(mst_edge_t), mst_edge_cmp);
+
+    /* Initialize union-find */
+    for (int64_t i = 0; i < n; i++) { uf_parent[i] = i; uf_rank[i] = 0; }
+
+    /* Build MST */
+    int64_t max_mst = n - 1;
+    int64_t mst_count = 0;
+    ray_t* src_vec = ray_vec_new(RAY_I64, max_mst);
+    ray_t* dst_vec = ray_vec_new(RAY_I64, max_mst);
+    ray_t* wt_vec = ray_vec_new(RAY_F64, max_mst);
+    if (!src_vec || RAY_IS_ERR(src_vec) ||
+        !dst_vec || RAY_IS_ERR(dst_vec) ||
+        !wt_vec || RAY_IS_ERR(wt_vec)) {
+        ray_scratch_arena_reset(&arena);
+        if (src_vec && !RAY_IS_ERR(src_vec)) ray_release(src_vec);
+        if (dst_vec && !RAY_IS_ERR(dst_vec)) ray_release(dst_vec);
+        if (wt_vec && !RAY_IS_ERR(wt_vec)) ray_release(wt_vec);
+        return ray_error("oom", NULL);
+    }
+
+    int64_t* sdata = (int64_t*)ray_data(src_vec);
+    int64_t* ddata = (int64_t*)ray_data(dst_vec);
+    double* wdata = (double*)ray_data(wt_vec);
+
+    /* Take edges in ascending weight order; keep one only when it joins two
+     * different components. Stop once n - 1 edges have been kept. */
+    for (int64_t i = 0; i < ei && mst_count < max_mst; i++) {
+        if (uf_union(uf_parent, uf_rank, edges_arr[i].src, edges_arr[i].dst)) {
+            sdata[mst_count] = edges_arr[i].src;
+            ddata[mst_count] = edges_arr[i].dst;
+            wdata[mst_count] = edges_arr[i].w;
+            mst_count++;
+        }
+    }
+
+    src_vec->len = mst_count;
+    dst_vec->len = mst_count;
+    wt_vec->len = mst_count;
+
+    ray_scratch_arena_reset(&arena);
+
+    ray_t* result = ray_table_new(3);
+    if (!result || RAY_IS_ERR(result)) {
+        ray_release(src_vec); ray_release(dst_vec); ray_release(wt_vec);
+        return ray_error("oom", NULL);
+    }
+
+    /* Attach the columns one at a time and check every ray_table_add_col
+     * result, as exec_betweenness does. On failure release the table built
+     * so far plus any column not yet handed over, rather than overwriting
+     * `result` with the failed value and passing it to the next call. */
+    ray_t* tmp = ray_table_add_col(result, sym_intern_safe("_src", 4), src_vec);
+    ray_release(src_vec);
+    if (!tmp || RAY_IS_ERR(tmp)) {
+        ray_release(result);
+        ray_release(dst_vec); ray_release(wt_vec);
+        return ray_error("oom", NULL);
+    }
+    result = tmp;
+    tmp = ray_table_add_col(result, sym_intern_safe("_dst", 4), dst_vec);
+    ray_release(dst_vec);
+    if (!tmp || RAY_IS_ERR(tmp)) {
+        ray_release(result);
+        ray_release(wt_vec);
+        return ray_error("oom", NULL);
+    }
+    result = tmp;
+    tmp = ray_table_add_col(result, sym_intern_safe("_weight", 7), wt_vec);
+    ray_release(wt_vec);
+    if (!tmp || RAY_IS_ERR(tmp)) {
+        ray_release(result);
+        return ray_error("oom", NULL);
+    }
+    result = tmp;
+    return result;
+}
+
+/* --------------------------------------------------------------------------
+ * exec_random_walk: random walk from source node using xorshift64 PRNG.
+ * -------------------------------------------------------------------------- */
+ray_t* exec_random_walk(ray_graph_t* g, ray_op_t* op, ray_t* src_val) {
+    ray_op_ext_t* ext = find_ext(g, op->id);
+    if (!ext) return ray_error("nyi", NULL);
+    ray_rel_t* rel = (ray_rel_t*)ext->graph.rel;
+    if (!rel) return ray_error("schema", NULL);
+
+    int64_t n = rel->fwd.n_nodes;
+    uint16_t walk_len = ext->graph.max_iter;
+    if (n <= 0) return ray_error("length", NULL);
+
+    int64_t start_node;
+    if (ray_is_atom(src_val)) {
+        start_node = src_val->i64;
+    } else {
+        start_node = ((int64_t*)ray_data(src_val))[0];
+    }
+    if (start_node < 0 || start_node >= n) return ray_error("range", NULL);
+
+    int64_t* fwd_off = (int64_t*)ray_data(rel->fwd.offsets);
+    int64_t* fwd_tgt = (int64_t*)ray_data(rel->fwd.targets);
+
+    int64_t total = (int64_t)walk_len + 1;
+    ray_t* step_vec = ray_vec_new(RAY_I64, total);
+    ray_t* node_vec = ray_vec_new(RAY_I64, total);
+    if (!step_vec || RAY_IS_ERR(step_vec) || !node_vec || RAY_IS_ERR(node_vec)) {
+        if (step_vec && !RAY_IS_ERR(step_vec)) ray_release(step_vec);
+        if (node_vec && !RAY_IS_ERR(node_vec)) ray_release(node_vec);
+        return ray_error("oom", NULL);
+    }
+
+    int64_t* sdata = (int64_t*)ray_data(step_vec);
+    int64_t* ndata = (int64_t*)ray_data(node_vec);
+
+    /* xorshift64 PRNG seeded from source node */
+    uint64_t rng = (uint64_t)start_node * 6364136223846793005ULL + 1442695040888963407ULL;
+    if (rng == 0) rng = 1;
+
+    int64_t current = start_node;
+    int64_t count = 0;
+    for (int64_t i = 0; i < total; i++) {
+        sdata[i] = i;
+        ndata[i] = current;
+        count++;
+        if (i < walk_len) {
+            int64_t deg = fwd_off[current + 1] -
fwd_off[current]; + if (deg == 0) break; /* dead end */ + rng ^= rng << 13; rng ^= rng >> 7; rng ^= rng << 17; + int64_t pick = (int64_t)(rng % (uint64_t)deg); + current = fwd_tgt[fwd_off[current] + pick]; + } + } + + step_vec->len = count; + node_vec->len = count; + + ray_t* result = ray_table_new(2); + if (!result || RAY_IS_ERR(result)) { + ray_release(step_vec); ray_release(node_vec); + return ray_error("oom", NULL); + } + result = ray_table_add_col(result, sym_intern_safe("_step", 5), step_vec); + ray_release(step_vec); + result = ray_table_add_col(result, sym_intern_safe("_node", 5), node_vec); + ray_release(node_vec); + return result; +} + +/* -------------------------------------------------------------------------- + * exec_dfs: depth-first search from source node. O(n+m). + * -------------------------------------------------------------------------- */ +ray_t* exec_dfs(ray_graph_t* g, ray_op_t* op, ray_t* src_val) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + + ray_rel_t* rel = (ray_rel_t*)ext->graph.rel; + if (!rel) return ray_error("schema", NULL); + + int64_t n = rel->fwd.n_nodes; + uint8_t max_depth = ext->graph.max_depth; + if (n <= 0) return ray_error("length", NULL); + + /* Get source node ID */ + int64_t start_node; + if (ray_is_atom(src_val)) { + start_node = src_val->i64; + } else { + start_node = ((int64_t*)ray_data(src_val))[0]; + } + if (start_node < 0 || start_node >= n) return ray_error("range", NULL); + + int64_t* fwd_off = (int64_t*)ray_data(rel->fwd.offsets); + int64_t* fwd_tgt = (int64_t*)ray_data(rel->fwd.targets); + + ray_scratch_arena_t arena; + ray_scratch_arena_init(&arena); + + /* Stack can hold up to m entries (one per edge traversal) */ + int64_t m = rel->fwd.n_edges; + int64_t stack_cap = m > n ? 
m + 1 : n + 1; + + int64_t* stack_node = (int64_t*)ray_scratch_arena_push(&arena, (size_t)stack_cap * sizeof(int64_t)); + int64_t* stack_depth = (int64_t*)ray_scratch_arena_push(&arena, (size_t)stack_cap * sizeof(int64_t)); + int64_t* stack_parent = (int64_t*)ray_scratch_arena_push(&arena, (size_t)stack_cap * sizeof(int64_t)); + uint8_t* visited = (uint8_t*)ray_scratch_arena_push(&arena, (size_t)n); + int64_t* res_node = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + int64_t* res_depth = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + int64_t* res_parent = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + if (!stack_node || !stack_depth || !stack_parent || !visited || + !res_node || !res_depth || !res_parent) { + ray_scratch_arena_reset(&arena); + return ray_error("oom", NULL); + } + + memset(visited, 0, (size_t)n); + + /* Push source */ + int64_t sp = 0; + stack_node[sp] = start_node; + stack_depth[sp] = 0; + stack_parent[sp] = -1; + sp++; + + int64_t count = 0; + + while (sp > 0) { + sp--; + int64_t v = stack_node[sp]; + int64_t d = stack_depth[sp]; + int64_t p = stack_parent[sp]; + + if (visited[v]) continue; + visited[v] = 1; + + res_node[count] = v; + res_depth[count] = d; + res_parent[count] = p; + count++; + + if (d < max_depth) { + /* Push neighbors in reverse order so first neighbor is visited first */ + int64_t start = fwd_off[v]; + int64_t end = fwd_off[v + 1]; + for (int64_t j = end - 1; j >= start; j--) { + int64_t u = fwd_tgt[j]; + if (!visited[u]) { + stack_node[sp] = u; + stack_depth[sp] = d + 1; + stack_parent[sp] = v; + sp++; + } + } + } + } + + /* Build result vectors */ + ray_t* node_vec = ray_vec_new(RAY_I64, count); + ray_t* depth_vec = ray_vec_new(RAY_I64, count); + ray_t* parent_vec = ray_vec_new(RAY_I64, count); + if (!node_vec || RAY_IS_ERR(node_vec) || + !depth_vec || RAY_IS_ERR(depth_vec) || + !parent_vec || RAY_IS_ERR(parent_vec)) { + 
ray_scratch_arena_reset(&arena); + if (node_vec && !RAY_IS_ERR(node_vec)) ray_release(node_vec); + if (depth_vec && !RAY_IS_ERR(depth_vec)) ray_release(depth_vec); + if (parent_vec && !RAY_IS_ERR(parent_vec)) ray_release(parent_vec); + return ray_error("oom", NULL); + } + + memcpy(ray_data(node_vec), res_node, (size_t)count * sizeof(int64_t)); + memcpy(ray_data(depth_vec), res_depth, (size_t)count * sizeof(int64_t)); + memcpy(ray_data(parent_vec), res_parent, (size_t)count * sizeof(int64_t)); + node_vec->len = count; + depth_vec->len = count; + parent_vec->len = count; + + ray_scratch_arena_reset(&arena); + + ray_t* result = ray_table_new(3); + if (!result || RAY_IS_ERR(result)) { + ray_release(node_vec); ray_release(depth_vec); ray_release(parent_vec); + return ray_error("oom", NULL); + } + result = ray_table_add_col(result, sym_intern_safe("_node", 5), node_vec); + ray_release(node_vec); + result = ray_table_add_col(result, sym_intern_safe("_depth", 6), depth_vec); + ray_release(depth_vec); + result = ray_table_add_col(result, sym_intern_safe("_parent", 7), parent_vec); + ray_release(parent_vec); + + return result; +} + +/* exec_astar: A* shortest path with Euclidean coordinate heuristic */ +ray_t* exec_astar(ray_graph_t* g, ray_op_t* op, + ray_t* src_val, ray_t* dst_val) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + + ray_rel_t* rel = (ray_rel_t*)ext->graph.rel; + if (!rel) return ray_error("schema", NULL); + if (!rel->fwd.props) return ray_error("schema", NULL); + + ray_t* np = (ray_t*)ext->graph.node_props; + if (!np) return ray_error("schema", NULL); + + int64_t n = rel->fwd.n_nodes; + int64_t m = rel->fwd.n_edges; + int64_t src_id = src_val->i64; + int64_t dst_id = dst_val->i64; + + if (src_id < 0 || src_id >= n) return ray_error("range", NULL); + if (dst_id < 0 || dst_id >= n) return ray_error("range", NULL); + + /* Resolve weight column from edge properties */ + int64_t weight_sym = ext->graph.weight_col_sym; + 
ray_t* weight_vec = ray_table_get_col(rel->fwd.props, weight_sym); + if (!weight_vec || RAY_IS_ERR(weight_vec)) return ray_error("schema", NULL); + double* weights_arr = (double*)ray_data(weight_vec); + + /* Resolve coordinate columns from node properties */ + ray_t* lat_vec = ray_table_get_col(np, ext->graph.coord_col_syms[0]); + ray_t* lon_vec = ray_table_get_col(np, ext->graph.coord_col_syms[1]); + if (!lat_vec || !lon_vec) return ray_error("schema", NULL); + double* lat = (double*)ray_data(lat_vec); + double* lon = (double*)ray_data(lon_vec); + + int64_t heap_cap = (m > n ? m : n) + 1; + + ray_scratch_arena_t arena; + ray_scratch_arena_init(&arena); + + double* dist_a = (double*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(double)); + bool* visited = (bool*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(bool)); + int64_t* depth_a = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + dijk_entry_t* heap = (dijk_entry_t*)ray_scratch_arena_push(&arena, + (size_t)heap_cap * sizeof(dijk_entry_t)); + if (!dist_a || !visited || !depth_a || !heap) { + ray_scratch_arena_reset(&arena); + return ray_error("oom", NULL); + } + memset(visited, 0, (size_t)n * sizeof(bool)); + memset(depth_a, 0, (size_t)n * sizeof(int64_t)); + + for (int64_t i = 0; i < n; i++) dist_a[i] = 1e308; + dist_a[src_id] = 0.0; + + /* A* uses f = g + h; heap stores f-cost for priority ordering */ + double dx = lat[src_id] - lat[dst_id]; + double dy = lon[src_id] - lon[dst_id]; + double h0 = sqrt(dx * dx + dy * dy); + int64_t heap_size = 0; + dijk_heap_push(heap, &heap_size, h0, src_id); + + int64_t* fwd_off = (int64_t*)ray_data(rel->fwd.offsets); + int64_t* fwd_tgt = (int64_t*)ray_data(rel->fwd.targets); + int64_t* fwd_row = (int64_t*)ray_data(rel->fwd.rowmap); + + while (heap_size > 0) { + dijk_entry_t top = dijk_heap_pop(heap, &heap_size); + int64_t u = top.node; + if (visited[u]) continue; + visited[u] = true; + + if (u == dst_id) break; + + for (int64_t j = fwd_off[u]; j 
< fwd_off[u + 1]; j++) { + int64_t v = fwd_tgt[j]; + int64_t edge_row = fwd_row[j]; + double w = weights_arr[edge_row]; + double new_dist = dist_a[u] + w; + if (new_dist < dist_a[v]) { + dist_a[v] = new_dist; + depth_a[v] = depth_a[u] + 1; + /* f = g + h (Euclidean heuristic) */ + double hdx = lat[v] - lat[dst_id]; + double hdy = lon[v] - lon[dst_id]; + double hv = sqrt(hdx * hdx + hdy * hdy); + dijk_heap_push(heap, &heap_size, new_dist + hv, v); + } + } + } + + /* Collect reachable nodes */ + int64_t acount = 0; + for (int64_t i = 0; i < n; i++) { + if (dist_a[i] < 1e308) acount++; + } + + ray_t* node_vec = ray_vec_new(RAY_I64, acount); + ray_t* dist_vec = ray_vec_new(RAY_F64, acount); + ray_t* depth_vec = ray_vec_new(RAY_I64, acount); + if (!node_vec || RAY_IS_ERR(node_vec) || + !dist_vec || RAY_IS_ERR(dist_vec) || + !depth_vec || RAY_IS_ERR(depth_vec)) { + ray_scratch_arena_reset(&arena); + if (node_vec && !RAY_IS_ERR(node_vec)) ray_release(node_vec); + if (dist_vec && !RAY_IS_ERR(dist_vec)) ray_release(dist_vec); + if (depth_vec && !RAY_IS_ERR(depth_vec)) ray_release(depth_vec); + return ray_error("oom", NULL); + } + + int64_t* ndata_a = (int64_t*)ray_data(node_vec); + double* ddata_a = (double*)ray_data(dist_vec); + int64_t* hdata_a = (int64_t*)ray_data(depth_vec); + int64_t idx = 0; + for (int64_t i = 0; i < n; i++) { + if (dist_a[i] < 1e308) { + ndata_a[idx] = i; + ddata_a[idx] = dist_a[i]; + hdata_a[idx] = depth_a[i]; + idx++; + } + } + node_vec->len = acount; + dist_vec->len = acount; + depth_vec->len = acount; + + ray_scratch_arena_reset(&arena); + + ray_t* result = ray_table_new(3); + if (!result || RAY_IS_ERR(result)) { + ray_release(node_vec); + ray_release(dist_vec); + ray_release(depth_vec); + return ray_error("oom", NULL); + } + result = ray_table_add_col(result, sym_intern_safe("_node", 5), node_vec); + ray_release(node_vec); + result = ray_table_add_col(result, sym_intern_safe("_dist", 5), dist_vec); + ray_release(dist_vec); + result = 
ray_table_add_col(result, sym_intern_safe("_depth", 6), depth_vec); + ray_release(depth_vec); + + return result; +} + +/* exec_k_shortest: Yen's k-shortest paths via iterative masked Dijkstra */ +ray_t* exec_k_shortest(ray_graph_t* g, ray_op_t* op, + ray_t* src_val, ray_t* dst_val) { + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + + ray_rel_t* rel = (ray_rel_t*)ext->graph.rel; + if (!rel || !rel->fwd.props) return ray_error("schema", NULL); + + int64_t n = rel->fwd.n_nodes; + int64_t m = rel->fwd.n_edges; + int64_t src_id = src_val->i64; + int64_t dst_id = dst_val->i64; + uint16_t K = ext->graph.max_iter; + + if (src_id < 0 || src_id >= n || dst_id < 0 || dst_id >= n) + return ray_error("range", NULL); + + int64_t weight_sym = ext->graph.weight_col_sym; + ray_t* weight_vec = ray_table_get_col(rel->fwd.props, weight_sym); + if (!weight_vec || RAY_IS_ERR(weight_vec)) return ray_error("schema", NULL); + double* weights_k = (double*)ray_data(weight_vec); + + int64_t* fwd_off = (int64_t*)ray_data(rel->fwd.offsets); + int64_t* fwd_tgt = (int64_t*)ray_data(rel->fwd.targets); + int64_t* fwd_row = (int64_t*)ray_data(rel->fwd.rowmap); + + int64_t heap_cap = (m > n ? 
m : n) + 1; + + ray_scratch_arena_t arena; + ray_scratch_arena_init(&arena); + + /* Dijkstra working arrays */ + double* dist_arr = (double*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(double)); + int64_t* parent = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + bool* vis = (bool*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(bool)); + dijk_entry_t* heap = (dijk_entry_t*)ray_scratch_arena_push(&arena, + (size_t)heap_cap * sizeof(dijk_entry_t)); + bool* node_mask = (bool*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(bool)); + bool* edge_mask = (bool*)ray_scratch_arena_push(&arena, (size_t)m * sizeof(bool)); + + /* Path storage: K paths, each up to n nodes */ + int64_t* paths_data = (int64_t*)ray_scratch_arena_push(&arena, (size_t)K * (size_t)n * sizeof(int64_t)); + int64_t* path_lens = (int64_t*)ray_scratch_arena_push(&arena, (size_t)K * sizeof(int64_t)); + double* path_costs = (double*)ray_scratch_arena_push(&arena, (size_t)K * sizeof(double)); + + /* Candidate storage */ + int64_t max_cand = (int64_t)K * n; + if (max_cand > 4096) max_cand = 4096; + int64_t* cand_data = (int64_t*)ray_scratch_arena_push(&arena, (size_t)max_cand * (size_t)n * sizeof(int64_t)); + int64_t* cand_lens = (int64_t*)ray_scratch_arena_push(&arena, (size_t)max_cand * sizeof(int64_t)); + double* cand_costs = (double*)ray_scratch_arena_push(&arena, (size_t)max_cand * sizeof(double)); + + /* Temp buffer for path reconstruction */ + int64_t* tmp_path = (int64_t*)ray_scratch_arena_push(&arena, (size_t)n * sizeof(int64_t)); + + if (!dist_arr || !parent || !vis || !heap || !node_mask || !edge_mask || + !paths_data || !path_lens || !path_costs || + !cand_data || !cand_lens || !cand_costs || !tmp_path) { + ray_scratch_arena_reset(&arena); + return ray_error("oom", NULL); + } + + int64_t num_found = 0; + int64_t num_cand = 0; + + /* Step 1: Find shortest path P[0] */ + double d = dijkstra_masked(fwd_off, fwd_tgt, fwd_row, weights_k, n, + src_id, dst_id, NULL, 
NULL, + dist_arr, parent, heap, vis); + + if (d >= 1e308) { + ray_scratch_arena_reset(&arena); + ray_t* nv = ray_vec_new(RAY_I64, 0); nv->len = 0; + ray_t* dv = ray_vec_new(RAY_F64, 0); dv->len = 0; + ray_t* pv = ray_vec_new(RAY_I64, 0); pv->len = 0; + ray_t* result = ray_table_new(3); + result = ray_table_add_col(result, sym_intern_safe("_path_id", 8), pv); ray_release(pv); + result = ray_table_add_col(result, sym_intern_safe("_node", 5), nv); ray_release(nv); + result = ray_table_add_col(result, sym_intern_safe("_dist", 5), dv); ray_release(dv); + return result; + } + + /* Reconstruct P[0] from parent array (reverse then flip) */ + int64_t plen = 0; + for (int64_t v = dst_id; v != -1; v = parent[v]) { + tmp_path[plen++] = v; + if (plen > n) break; /* safety: avoid infinite loop on corrupt parent */ + } + for (int64_t i = 0; i < plen / 2; i++) { + int64_t tmp = tmp_path[i]; + tmp_path[i] = tmp_path[plen - 1 - i]; + tmp_path[plen - 1 - i] = tmp; + } + + memcpy(&paths_data[0], tmp_path, (size_t)plen * sizeof(int64_t)); + path_lens[0] = plen; + path_costs[0] = d; + num_found = 1; + + /* Step 2: Iteratively find paths P[1]..P[K-1] */ + for (uint16_t k = 1; k < K; k++) { + int64_t* prev_path = &paths_data[(int64_t)(k - 1) * n]; + int64_t prev_len = path_lens[k - 1]; + + for (int64_t i = 0; i < prev_len - 1; i++) { + int64_t spur_node = prev_path[i]; + + /* Compute root path cost */ + double root_cost = 0.0; + for (int64_t r = 0; r < i; r++) { + int64_t from = prev_path[r]; + int64_t to = prev_path[r + 1]; + for (int64_t e = fwd_off[from]; e < fwd_off[from + 1]; e++) { + if (fwd_tgt[e] == to) { + root_cost += weights_k[fwd_row[e]]; + break; + } + } + } + + /* Mask edges used by found paths sharing the root prefix */ + memset(edge_mask, 0, (size_t)m * sizeof(bool)); + memset(node_mask, 0, (size_t)n * sizeof(bool)); + + for (int64_t j = 0; j < num_found; j++) { + int64_t* pj = &paths_data[j * n]; + int64_t pj_len = path_lens[j]; + if (pj_len <= i) continue; + + bool 
same_prefix = true; + for (int64_t r = 0; r <= i; r++) { + if (pj[r] != prev_path[r]) { same_prefix = false; break; } + } + if (!same_prefix) continue; + + int64_t from = pj[i]; + int64_t to = pj[i + 1]; + for (int64_t e = fwd_off[from]; e < fwd_off[from + 1]; e++) { + if (fwd_tgt[e] == to) { edge_mask[e] = true; break; } + } + } + + /* Mask root path nodes except spur node */ + for (int64_t r = 0; r < i; r++) { + node_mask[prev_path[r]] = true; + } + + /* Dijkstra from spur to dst with masks */ + double spur_dist = dijkstra_masked(fwd_off, fwd_tgt, fwd_row, weights_k, n, + spur_node, dst_id, node_mask, edge_mask, + dist_arr, parent, heap, vis); + if (spur_dist >= 1e308) continue; + + /* Reconstruct spur path */ + int64_t spur_len = 0; + for (int64_t v = dst_id; v != -1; v = parent[v]) { + tmp_path[spur_len++] = v; + if (spur_len > n) break; + } + for (int64_t a = 0; a < spur_len / 2; a++) { + int64_t tmp = tmp_path[a]; + tmp_path[a] = tmp_path[spur_len - 1 - a]; + tmp_path[spur_len - 1 - a] = tmp; + } + + double total_cost = root_cost + spur_dist; + int64_t total_len = i + spur_len; + if (total_len > n || num_cand >= max_cand) continue; + + /* Check for duplicate candidates */ + bool dup = false; + for (int64_t c = 0; c < num_cand && !dup; c++) { + if (cand_lens[c] != total_len) continue; + bool same = true; + int64_t* cp = &cand_data[c * n]; + for (int64_t r = 0; r < i && same; r++) { + if (cp[r] != prev_path[r]) same = false; + } + for (int64_t r = 0; r < spur_len && same; r++) { + if (cp[i + r] != tmp_path[r]) same = false; + } + if (same) dup = true; + } + /* Check against already-found paths */ + for (int64_t f = 0; f < num_found && !dup; f++) { + if (path_lens[f] != total_len) continue; + bool same = true; + int64_t* fp = &paths_data[f * n]; + for (int64_t r = 0; r < i && same; r++) { + if (fp[r] != prev_path[r]) same = false; + } + for (int64_t r = 0; r < spur_len && same; r++) { + if (fp[i + r] != tmp_path[r]) same = false; + } + if (same) dup = true; + } 
+ if (dup) continue; + + /* Store candidate: root_path[0..i-1] + spur_path */ + int64_t* cp = &cand_data[num_cand * n]; + memcpy(cp, prev_path, (size_t)i * sizeof(int64_t)); + memcpy(cp + i, tmp_path, (size_t)spur_len * sizeof(int64_t)); + cand_lens[num_cand] = total_len; + cand_costs[num_cand] = total_cost; + num_cand++; + } + + if (num_cand == 0) break; + + /* Pick cheapest candidate */ + int64_t best = 0; + for (int64_t c = 1; c < num_cand; c++) { + if (cand_costs[c] < cand_costs[best]) best = c; + } + + memcpy(&paths_data[(int64_t)k * n], &cand_data[best * n], + (size_t)cand_lens[best] * sizeof(int64_t)); + path_lens[k] = cand_lens[best]; + path_costs[k] = cand_costs[best]; + num_found++; + + /* Remove used candidate (swap with last) */ + if (best < num_cand - 1) { + memcpy(&cand_data[best * n], &cand_data[(num_cand - 1) * n], + (size_t)cand_lens[num_cand - 1] * sizeof(int64_t)); + cand_lens[best] = cand_lens[num_cand - 1]; + cand_costs[best] = cand_costs[num_cand - 1]; + } + num_cand--; + } + + /* Build output: _path_id, _node, _dist (running dist along each path) */ + int64_t total_rows = 0; + for (int64_t k = 0; k < num_found; k++) total_rows += path_lens[k]; + + ray_t* pid_vec = ray_vec_new(RAY_I64, total_rows); + ray_t* node_vec = ray_vec_new(RAY_I64, total_rows); + ray_t* dist_vec = ray_vec_new(RAY_F64, total_rows); + if (!pid_vec || RAY_IS_ERR(pid_vec) || + !node_vec || RAY_IS_ERR(node_vec) || + !dist_vec || RAY_IS_ERR(dist_vec)) { + ray_scratch_arena_reset(&arena); + if (pid_vec && !RAY_IS_ERR(pid_vec)) ray_release(pid_vec); + if (node_vec && !RAY_IS_ERR(node_vec)) ray_release(node_vec); + if (dist_vec && !RAY_IS_ERR(dist_vec)) ray_release(dist_vec); + return ray_error("oom", NULL); + } + + int64_t* pids = (int64_t*)ray_data(pid_vec); + int64_t* nodes_k = (int64_t*)ray_data(node_vec); + double* dists = (double*)ray_data(dist_vec); + + int64_t row = 0; + for (int64_t k = 0; k < num_found; k++) { + int64_t* path = &paths_data[k * n]; + int64_t pk_len = 
path_lens[k]; + double running = 0.0; + for (int64_t j = 0; j < pk_len; j++) { + pids[row] = k; + nodes_k[row] = path[j]; + if (j > 0) { + int64_t from = path[j - 1]; + int64_t to = path[j]; + for (int64_t e = fwd_off[from]; e < fwd_off[from + 1]; e++) { + if (fwd_tgt[e] == to) { + running += weights_k[fwd_row[e]]; + break; + } + } + } + dists[row] = running; + row++; + } + } + + pid_vec->len = total_rows; + node_vec->len = total_rows; + dist_vec->len = total_rows; + + ray_scratch_arena_reset(&arena); + + ray_t* result = ray_table_new(3); + if (!result || RAY_IS_ERR(result)) { + ray_release(pid_vec); ray_release(node_vec); ray_release(dist_vec); + return ray_error("oom", NULL); + } + result = ray_table_add_col(result, sym_intern_safe("_path_id", 8), pid_vec); + ray_release(pid_vec); + result = ray_table_add_col(result, sym_intern_safe("_node", 5), node_vec); + ray_release(node_vec); + result = ray_table_add_col(result, sym_intern_safe("_dist", 5), dist_vec); + ray_release(dist_vec); + return result; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/ops/window.c b/crates/rayforce-sys/vendor/rayforce/src/ops/window.c new file mode 100644 index 0000000..75c8d94 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/ops/window.c @@ -0,0 +1,1223 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ops/internal.h" + +/* ============================================================================ + * Window function execution + * ============================================================================ */ + +/* Compare rows ra and rb on the given key columns. Returns true if any differ. */ +static inline bool win_keys_differ(ray_t* const* vecs, uint8_t n_keys, + int64_t ra, int64_t rb) { + for (uint8_t k = 0; k < n_keys; k++) { + ray_t* col = vecs[k]; + if (!col) continue; + switch (col->type) { + case RAY_I64: case RAY_TIMESTAMP: + if (((const int64_t*)ray_data(col))[ra] != + ((const int64_t*)ray_data(col))[rb]) return true; + break; + case RAY_F64: { + double a = ((const double*)ray_data(col))[ra]; + double b = ((const double*)ray_data(col))[rb]; + if (a != b) return true; + break; + } + case RAY_I32: case RAY_DATE: case RAY_TIME: + if (((const int32_t*)ray_data(col))[ra] != + ((const int32_t*)ray_data(col))[rb]) return true; + break; + case RAY_SYM: + if (ray_read_sym(ray_data(col), ra, col->type, col->attrs) != + ray_read_sym(ray_data(col), rb, col->type, col->attrs)) return true; + break; + case RAY_I16: + if (((const int16_t*)ray_data(col))[ra] != + ((const int16_t*)ray_data(col))[rb]) return true; + break; + case RAY_BOOL: case RAY_U8: + if (((const uint8_t*)ray_data(col))[ra] != + ((const uint8_t*)ray_data(col))[rb]) return true; + break; + case RAY_STR: { + const ray_str_t* elems; + const char* pool; + str_resolve(col, &elems, &pool); + if 
(!ray_str_t_eq(&elems[ra], pool, &elems[rb], pool)) return true; + break; + } + default: break; + } + } + return false; +} + +static inline double win_read_f64(ray_t* col, int64_t row) { + switch (col->type) { + case RAY_F64: return ((const double*)ray_data(col))[row]; + case RAY_I64: case RAY_TIMESTAMP: + return (double)((const int64_t*)ray_data(col))[row]; + case RAY_I32: case RAY_DATE: case RAY_TIME: + return (double)((const int32_t*)ray_data(col))[row]; + case RAY_SYM: + return (double)ray_read_sym(ray_data(col), row, col->type, col->attrs); + case RAY_I16: return (double)((const int16_t*)ray_data(col))[row]; + case RAY_BOOL: case RAY_U8: return (double)((const uint8_t*)ray_data(col))[row]; + default: return 0.0; + } +} + +static inline int64_t win_read_i64(ray_t* col, int64_t row) { + switch (col->type) { + case RAY_I64: case RAY_TIMESTAMP: + return ((const int64_t*)ray_data(col))[row]; + case RAY_I32: case RAY_DATE: case RAY_TIME: + return (int64_t)((const int32_t*)ray_data(col))[row]; + case RAY_SYM: + return ray_read_sym(ray_data(col), row, col->type, col->attrs); + case RAY_F64: return (int64_t)((const double*)ray_data(col))[row]; + case RAY_I16: return (int64_t)((const int16_t*)ray_data(col))[row]; + case RAY_BOOL: case RAY_U8: return (int64_t)((const uint8_t*)ray_data(col))[row]; + default: return 0; + } +} + +/* Aliases for shared parallel null helpers from internal.h */ +#define win_set_null par_set_null +#define win_prepare_nullmap par_prepare_nullmap +#define win_finalize_nulls par_finalize_nulls + +/* Resolve a graph op node to a column vector from tbl */ +static ray_t* win_resolve_vec(ray_graph_t* g, ray_op_t* key_op, ray_t* tbl, + uint8_t* owned) { + ray_op_ext_t* key_ext = find_ext(g, key_op->id); + if (key_ext && key_ext->base.opcode == OP_SCAN) { + *owned = 0; + return ray_table_get_col(tbl, key_ext->sym); + } + *owned = 1; + ray_t* saved = g->table; + g->table = tbl; + ray_t* v = exec_node(g, key_op); + g->table = saved; + return v; +} + +/* 
Compute window functions for one partition [ps, pe) in sorted_idx */ +static void win_compute_partition( + ray_t* const* order_vecs, uint8_t n_order, + ray_t* const* func_vecs, const uint8_t* func_kinds, const int64_t* func_params, + uint8_t n_funcs, + uint8_t frame_start, uint8_t frame_end, + const int64_t* sorted_idx, int64_t ps, int64_t pe, + ray_t* const* result_vecs, const bool* is_f64) +{ + if (ps >= pe) return; /* empty partition — nothing to compute */ + int64_t part_len = pe - ps; + + for (uint8_t f = 0; f < n_funcs; f++) { + uint8_t kind = func_kinds[f]; + ray_t* fvec = func_vecs[f]; + ray_t* rvec = result_vecs[f]; + bool whole = (frame_start == RAY_BOUND_UNBOUNDED_PRECEDING && + frame_end == RAY_BOUND_UNBOUNDED_FOLLOWING); + + switch (kind) { + case RAY_WIN_ROW_NUMBER: { + int64_t* out = (int64_t*)ray_data(rvec); + for (int64_t i = ps; i < pe; i++) + out[sorted_idx[i]] = i - ps + 1; + break; + } + case RAY_WIN_RANK: { + int64_t* out = (int64_t*)ray_data(rvec); + int64_t rank = 1; + out[sorted_idx[ps]] = 1; + for (int64_t i = ps + 1; i < pe; i++) { + if (n_order > 0 && win_keys_differ(order_vecs, n_order, + sorted_idx[i-1], sorted_idx[i])) + rank = i - ps + 1; + out[sorted_idx[i]] = rank; + } + break; + } + case RAY_WIN_DENSE_RANK: { + int64_t* out = (int64_t*)ray_data(rvec); + int64_t rank = 1; + out[sorted_idx[ps]] = 1; + for (int64_t i = ps + 1; i < pe; i++) { + if (n_order > 0 && win_keys_differ(order_vecs, n_order, + sorted_idx[i-1], sorted_idx[i])) + rank++; + out[sorted_idx[i]] = rank; + } + break; + } + case RAY_WIN_NTILE: { + int64_t n = func_params[f]; + if (n <= 0) n = 1; + int64_t* out = (int64_t*)ray_data(rvec); + for (int64_t i = ps; i < pe; i++) + out[sorted_idx[i]] = ((i - ps) * n) / part_len + 1; + break; + } + case RAY_WIN_COUNT: { + int64_t* out = (int64_t*)ray_data(rvec); + if (whole) { + for (int64_t i = ps; i < pe; i++) + out[sorted_idx[i]] = part_len; + } else { + for (int64_t i = ps; i < pe; i++) + out[sorted_idx[i]] = i - ps + 1; 
+ } + break; + } + case RAY_WIN_SUM: { + if (!fvec) break; + if (is_f64[f]) { + double* out = (double*)ray_data(rvec); + if (whole) { + double t = 0.0; + for (int64_t i = ps; i < pe; i++) + if (!ray_vec_is_null(fvec, sorted_idx[i])) + t += win_read_f64(fvec, sorted_idx[i]); + for (int64_t i = ps; i < pe; i++) + out[sorted_idx[i]] = t; + } else { + double acc = 0.0; + for (int64_t i = ps; i < pe; i++) { + if (!ray_vec_is_null(fvec, sorted_idx[i])) + acc += win_read_f64(fvec, sorted_idx[i]); + out[sorted_idx[i]] = acc; + } + } + } else { + int64_t* out = (int64_t*)ray_data(rvec); + if (whole) { + int64_t t = 0; + for (int64_t i = ps; i < pe; i++) + if (!ray_vec_is_null(fvec, sorted_idx[i])) + t += win_read_i64(fvec, sorted_idx[i]); + for (int64_t i = ps; i < pe; i++) + out[sorted_idx[i]] = t; + } else { + int64_t acc = 0; + for (int64_t i = ps; i < pe; i++) { + if (!ray_vec_is_null(fvec, sorted_idx[i])) + acc += win_read_i64(fvec, sorted_idx[i]); + out[sorted_idx[i]] = acc; + } + } + } + break; + } + case RAY_WIN_AVG: { + if (!fvec) break; + double* out = (double*)ray_data(rvec); + if (whole) { + double t = 0.0; + int64_t cnt = 0; + for (int64_t i = ps; i < pe; i++) + if (!ray_vec_is_null(fvec, sorted_idx[i])) { + t += win_read_f64(fvec, sorted_idx[i]); cnt++; + } + if (cnt > 0) { + double avg = t / (double)cnt; + for (int64_t i = ps; i < pe; i++) + out[sorted_idx[i]] = avg; + } else { + for (int64_t i = ps; i < pe; i++) + win_set_null(rvec, sorted_idx[i]); + } + } else { + double acc = 0.0; + int64_t cnt = 0; + for (int64_t i = ps; i < pe; i++) { + if (!ray_vec_is_null(fvec, sorted_idx[i])) { + acc += win_read_f64(fvec, sorted_idx[i]); cnt++; + } + if (cnt > 0) + out[sorted_idx[i]] = acc / (double)cnt; + else + win_set_null(rvec, sorted_idx[i]); + } + } + break; + } + case RAY_WIN_MIN: { + if (!fvec) break; + if (is_f64[f]) { + double* out = (double*)ray_data(rvec); + if (whole) { + double mn = DBL_MAX; int found = 0; + for (int64_t i = ps; i < pe; i++) { + if 
(ray_vec_is_null(fvec, sorted_idx[i])) continue; + double v = win_read_f64(fvec, sorted_idx[i]); + if (!found || v < mn) { mn = v; found = 1; } + } + if (found) { + for (int64_t i = ps; i < pe; i++) + out[sorted_idx[i]] = mn; + } else { + for (int64_t i = ps; i < pe; i++) + win_set_null(rvec, sorted_idx[i]); + } + } else { + double mn = DBL_MAX; int found = 0; + for (int64_t i = ps; i < pe; i++) { + if (!ray_vec_is_null(fvec, sorted_idx[i])) { + double v = win_read_f64(fvec, sorted_idx[i]); + if (!found || v < mn) { mn = v; found = 1; } + } + if (found) + out[sorted_idx[i]] = mn; + else + win_set_null(rvec, sorted_idx[i]); + } + } + } else { + int64_t* out = (int64_t*)ray_data(rvec); + if (whole) { + int64_t mn = INT64_MAX; int found = 0; + for (int64_t i = ps; i < pe; i++) { + if (ray_vec_is_null(fvec, sorted_idx[i])) continue; + int64_t v = win_read_i64(fvec, sorted_idx[i]); + if (!found || v < mn) { mn = v; found = 1; } + } + if (found) { + for (int64_t i = ps; i < pe; i++) + out[sorted_idx[i]] = mn; + } else { + for (int64_t i = ps; i < pe; i++) + win_set_null(rvec, sorted_idx[i]); + } + } else { + int64_t mn = INT64_MAX; int found = 0; + for (int64_t i = ps; i < pe; i++) { + if (!ray_vec_is_null(fvec, sorted_idx[i])) { + int64_t v = win_read_i64(fvec, sorted_idx[i]); + if (!found || v < mn) { mn = v; found = 1; } + } + if (found) + out[sorted_idx[i]] = mn; + else + win_set_null(rvec, sorted_idx[i]); + } + } + } + break; + } + case RAY_WIN_MAX: { + if (!fvec) break; + if (is_f64[f]) { + double* out = (double*)ray_data(rvec); + if (whole) { + double mx = -DBL_MAX; int found = 0; + for (int64_t i = ps; i < pe; i++) { + if (ray_vec_is_null(fvec, sorted_idx[i])) continue; + double v = win_read_f64(fvec, sorted_idx[i]); + if (!found || v > mx) { mx = v; found = 1; } + } + if (found) { + for (int64_t i = ps; i < pe; i++) + out[sorted_idx[i]] = mx; + } else { + for (int64_t i = ps; i < pe; i++) + win_set_null(rvec, sorted_idx[i]); + } + } else { + double mx = 
-DBL_MAX; int found = 0; + for (int64_t i = ps; i < pe; i++) { + if (!ray_vec_is_null(fvec, sorted_idx[i])) { + double v = win_read_f64(fvec, sorted_idx[i]); + if (!found || v > mx) { mx = v; found = 1; } + } + if (found) + out[sorted_idx[i]] = mx; + else + win_set_null(rvec, sorted_idx[i]); + } + } + } else { + int64_t* out = (int64_t*)ray_data(rvec); + if (whole) { + int64_t mx = INT64_MIN; int found = 0; + for (int64_t i = ps; i < pe; i++) { + if (ray_vec_is_null(fvec, sorted_idx[i])) continue; + int64_t v = win_read_i64(fvec, sorted_idx[i]); + if (!found || v > mx) { mx = v; found = 1; } + } + if (found) { + for (int64_t i = ps; i < pe; i++) + out[sorted_idx[i]] = mx; + } else { + for (int64_t i = ps; i < pe; i++) + win_set_null(rvec, sorted_idx[i]); + } + } else { + int64_t mx = INT64_MIN; int found = 0; + for (int64_t i = ps; i < pe; i++) { + if (!ray_vec_is_null(fvec, sorted_idx[i])) { + int64_t v = win_read_i64(fvec, sorted_idx[i]); + if (!found || v > mx) { mx = v; found = 1; } + } + if (found) + out[sorted_idx[i]] = mx; + else + win_set_null(rvec, sorted_idx[i]); + } + } + } + break; + } + case RAY_WIN_LAG: { + if (!fvec) break; + int64_t offset = func_params[f]; + if (offset <= 0) offset = 1; + if (is_f64[f]) { + double* out = (double*)ray_data(rvec); + for (int64_t i = ps; i < pe; i++) { + int64_t src = i - offset; + if (src >= ps) { + out[sorted_idx[i]] = win_read_f64(fvec, sorted_idx[src]); + if (ray_vec_is_null(fvec, sorted_idx[src])) + win_set_null(rvec, sorted_idx[i]); + } else { + out[sorted_idx[i]] = 0.0; + win_set_null(rvec, sorted_idx[i]); + } + } + } else { + int64_t* out = (int64_t*)ray_data(rvec); + for (int64_t i = ps; i < pe; i++) { + int64_t src = i - offset; + if (src >= ps) { + out[sorted_idx[i]] = win_read_i64(fvec, sorted_idx[src]); + if (ray_vec_is_null(fvec, sorted_idx[src])) + win_set_null(rvec, sorted_idx[i]); + } else { + out[sorted_idx[i]] = 0; + win_set_null(rvec, sorted_idx[i]); + } + } + } + break; + } + case RAY_WIN_LEAD: { 
+ if (!fvec) break; + int64_t offset = func_params[f]; + if (offset <= 0) offset = 1; + if (is_f64[f]) { + double* out = (double*)ray_data(rvec); + for (int64_t i = ps; i < pe; i++) { + int64_t src = i + offset; + if (src < pe) { + out[sorted_idx[i]] = win_read_f64(fvec, sorted_idx[src]); + if (ray_vec_is_null(fvec, sorted_idx[src])) + win_set_null(rvec, sorted_idx[i]); + } else { + out[sorted_idx[i]] = 0.0; + win_set_null(rvec, sorted_idx[i]); + } + } + } else { + int64_t* out = (int64_t*)ray_data(rvec); + for (int64_t i = ps; i < pe; i++) { + int64_t src = i + offset; + if (src < pe) { + out[sorted_idx[i]] = win_read_i64(fvec, sorted_idx[src]); + if (ray_vec_is_null(fvec, sorted_idx[src])) + win_set_null(rvec, sorted_idx[i]); + } else { + out[sorted_idx[i]] = 0; + win_set_null(rvec, sorted_idx[i]); + } + } + } + break; + } + case RAY_WIN_FIRST_VALUE: { + if (!fvec) break; + bool first_null = ray_vec_is_null(fvec, sorted_idx[ps]); + if (is_f64[f]) { + double* out = (double*)ray_data(rvec); + double first = first_null ? 0.0 : win_read_f64(fvec, sorted_idx[ps]); + for (int64_t i = ps; i < pe; i++) { + out[sorted_idx[i]] = first; + if (first_null) win_set_null(rvec, sorted_idx[i]); + } + } else { + int64_t* out = (int64_t*)ray_data(rvec); + int64_t first = first_null ? 0 : win_read_i64(fvec, sorted_idx[ps]); + for (int64_t i = ps; i < pe; i++) { + out[sorted_idx[i]] = first; + if (first_null) win_set_null(rvec, sorted_idx[i]); + } + } + break; + } + case RAY_WIN_LAST_VALUE: { + if (!fvec) break; + if (is_f64[f]) { + double* out = (double*)ray_data(rvec); + if (whole) { + bool lnull = ray_vec_is_null(fvec, sorted_idx[pe - 1]); + double last = lnull ? 
0.0 : win_read_f64(fvec, sorted_idx[pe - 1]); + for (int64_t i = ps; i < pe; i++) { + out[sorted_idx[i]] = last; + if (lnull) win_set_null(rvec, sorted_idx[i]); + } + } else { + for (int64_t i = ps; i < pe; i++) { + out[sorted_idx[i]] = win_read_f64(fvec, sorted_idx[i]); + if (ray_vec_is_null(fvec, sorted_idx[i])) + win_set_null(rvec, sorted_idx[i]); + } + } + } else { + int64_t* out = (int64_t*)ray_data(rvec); + if (whole) { + bool lnull = ray_vec_is_null(fvec, sorted_idx[pe - 1]); + int64_t last = lnull ? 0 : win_read_i64(fvec, sorted_idx[pe - 1]); + for (int64_t i = ps; i < pe; i++) { + out[sorted_idx[i]] = last; + if (lnull) win_set_null(rvec, sorted_idx[i]); + } + } else { + for (int64_t i = ps; i < pe; i++) { + out[sorted_idx[i]] = win_read_i64(fvec, sorted_idx[i]); + if (ray_vec_is_null(fvec, sorted_idx[i])) + win_set_null(rvec, sorted_idx[i]); + } + } + } + break; + } + case RAY_WIN_NTH_VALUE: { + if (!fvec) break; + int64_t nth = func_params[f]; + if (nth < 1) nth = 1; + bool nth_null = (nth > part_len) || + ray_vec_is_null(fvec, sorted_idx[ps + nth - 1]); + if (is_f64[f]) { + double* out = (double*)ray_data(rvec); + double val = nth_null ? 0.0 : win_read_f64(fvec, sorted_idx[ps + nth - 1]); + for (int64_t i = ps; i < pe; i++) { + out[sorted_idx[i]] = val; + if (nth_null) win_set_null(rvec, sorted_idx[i]); + } + } else { + int64_t* out = (int64_t*)ray_data(rvec); + int64_t val = nth_null ? 
0 : win_read_i64(fvec, sorted_idx[ps + nth - 1]); + for (int64_t i = ps; i < pe; i++) { + out[sorted_idx[i]] = val; + if (nth_null) win_set_null(rvec, sorted_idx[i]); + } + } + break; + } + } /* switch */ + } /* for each func */ +} + +/* Parallel per-partition window compute context */ +typedef struct { + ray_t** order_vecs; + uint8_t n_order; + ray_t** func_vecs; + uint8_t* func_kinds; + int64_t* func_params; + uint8_t n_funcs; + uint8_t frame_start; + uint8_t frame_end; + int64_t* sorted_idx; + int64_t* part_offsets; + ray_t** result_vecs; + bool* is_f64; +} win_par_ctx_t; + +static void win_par_fn(void* arg, uint32_t worker_id, + int64_t start, int64_t end) { + (void)worker_id; + win_par_ctx_t* ctx = (win_par_ctx_t*)arg; + for (int64_t p = start; p < end; p++) { + win_compute_partition( + ctx->order_vecs, ctx->n_order, + ctx->func_vecs, ctx->func_kinds, ctx->func_params, + ctx->n_funcs, ctx->frame_start, ctx->frame_end, + ctx->sorted_idx, ctx->part_offsets[p], ctx->part_offsets[p + 1], + ctx->result_vecs, ctx->is_f64); + } +} + +/* Parallel gather of partition key values into contiguous array. + * Eliminates random-access reads during Phase 2 boundary detection. 
*/ +typedef struct { + const int64_t* sorted_idx; + uint64_t* pkey_sorted; + ray_t** sort_vecs; + uint8_t n_part; +} pkey_gather_ctx_t; + +static void pkey_gather_fn(void* arg, uint32_t wid, + int64_t start, int64_t end) { + (void)wid; + pkey_gather_ctx_t* ctx = (pkey_gather_ctx_t*)arg; + const int64_t* sidx = ctx->sorted_idx; + uint64_t* out = ctx->pkey_sorted; + + if (ctx->n_part == 1) { + ray_t* pk = ctx->sort_vecs[0]; + const void* pkd = ray_data(pk); + if (RAY_IS_SYM(pk->type)) { + for (int64_t i = start; i < end; i++) + out[i] = (uint64_t)ray_read_sym(pkd, sidx[i], pk->type, pk->attrs); + } else if (pk->type == RAY_I32 || pk->type == RAY_DATE || pk->type == RAY_TIME) { + const int32_t* src = (const int32_t*)pkd; + for (int64_t i = start; i < end; i++) + out[i] = (uint64_t)((uint32_t)(src[sidx[i]] - INT32_MIN)); + } else { + const uint64_t* src = (const uint64_t*)pkd; + for (int64_t i = start; i < end; i++) + out[i] = src[sidx[i]]; + } + } else { + for (int64_t i = start; i < end; i++) { + int64_t r = sidx[i]; + uint64_t key = 0; + for (uint8_t k = 0; k < ctx->n_part; k++) { + ray_t* col = ctx->sort_vecs[k]; + const void* d = ray_data(col); + if (RAY_IS_SYM(col->type)) + key = (key << 32) | (uint32_t)ray_read_sym(d, r, col->type, col->attrs); + else if (col->type == RAY_I32 || col->type == RAY_DATE || col->type == RAY_TIME) + key = (key << 32) | (uint32_t)(((const int32_t*)d)[r] - INT32_MIN); + else { + key = (key << 32) | (uint32_t)((const uint64_t*)d)[r]; + } + } + out[i] = key; + } + } +} + +ray_t* exec_window(ray_graph_t* g, ray_op_t* op, ray_t* tbl) { + if (!tbl || RAY_IS_ERR(tbl)) return tbl; + + ray_op_ext_t* ext = find_ext(g, op->id); + if (!ext) return ray_error("nyi", NULL); + + int64_t nrows = ray_table_nrows(tbl); + int64_t ncols = ray_table_ncols(tbl); + uint8_t n_part = ext->window.n_part_keys; + uint8_t n_order = ext->window.n_order_keys; + uint8_t n_funcs = ext->window.n_funcs; + /* Guard against uint8_t overflow on n_part + n_order */ + if 
((uint16_t)n_part + n_order > 255) + return ray_error("nyi", NULL); + uint8_t n_sort = n_part + n_order; + + if (nrows == 0 || n_funcs == 0) { + ray_retain(tbl); + return tbl; + } + + /* --- Phase 0: Resolve key and func_input vectors --- */ + /* VLAs below are bounded by uint8_t limits (max 255 each), + * so max ~10KB on stack; bounded by uint8_t limits. */ + ray_t* sort_vecs[n_sort > 0 ? n_sort : 1]; + uint8_t sort_owned[n_sort > 0 ? n_sort : 1]; + uint8_t sort_descs[n_sort > 0 ? n_sort : 1]; + memset(sort_owned, 0, sizeof(sort_owned)); + memset(sort_descs, 0, sizeof(sort_descs)); + + for (uint8_t k = 0; k < n_part; k++) { + sort_vecs[k] = win_resolve_vec(g, ext->window.part_keys[k], tbl, + &sort_owned[k]); + sort_descs[k] = 0; /* partition keys always ASC */ + if (!sort_vecs[k] || RAY_IS_ERR(sort_vecs[k])) { + ray_t* err = sort_vecs[k] ? sort_vecs[k] : ray_error("nyi", NULL); + for (uint8_t j = 0; j < k; j++) + if (sort_owned[j] && sort_vecs[j] && !RAY_IS_ERR(sort_vecs[j])) + ray_release(sort_vecs[j]); + return err; + } + } + for (uint8_t k = 0; k < n_order; k++) { + sort_vecs[n_part + k] = win_resolve_vec(g, ext->window.order_keys[k], + tbl, &sort_owned[n_part + k]); + sort_descs[n_part + k] = ext->window.order_descs[k]; + if (!sort_vecs[n_part + k] || RAY_IS_ERR(sort_vecs[n_part + k])) { + ray_t* err = sort_vecs[n_part + k] ? 
sort_vecs[n_part + k] + : ray_error("nyi", NULL); + for (uint8_t j = 0; j < n_part + k; j++) + if (sort_owned[j] && sort_vecs[j] && !RAY_IS_ERR(sort_vecs[j])) + ray_release(sort_vecs[j]); + return err; + } + } + + ray_t* func_vecs[n_funcs]; + uint8_t func_owned[n_funcs]; + ray_t* result_vecs[n_funcs]; + bool is_f64[n_funcs]; + memset(func_owned, 0, sizeof(func_owned)); + memset(result_vecs, 0, sizeof(result_vecs)); + for (uint8_t f = 0; f < n_funcs; f++) { + ray_op_t* fi = ext->window.func_inputs[f]; + if (fi) { + func_vecs[f] = win_resolve_vec(g, fi, tbl, &func_owned[f]); + if (!func_vecs[f] || RAY_IS_ERR(func_vecs[f])) { + ray_t* err = func_vecs[f] ? func_vecs[f] : ray_error("nyi", NULL); + for (uint8_t j = 0; j < f; j++) + if (func_owned[j] && func_vecs[j] && !RAY_IS_ERR(func_vecs[j])) + ray_release(func_vecs[j]); + for (uint8_t j = 0; j < n_sort; j++) + if (sort_owned[j] && sort_vecs[j] && !RAY_IS_ERR(sort_vecs[j])) + ray_release(sort_vecs[j]); + return err; + } + } else { + func_vecs[f] = NULL; + } + } + + /* --- Phase 1: Sort by (partition_keys ++ order_keys) --- */ + ray_t* radix_itmp_hdr = NULL; + ray_t* win_enum_rank_hdrs[n_sort > 0 ? 
n_sort : 1]; + memset(win_enum_rank_hdrs, 0, sizeof(win_enum_rank_hdrs)); + + ray_t* indices_hdr = NULL; + int64_t* indices = (int64_t*)scratch_alloc(&indices_hdr, + (size_t)nrows * sizeof(int64_t)); + if (!indices) goto oom; + for (int64_t i = 0; i < nrows; i++) indices[i] = i; + + int64_t* sorted_idx = indices; + + if (n_sort > 0 && nrows <= 64) { + sort_cmp_ctx_t cmp_ctx = { + .vecs = sort_vecs, .desc = sort_descs, + .nulls_first = NULL, .n_sort = n_sort, + }; + sort_insertion(&cmp_ctx, indices, nrows); + } else if (n_sort > 0) { + /* --- Radix sort fast path --- */ + bool can_radix = true; + for (uint8_t k = 0; k < n_sort; k++) { + if (!sort_vecs[k]) { can_radix = false; break; } + int8_t t = sort_vecs[k]->type; + if (t != RAY_I64 && t != RAY_F64 && t != RAY_I32 && t != RAY_I16 && + t != RAY_BOOL && t != RAY_U8 && t != RAY_SYM && + t != RAY_DATE && t != RAY_TIME && t != RAY_TIMESTAMP) { + can_radix = false; break; + } + } + bool radix_done = false; + + if (can_radix) { + ray_pool_t* pool = ray_pool_get(); + + /* Build SYM rank mappings */ + uint32_t* enum_ranks[n_sort]; + memset(enum_ranks, 0, n_sort * sizeof(uint32_t*)); + for (uint8_t k = 0; k < n_sort; k++) { + if (RAY_IS_SYM(sort_vecs[k]->type)) { + enum_ranks[k] = build_enum_rank(sort_vecs[k], nrows, + &win_enum_rank_hdrs[k]); + if (!enum_ranks[k]) { can_radix = false; break; } + } + } + + if (can_radix && n_sort == 1) { + /* Single-key sort */ + uint8_t key_nbytes = radix_key_bytes(sort_vecs[0]->type); + ray_pool_t* sk_pool = (nrows >= SMALL_POOL_THRESHOLD) ? 
pool : NULL; + ray_t *keys_hdr; + uint64_t* keys = (uint64_t*)scratch_alloc(&keys_hdr, + (size_t)nrows * sizeof(uint64_t)); + if (keys) { + radix_encode_ctx_t enc = { + .keys = keys, .data = ray_data(sort_vecs[0]), + .col = sort_vecs[0], + .type = sort_vecs[0]->type, + .col_attrs = sort_vecs[0]->attrs, + .desc = sort_descs[0], + .nulls_first = sort_descs[0], /* default: NULLS FIRST for DESC */ + .enum_rank = enum_ranks[0], .n_keys = 1, + }; + if (sk_pool) + ray_pool_dispatch(sk_pool, radix_encode_fn, &enc, nrows); + else + radix_encode_fn(&enc, 0, 0, nrows); + + if (nrows <= RADIX_SORT_THRESHOLD) { + key_introsort(keys, indices, nrows); + sorted_idx = indices; + radix_done = true; + } else { + ray_t *ktmp_hdr, *itmp_hdr; + uint64_t* ktmp = (uint64_t*)scratch_alloc(&ktmp_hdr, + (size_t)nrows * sizeof(uint64_t)); + int64_t* itmp = (int64_t*)scratch_alloc(&itmp_hdr, + (size_t)nrows * sizeof(int64_t)); + if (ktmp && itmp) { + sorted_idx = radix_sort_run(sk_pool, keys, indices, + ktmp, itmp, nrows, + key_nbytes, NULL); + radix_done = (sorted_idx != NULL); + } + scratch_free(ktmp_hdr); + if (sorted_idx != itmp) scratch_free(itmp_hdr); + else radix_itmp_hdr = itmp_hdr; + } + } + scratch_free(keys_hdr); + } else if (can_radix && n_sort > 1) { + /* Multi-key composite radix sort */ + ray_pool_t* pool2 = pool; + int64_t mins[n_sort], maxs[n_sort]; + uint8_t total_bits = 0; + bool fits = true; + + ray_pool_t* mk_prescan_pool2 = (nrows >= SMALL_POOL_THRESHOLD) ? pool2 : NULL; + if (n_sort <= MK_PRESCAN_MAX_KEYS && mk_prescan_pool2) { + uint32_t nw = ray_pool_total_workers(mk_prescan_pool2); + size_t pw_count = (size_t)nw * n_sort; + int64_t pw_mins_stack[512], pw_maxs_stack[512]; + ray_t *pw_mins_hdr = NULL, *pw_maxs_hdr = NULL; + int64_t* pw_mins = (pw_count <= 512) + ? pw_mins_stack + : (int64_t*)scratch_alloc(&pw_mins_hdr, pw_count * sizeof(int64_t)); + int64_t* pw_maxs = (pw_count <= 512) + ? 
pw_maxs_stack + : (int64_t*)scratch_alloc(&pw_maxs_hdr, pw_count * sizeof(int64_t)); + for (size_t i = 0; i < pw_count; i++) { + pw_mins[i] = INT64_MAX; + pw_maxs[i] = INT64_MIN; + } + mk_prescan_ctx_t pctx = { + .vecs = sort_vecs, .enum_ranks = enum_ranks, + .n_keys = n_sort, .nrows = nrows, .n_workers = nw, + .pw_mins = pw_mins, .pw_maxs = pw_maxs, + }; + ray_pool_dispatch(mk_prescan_pool2, mk_prescan_fn, &pctx, nrows); + + for (uint8_t k = 0; k < n_sort; k++) { + int64_t kmin = INT64_MAX, kmax = INT64_MIN; + for (uint32_t w = 0; w < nw; w++) { + int64_t wmin = pw_mins[w * n_sort + k]; + int64_t wmax = pw_maxs[w * n_sort + k]; + if (wmin < kmin) kmin = wmin; + if (wmax > kmax) kmax = wmax; + } + mins[k] = kmin; + maxs[k] = kmax; + uint64_t range = (uint64_t)(kmax - kmin); + uint8_t bits = 1; + while (((uint64_t)1 << bits) <= range && bits < 64) + bits++; + total_bits += bits; + } + if (pw_mins_hdr) scratch_free(pw_mins_hdr); + if (pw_maxs_hdr) scratch_free(pw_maxs_hdr); + } else { + for (uint8_t k = 0; k < n_sort; k++) { + ray_t* col = sort_vecs[k]; + int64_t kmin = INT64_MAX, kmax = INT64_MIN; + if (enum_ranks[k]) { + const void* cdata = ray_data(col); + int8_t ctype = col->type; + uint8_t cattrs = col->attrs; + for (int64_t i = 0; i < nrows; i++) { + uint32_t raw = (uint32_t)ray_read_sym(cdata, i, ctype, cattrs); + int64_t v = (int64_t)enum_ranks[k][raw]; + if (v < kmin) kmin = v; + if (v > kmax) kmax = v; + } + } else if (col->type == RAY_I64 || col->type == RAY_TIMESTAMP) { + const int64_t* d = (const int64_t*)ray_data(col); + for (int64_t i = 0; i < nrows; i++) { + if (d[i] < kmin) kmin = d[i]; + if (d[i] > kmax) kmax = d[i]; + } + } else if (col->type == RAY_I32 || col->type == RAY_DATE || col->type == RAY_TIME) { + const int32_t* d = (const int32_t*)ray_data(col); + for (int64_t i = 0; i < nrows; i++) { + if (d[i] < kmin) kmin = (int64_t)d[i]; + if (d[i] > kmax) kmax = (int64_t)d[i]; + } + } else if (col->type == RAY_I16) { + const int16_t* d = (const 
int16_t*)ray_data(col); + for (int64_t i = 0; i < nrows; i++) { + if (d[i] < kmin) kmin = (int64_t)d[i]; + if (d[i] > kmax) kmax = (int64_t)d[i]; + } + } else if (col->type == RAY_BOOL || col->type == RAY_U8) { + const uint8_t* d = (const uint8_t*)ray_data(col); + for (int64_t i = 0; i < nrows; i++) { + if (d[i] < kmin) kmin = (int64_t)d[i]; + if (d[i] > kmax) kmax = (int64_t)d[i]; + } + } + mins[k] = kmin; + maxs[k] = kmax; + uint64_t range = (uint64_t)(kmax - kmin); + uint8_t bits = 1; + while (((uint64_t)1 << bits) <= range && bits < 64) + bits++; + total_bits += bits; + } + } + + if (total_bits > 64) fits = false; + + if (fits) { + uint8_t bit_shifts[n_sort]; + uint8_t accum = 0; + for (int k = n_sort - 1; k >= 0; k--) { + bit_shifts[k] = accum; + uint64_t range = (uint64_t)(maxs[k] - mins[k]); + uint8_t bits = 1; + while (((uint64_t)1 << bits) <= range && bits < 64) + bits++; + accum += bits; + } + + uint8_t comp_nbytes = (total_bits + 7) / 8; + if (comp_nbytes < 1) comp_nbytes = 1; + ray_pool_t* mk_pool = (nrows >= SMALL_POOL_THRESHOLD) ? 
pool2 : NULL; + + ray_t *keys_hdr; + uint64_t* keys = (uint64_t*)scratch_alloc(&keys_hdr, + (size_t)nrows * sizeof(uint64_t)); + if (keys) { + radix_encode_ctx_t enc = { + .keys = keys, .n_keys = n_sort, .vecs = sort_vecs, + }; + for (uint8_t k = 0; k < n_sort; k++) { + enc.mins[k] = mins[k]; + enc.ranges[k] = maxs[k] - mins[k]; + enc.bit_shifts[k] = bit_shifts[k]; + enc.descs[k] = sort_descs[k]; + enc.enum_ranks[k] = enum_ranks[k]; + } + if (mk_pool) + ray_pool_dispatch(mk_pool, radix_encode_fn, &enc, nrows); + else + radix_encode_fn(&enc, 0, 0, nrows); + + if (nrows <= RADIX_SORT_THRESHOLD) { + key_introsort(keys, indices, nrows); + sorted_idx = indices; + radix_done = true; + } else { + ray_t *ktmp_hdr, *itmp_hdr; + uint64_t* ktmp = (uint64_t*)scratch_alloc(&ktmp_hdr, + (size_t)nrows * sizeof(uint64_t)); + int64_t* itmp = (int64_t*)scratch_alloc(&itmp_hdr, + (size_t)nrows * sizeof(int64_t)); + if (ktmp && itmp) { + sorted_idx = radix_sort_run(mk_pool, keys, indices, + ktmp, itmp, nrows, + comp_nbytes, NULL); + radix_done = (sorted_idx != NULL); + } + scratch_free(ktmp_hdr); + if (sorted_idx != itmp) scratch_free(itmp_hdr); + else radix_itmp_hdr = itmp_hdr; + } + } + scratch_free(keys_hdr); + } + } + } + + /* --- Merge sort fallback --- */ + if (!radix_done) { + sort_cmp_ctx_t cmp_ctx = { + .vecs = sort_vecs, .desc = sort_descs, + .nulls_first = NULL, .n_sort = n_sort, + }; + ray_t* tmp_hdr; + int64_t* tmp = (int64_t*)scratch_alloc(&tmp_hdr, + (size_t)nrows * sizeof(int64_t)); + if (!tmp) { scratch_free(indices_hdr); indices_hdr = NULL; goto oom; } + + ray_pool_t* pool = ray_pool_get(); + uint32_t nw = pool ? 
ray_pool_total_workers(pool) : 1; + if (pool && nw > 1 && nrows > 1024) { + sort_phase1_ctx_t p1ctx = { + .cmp_ctx = &cmp_ctx, .indices = indices, .tmp = tmp, + .nrows = nrows, .n_chunks = nw, + }; + ray_pool_dispatch_n(pool, sort_phase1_fn, &p1ctx, nw); + + int64_t chunk_size = (nrows + nw - 1) / nw; + int64_t run_size = chunk_size; + int64_t* src = indices; + int64_t* dst = tmp; + while (run_size < nrows) { + int64_t n_pairs = (nrows + 2 * run_size - 1) / (2 * run_size); + sort_merge_ctx_t mctx = { + .cmp_ctx = &cmp_ctx, .src = src, .dst = dst, + .nrows = nrows, .run_size = run_size, + }; + if (n_pairs > 1) + ray_pool_dispatch_n(pool, sort_merge_fn, &mctx, + (uint32_t)n_pairs); + else + sort_merge_fn(&mctx, 0, 0, n_pairs); + int64_t* t = src; src = dst; dst = t; + run_size *= 2; + } + if (src != indices) + memcpy(indices, src, (size_t)nrows * sizeof(int64_t)); + } else { + sort_merge_recursive(&cmp_ctx, indices, tmp, nrows); + } + scratch_free(tmp_hdr); + sorted_idx = indices; + } + } + + /* --- Phase 2: Find partition boundaries --- */ + /* Overallocate part_offsets to worst case (single-pass, no counting pass) */ + ray_t* poff_hdr = NULL; + int64_t* part_offsets = (int64_t*)scratch_alloc(&poff_hdr, + (size_t)(nrows + 1) * sizeof(int64_t)); + if (!part_offsets) { scratch_free(indices_hdr); goto oom; } + + part_offsets[0] = 0; + int64_t n_parts = 0; + + if (n_part > 0) { + /* Check if we can pack partition keys into uint64 for fast gather. + * Multi-key packing shifts each key by 32 bits, so any key requiring + * >32 bits in a multi-key scenario would be truncated. Force fallback + * when any 64-bit key appears alongside other keys. 
*/ + uint8_t pk_bits = 0; + bool can_pack = true; + bool has_64bit_key = false; + for (uint8_t k = 0; k < n_part; k++) { + int8_t t = sort_vecs[k]->type; + if (RAY_IS_SYM(t) || t == RAY_I32 || t == RAY_DATE || t == RAY_TIME) pk_bits += 32; + else if (t == RAY_I64 || t == RAY_SYM || t == RAY_TIMESTAMP || + t == RAY_F64) { pk_bits += 64; has_64bit_key = true; } + else { can_pack = false; break; } + if (pk_bits > 64) { can_pack = false; break; } + } + /* If multi-key with any 64-bit type, the <<32 packing truncates. + * Force sequential fallback for correctness. */ + if (can_pack && n_part > 1 && has_64bit_key) can_pack = false; + + ray_t* pkey_hdr = NULL; + uint64_t* pkey_sorted = can_pack ? + (uint64_t*)scratch_alloc(&pkey_hdr, (size_t)nrows * sizeof(uint64_t)) + : NULL; + + if (pkey_sorted) { + /* Parallel gather partition keys into contiguous array */ + pkey_gather_ctx_t gctx = { + .sorted_idx = sorted_idx, .pkey_sorted = pkey_sorted, + .sort_vecs = sort_vecs, .n_part = n_part, + }; + ray_pool_t* gpool = ray_pool_get(); + if (gpool) + ray_pool_dispatch(gpool, pkey_gather_fn, &gctx, nrows); + else + pkey_gather_fn(&gctx, 0, 0, nrows); + + /* Sequential scan on contiguous data (no random access) */ + for (int64_t i = 1; i < nrows; i++) + if (pkey_sorted[i] != pkey_sorted[i - 1]) + part_offsets[++n_parts] = i; + + scratch_free(pkey_hdr); + } else { + /* Fallback: single-pass random-access comparison */ + for (int64_t i = 1; i < nrows; i++) + if (win_keys_differ(sort_vecs, n_part, + sorted_idx[i - 1], sorted_idx[i])) + part_offsets[++n_parts] = i; + } + part_offsets[++n_parts] = nrows; + } else { + /* No partition keys: entire table is one partition. + * Minor memory waste (part_offsets sized for nrows+1) but no + * correctness issue — only indices 0 and 1 are used. 
*/ + part_offsets[1] = nrows; + n_parts = 1; + } + + /* Check cancellation before expensive per-partition compute */ + { + ray_pool_t* cpool = ray_pool_get(); + if (pool_cancelled(cpool)) { + scratch_free(poff_hdr); + scratch_free(indices_hdr); + if (radix_itmp_hdr) scratch_free(radix_itmp_hdr); + for (uint8_t k = 0; k < n_sort; k++) + if (win_enum_rank_hdrs[k]) scratch_free(win_enum_rank_hdrs[k]); + for (uint8_t k = 0; k < n_sort; k++) + if (sort_owned[k] && sort_vecs[k] && !RAY_IS_ERR(sort_vecs[k])) + ray_release(sort_vecs[k]); + for (uint8_t f = 0; f < n_funcs; f++) + if (func_owned[f] && func_vecs[f] && !RAY_IS_ERR(func_vecs[f])) + ray_release(func_vecs[f]); + return ray_error("cancel", NULL); + } + } + + /* --- Phase 3: Allocate result vectors and compute per-partition --- */ + for (uint8_t f = 0; f < n_funcs; f++) { + uint8_t kind = ext->window.func_kinds[f]; + ray_t* fvec = func_vecs[f]; + + bool out_f64 = false; + if (kind == RAY_WIN_AVG) { + out_f64 = true; + } else if (kind == RAY_WIN_SUM || kind == RAY_WIN_MIN || + kind == RAY_WIN_MAX || kind == RAY_WIN_LAG || + kind == RAY_WIN_LEAD || kind == RAY_WIN_FIRST_VALUE || + kind == RAY_WIN_LAST_VALUE || kind == RAY_WIN_NTH_VALUE) { + out_f64 = fvec && fvec->type == RAY_F64; + } + + is_f64[f] = out_f64; + result_vecs[f] = ray_vec_new(out_f64 ? RAY_F64 : RAY_I64, nrows); + if (!result_vecs[f] || RAY_IS_ERR(result_vecs[f])) { + for (uint8_t j = 0; j < f; j++) ray_release(result_vecs[j]); + scratch_free(poff_hdr); + scratch_free(indices_hdr); + goto oom; + } + result_vecs[f]->len = nrows; + memset(ray_data(result_vecs[f]), 0, (size_t)nrows * 8); + } + + /* Order key vectors start at sort_vecs[n_part] */ + ray_t** order_vecs = n_order > 0 ? &sort_vecs[n_part] : NULL; + + { + /* Pre-allocate nullmaps so win_set_null works in both paths. + * On OOM, force sequential path where win_set_null falls back + * to single-threaded ray_vec_set_null. 
*/ + bool nullmaps_ok = true; + for (uint8_t f = 0; f < n_funcs; f++) { + if (win_prepare_nullmap(result_vecs[f]) != RAY_OK) + nullmaps_ok = false; + } + + ray_pool_t* p3pool = ray_pool_get(); + if (p3pool && n_parts > 1 && nullmaps_ok) { + win_par_ctx_t pctx = { + .order_vecs = order_vecs, .n_order = n_order, + .func_vecs = func_vecs, .func_kinds = ext->window.func_kinds, + .func_params = ext->window.func_params, .n_funcs = n_funcs, + .frame_start = ext->window.frame_start, + .frame_end = ext->window.frame_end, + .sorted_idx = sorted_idx, .part_offsets = part_offsets, + .result_vecs = result_vecs, .is_f64 = is_f64, + }; + ray_pool_dispatch_n(p3pool, win_par_fn, &pctx, (uint32_t)n_parts); + } else { + for (int64_t p = 0; p < n_parts; p++) { + win_compute_partition( + order_vecs, n_order, + func_vecs, ext->window.func_kinds, ext->window.func_params, + n_funcs, ext->window.frame_start, ext->window.frame_end, + sorted_idx, part_offsets[p], part_offsets[p + 1], + result_vecs, is_f64); + } + } + + /* Set RAY_ATTR_HAS_NULLS on vectors that actually received nulls */ + for (uint8_t f = 0; f < n_funcs; f++) + win_finalize_nulls(result_vecs[f]); + } + + /* --- Phase 4: Build result table --- */ + ray_t* result = ray_table_new(ncols + n_funcs); + if (!result || RAY_IS_ERR(result)) { + for (uint8_t f = 0; f < n_funcs; f++) ray_release(result_vecs[f]); + scratch_free(poff_hdr); + scratch_free(indices_hdr); + goto oom; + } + + /* Pass-through original columns */ + for (int64_t c = 0; c < ncols; c++) { + ray_t* col = ray_table_get_col_idx(tbl, c); + if (!col) continue; + int64_t name_id = ray_table_col_name(tbl, c); + ray_retain(col); + result = ray_table_add_col(result, name_id, col); + ray_release(col); + } + + /* Add window result columns with auto-generated names */ + for (uint8_t f = 0; f < n_funcs; f++) { + char buf[16] = "_w"; + int pos = 2; + if (f >= 100) buf[pos++] = '0' + (f / 100); + if (f >= 10) buf[pos++] = '0' + ((f / 10) % 10); + buf[pos++] = '0' + (f % 10); + 
buf[pos] = '\0'; + int64_t name_id = ray_sym_intern(buf, (size_t)pos); + result = ray_table_add_col(result, name_id, result_vecs[f]); + ray_release(result_vecs[f]); + } + + scratch_free(poff_hdr); + if (radix_itmp_hdr) scratch_free(radix_itmp_hdr); + scratch_free(indices_hdr); + for (uint8_t k = 0; k < n_sort; k++) + if (win_enum_rank_hdrs[k]) scratch_free(win_enum_rank_hdrs[k]); + + /* Free owned key/func vectors */ + for (uint8_t k = 0; k < n_sort; k++) + if (sort_owned[k] && sort_vecs[k] && !RAY_IS_ERR(sort_vecs[k])) + ray_release(sort_vecs[k]); + for (uint8_t f = 0; f < n_funcs; f++) + if (func_owned[f] && func_vecs[f] && !RAY_IS_ERR(func_vecs[f])) + ray_release(func_vecs[f]); + + return result; + +oom: + if (radix_itmp_hdr) scratch_free(radix_itmp_hdr); + for (uint8_t k = 0; k < n_sort; k++) + if (win_enum_rank_hdrs[k]) scratch_free(win_enum_rank_hdrs[k]); + for (uint8_t k = 0; k < n_sort; k++) + if (sort_owned[k] && sort_vecs[k] && !RAY_IS_ERR(sort_vecs[k])) + ray_release(sort_vecs[k]); + for (uint8_t f = 0; f < n_funcs; f++) { + if (func_owned[f] && func_vecs[f] && !RAY_IS_ERR(func_vecs[f])) + ray_release(func_vecs[f]); + if (result_vecs[f] && !RAY_IS_ERR(result_vecs[f])) + ray_release(result_vecs[f]); + } + return ray_error("oom", NULL); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/store/col.c b/crates/rayforce-sys/vendor/rayforce/src/store/col.c new file mode 100644 index 0000000..e590ee7 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/store/col.c @@ -0,0 +1,954 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "col.h" +#include "core/platform.h" +#include "mem/heap.h" +#include "store/serde.h" +#include "store/fileio.h" +#include "table/sym.h" +#include "ops/idxop.h" +#include +#include +#include + +/* -------------------------------------------------------------------------- + * validate_sym_bounds -- check all indices in a RAY_SYM column are < sym_count + * + * Width-dispatched scan for maximum index. Returns RAY_ERR_CORRUPT if any + * index >= sym_count. Skipped when sym_count == 0 (allows raw column loads + * in tests without a sym file). 
+ * -------------------------------------------------------------------------- */ + +static ray_err_t validate_sym_bounds(const void* data, int64_t len, + uint8_t attrs, uint32_t sym_count) { + if (sym_count == 0 || len == 0) return RAY_OK; + + uint64_t max_id = 0; + switch (attrs & RAY_SYM_W_MASK) { + case RAY_SYM_W8: { + const uint8_t* p = (const uint8_t*)data; + for (int64_t i = 0; i < len; i++) + if (p[i] > max_id) max_id = p[i]; + break; + } + case RAY_SYM_W16: { + const uint16_t* p = (const uint16_t*)data; + for (int64_t i = 0; i < len; i++) + if (p[i] > max_id) max_id = p[i]; + break; + } + case RAY_SYM_W32: { + const uint32_t* p = (const uint32_t*)data; + for (int64_t i = 0; i < len; i++) + if (p[i] > max_id) max_id = p[i]; + break; + } + case RAY_SYM_W64: { + const int64_t* p = (const int64_t*)data; + for (int64_t i = 0; i < len; i++) { + if (p[i] < 0) return RAY_ERR_CORRUPT; + if ((uint64_t)p[i] > max_id) max_id = (uint64_t)p[i]; + } + break; + } + default: + return RAY_ERR_CORRUPT; + } + + if (max_id >= sym_count) return RAY_ERR_CORRUPT; + return RAY_OK; +} + +/* Magic numbers for extended column formats */ +#define STR_LIST_MAGIC 0x4C525453U /* "STRL" */ +#define STR_VEC_MAGIC 0x56525453U /* "STRV" */ +#define LIST_MAGIC 0x4754534CU /* "LSTG" */ +#define TABLE_MAGIC 0x4C425454U /* "TTBL" */ + +/* -------------------------------------------------------------------------- + * Column file format: + * Bytes 0-15: nullmap (inline) or zeroed (ext_nullmap / no nulls) + * Bytes 16-31: mmod=0, order=0, type, attrs, rc=0, len + * Bytes 32+: raw element data + * (if RAY_ATTR_NULLMAP_EXT): appended (len+7)/8 bitmap bytes + * + * On-disk format IS the in-memory format (zero deserialization on load). + * -------------------------------------------------------------------------- */ + +/* Explicit allowlist of types that are safe to serialize as raw bytes. 
+ * Only fixed-size scalar types -- pointer-bearing types (STR, LIST, TABLE) + * and non-scalar types are excluded. */ +static bool is_serializable_type(int8_t t) { + switch (t) { + case RAY_BOOL: case RAY_U8: case RAY_I16: + case RAY_I32: case RAY_I64: case RAY_F64: + case RAY_DATE: case RAY_TIME: case RAY_TIMESTAMP: case RAY_GUID: + case RAY_SYM: + return true; + default: + return false; + } +} + +/* -------------------------------------------------------------------------- + * String list detection: RAY_LIST whose elements are all -RAY_STR + * -------------------------------------------------------------------------- */ + +static bool is_str_list(ray_t* v) { + if (!v || RAY_IS_ERR(v)) return false; + if (v->type != RAY_LIST) return false; + ray_t** slots = (ray_t**)ray_data(v); + for (int64_t i = 0; i < v->len; i++) { + ray_t* elem = slots[i]; + if (!elem || RAY_IS_ERR(elem)) return false; + if (elem->type != -RAY_STR) return false; + } + return true; +} + +/* -------------------------------------------------------------------------- + * col_save_str_list -- serialize a list of string atoms + * + * Format: [4B magic "STRL"][8B count][for each: 4B len + data bytes] + * -------------------------------------------------------------------------- */ + +static ray_err_t col_save_str_list(ray_t* list, FILE* f) { + uint32_t magic = STR_LIST_MAGIC; + if (fwrite(&magic, 4, 1, f) != 1) return RAY_ERR_IO; + + int64_t count = list->len; + if (fwrite(&count, 8, 1, f) != 1) return RAY_ERR_IO; + + ray_t** slots = (ray_t**)ray_data(list); + for (int64_t i = 0; i < count; i++) { + ray_t* s = slots[i]; + const char* sp = ray_str_ptr(s); + size_t slen = ray_str_len(s); + uint32_t len32 = (uint32_t)slen; + if (fwrite(&len32, 4, 1, f) != 1) return RAY_ERR_IO; + if (slen > 0 && fwrite(sp, 1, slen, f) != slen) return RAY_ERR_IO; + } + return RAY_OK; +} + +/* -------------------------------------------------------------------------- + * col_load_str_list -- deserialize a string list 
from mapped data
+ *
+ * ptr points past the 4B magic. remaining = bytes available.
+ *
+ * Every length read from the buffer is checked against the bytes that are
+ * left before it is used. Always returns a non-NULL object: the decoded
+ * list, or an error object ("corrupt" for malformed input, "oom" when an
+ * allocation yields NULL).
+ * -------------------------------------------------------------------------- */
+
+static ray_t* col_load_str_list(const uint8_t* ptr, size_t remaining) {
+    if (remaining < 8) return ray_error("corrupt", NULL);
+    int64_t count;
+    memcpy(&count, ptr, 8);
+    ptr += 8; remaining -= 8;
+
+    /* Each entry needs at least its 4-byte length prefix. */
+    if (count < 0 || (uint64_t)count > remaining / 4)
+        return ray_error("corrupt", NULL);
+
+    ray_t* list = ray_list_new(count);
+    if (!list) return ray_error("oom", NULL);
+    if (RAY_IS_ERR(list)) return list;
+
+    for (int64_t i = 0; i < count; i++) {
+        if (remaining < 4) { ray_release(list); return ray_error("corrupt", NULL); }
+        uint32_t slen;
+        memcpy(&slen, ptr, 4);
+        ptr += 4; remaining -= 4;
+
+        if (slen > remaining) { ray_release(list); return ray_error("corrupt", NULL); }
+        ray_t* s = ray_str((const char*)ptr, (size_t)slen);
+        if (!s || RAY_IS_ERR(s)) {
+            ray_release(list);
+            return s ? s : ray_error("oom", NULL);
+        }
+        ptr += slen; remaining -= slen;
+
+        list = ray_list_append(list, s);
+        ray_release(s); /* list_append retains */
+        if (!list) return ray_error("oom", NULL);
+        if (RAY_IS_ERR(list)) return list;
+    }
+    return list;
+}
+
+/* --------------------------------------------------------------------------
+ * col_save_str_vec -- serialize a RAY_STR vector with Rayforce serde
+ *
+ * RAY_STR columns carry a string pool through the header union, so they cannot
+ * use the raw 32-byte column layout. Reuse the object wire format here; it
+ * already preserves pooled strings and external null bitmaps.
+ * -------------------------------------------------------------------------- */
+
+/* Writes [4B magic "STRV"] followed by the ray_ser_raw() encoding of vec.
+ * Returns RAY_ERR_OOM if the scratch buffer cannot be allocated and
+ * RAY_ERR_IO for a non-positive serde size, a size mismatch from the
+ * serializer, or a short write. */
+static ray_err_t col_save_str_vec(ray_t* vec, FILE* f) {
+    const uint32_t tag = STR_VEC_MAGIC;
+    if (fwrite(&tag, 4, 1, f) != 1) return RAY_ERR_IO;
+
+    const int64_t need = ray_serde_size(vec);
+    if (need <= 0) return RAY_ERR_IO;
+
+    ray_t* scratch = ray_vec_new(RAY_U8, need);
+    if (!scratch || RAY_IS_ERR(scratch)) return RAY_ERR_OOM;
+
+    /* Serialize into the scratch buffer, then flush it in one write; the
+     * write is skipped when the serializer reports an unexpected size. */
+    ray_err_t status = RAY_ERR_IO;
+    if (ray_ser_raw((uint8_t*)ray_data(scratch), vec) == need &&
+        fwrite(ray_data(scratch), 1, (size_t)need, f) == (size_t)need)
+        status = RAY_OK;
+
+    ray_release(scratch);
+    return status;
+}
+
+/* Decodes the serde payload that follows the "STRV" magic. ptr/remaining
+ * describe the bytes after the magic. Returns the decoded RAY_STR vector,
+ * whatever ray_de_raw() returned on failure, a "range" error when the
+ * buffer size does not fit in int64_t, or a "type" error when the payload
+ * decodes to something other than RAY_STR. */
+static ray_t* col_load_str_vec(const uint8_t* ptr, size_t remaining) {
+    if (remaining > (size_t)INT64_MAX) return ray_error("range", NULL);
+
+    int64_t avail = (int64_t)remaining;
+    ray_t* obj = ray_de_raw((uint8_t*)ptr, &avail);
+    if (!obj || RAY_IS_ERR(obj)) return obj;
+    if (obj->type == RAY_STR) return obj;
+
+    ray_release(obj);
+    return ray_error("type", NULL);
+}
+
+/* --------------------------------------------------------------------------
+ * Recursive element serialization for generic lists and tables
+ *
+ * Recursive element format:
+ *   [1B type]
+ *   atoms (type < 0):
+ *     -RAY_STR: [4B len][data bytes]
+ *     other:    [8B raw value]
+ *   vectors with is_serializable_type: [8B len][raw data]
+ *   RAY_LIST: [8B count][recursive elements...]
+ *   RAY_TABLE: [8B ncols][8B nrows][for each col: 8B name_sym + recursive col]
+ *   RAY_SYM vectors carry one extra attrs byte: [8B len][1B attrs][raw data]
+ * -------------------------------------------------------------------------- */
+
+static ray_err_t col_write_recursive(ray_t* obj, FILE* f);
+
+/* Write obj in the recursive element format.
+ * Returns RAY_ERR_TYPE for NULL/error objects, RAY_ERR_IO on a short write
+ * or a string too long for the 4-byte length field, RAY_ERR_CORRUPT for a
+ * negative vector length, and RAY_ERR_NYI for unsupported types. */
+static ray_err_t col_write_recursive(ray_t* obj, FILE* f) {
+    if (!obj || RAY_IS_ERR(obj)) return RAY_ERR_TYPE;
+
+    int8_t type = obj->type;
+    if (fwrite(&type, 1, 1, f) != 1) return RAY_ERR_IO;
+
+    if (type < 0) {
+        /* Atom */
+        if (type == -RAY_STR) {
+            const char* sp = ray_str_ptr(obj);
+            size_t slen = ray_str_len(obj);
+            /* A truncated length with a full payload would desynchronize
+             * the reader, so refuse instead of writing a broken file. */
+            if ((uint64_t)slen > UINT32_MAX) return RAY_ERR_IO;
+            uint32_t len32 = (uint32_t)slen;
+            if (fwrite(&len32, 4, 1, f) != 1) return RAY_ERR_IO;
+            if (slen > 0 && fwrite(sp, 1, slen, f) != slen) return RAY_ERR_IO;
+        } else {
+            /* Fixed-size atom: write 8 bytes of the value union */
+            if (fwrite(&obj->i64, 8, 1, f) != 1) return RAY_ERR_IO;
+        }
+        return RAY_OK;
+    }
+
+    if (is_serializable_type(type)) {
+        /* Fixed-size vector: write len + raw data.
+         * RAY_SYM: also write attrs byte (adaptive width W8/W16/W32/W64). */
+        int64_t len = obj->len;
+        if (len < 0) return RAY_ERR_CORRUPT;
+        if (fwrite(&len, 8, 1, f) != 1) return RAY_ERR_IO;
+        if (type == RAY_SYM) {
+            uint8_t attrs = obj->attrs;
+            if (fwrite(&attrs, 1, 1, f) != 1) return RAY_ERR_IO;
+        }
+        uint8_t esz = ray_sym_elem_size(type, obj->attrs);
+        size_t data_size = (size_t)len * esz;
+        if (data_size > 0 && fwrite(ray_data(obj), 1, data_size, f) != data_size)
+            return RAY_ERR_IO;
+        return RAY_OK;
+    }
+
+    if (type == RAY_LIST) {
+        int64_t count = obj->len;
+        if (fwrite(&count, 8, 1, f) != 1) return RAY_ERR_IO;
+        ray_t** slots = (ray_t**)ray_data(obj);
+        for (int64_t i = 0; i < count; i++) {
+            ray_err_t err = col_write_recursive(slots[i], f);
+            if (err != RAY_OK) return err;
+        }
+        return RAY_OK;
+    }
+
+    if (type == RAY_TABLE) {
+        int64_t ncols = ray_table_ncols(obj);
+        int64_t nrows = ray_table_nrows(obj);
+        if (fwrite(&ncols, 8, 1, f) != 1) return RAY_ERR_IO;
+        if (fwrite(&nrows, 8, 1, f) != 1) return RAY_ERR_IO;
+        for (int64_t c = 0; c < ncols; c++) {
+            int64_t name_sym = ray_table_col_name(obj, c);
+            if (fwrite(&name_sym, 8, 1, f) != 1) return RAY_ERR_IO;
+            ray_t* col = ray_table_get_col_idx(obj, c);
+            ray_err_t err = col_write_recursive(col, f);
+            if (err != RAY_OK) return err;
+        }
+        return RAY_OK;
+    }
+
+    return RAY_ERR_NYI;
+}
+
+/* Read recursive element from mapped buffer */
+static ray_t* col_read_recursive(const uint8_t** pp, size_t* remaining);
+
+/* Decode one element, advancing *pp and shrinking *remaining by the bytes
+ * consumed. The buffer is untrusted: every length and count is checked
+ * against *remaining before it drives a read or an allocation.
+ * Returns the decoded object, or an error object ("corrupt" for malformed
+ * input, "nyi" for an unsupported type byte); a failing allocator result
+ * is passed through unchanged. */
+static ray_t* col_read_recursive(const uint8_t** pp, size_t* remaining) {
+    if (*remaining < 1) return ray_error("corrupt", NULL);
+    int8_t type;
+    memcpy(&type, *pp, 1);
+    *pp += 1; *remaining -= 1;
+
+    if (type < 0) {
+        /* Atom */
+        if (type == -RAY_STR) {
+            if (*remaining < 4) return ray_error("corrupt", NULL);
+            uint32_t slen;
+            memcpy(&slen, *pp, 4);
+            *pp += 4; *remaining -= 4;
+            if (slen > *remaining) return ray_error("corrupt", NULL);
+            ray_t* s = ray_str((const char*)*pp, (size_t)slen);
+            *pp += slen; *remaining -= slen;
+            return s;
+        } else {
+            /* Fixed atom: 8 bytes */
+            if (*remaining < 8) return ray_error("corrupt", NULL);
+            int64_t val;
+            memcpy(&val, *pp, 8);
+            *pp += 8; *remaining -= 8;
+
+            ray_t* atom = ray_alloc(0);
+            if (!atom || RAY_IS_ERR(atom)) return atom;
+            atom->type = type;
+            atom->i64 = val;
+            return atom;
+        }
+    }
+
+    if (is_serializable_type(type)) {
+        /* Fixed-size vector */
+        if (*remaining < 8) return ray_error("corrupt", NULL);
+        int64_t len;
+        memcpy(&len, *pp, 8);
+        *pp += 8; *remaining -= 8;
+        if (len < 0) return ray_error("corrupt", NULL);
+
+        /* RAY_SYM: read attrs byte for adaptive width */
+        uint8_t attrs = 0;
+        if (type == RAY_SYM) {
+            if (*remaining < 1) return ray_error("corrupt", NULL);
+            memcpy(&attrs, *pp, 1);
+            *pp += 1; *remaining -= 1;
+        }
+
+        uint8_t esz = ray_sym_elem_size(type, attrs);
+        if (esz > 0 && (uint64_t)len > SIZE_MAX / esz)
+            return ray_error("corrupt", NULL);
+        size_t data_size = (size_t)len * esz;
+        if (data_size > *remaining) return ray_error("corrupt", NULL);
+
+        ray_t* vec = (type == RAY_SYM)
+                     ? ray_sym_vec_new(attrs & RAY_SYM_W_MASK, len)
+                     : ray_vec_new(type, len);
+        if (!vec || RAY_IS_ERR(vec)) return vec;
+        vec->len = len;
+        if (data_size > 0)
+            memcpy(ray_data(vec), *pp, data_size);
+        *pp += data_size; *remaining -= data_size;
+
+        if (type == RAY_SYM) {
+            uint32_t sc = ray_sym_count();
+            ray_err_t ve = validate_sym_bounds(ray_data(vec), len, attrs, sc);
+            if (ve != RAY_OK) { ray_release(vec); return ray_error(ray_err_code_str(ve), NULL); }
+        }
+        return vec;
+    }
+
+    if (type == RAY_LIST) {
+        if (*remaining < 8) return ray_error("corrupt", NULL);
+        int64_t count;
+        memcpy(&count, *pp, 8);
+        *pp += 8; *remaining -= 8;
+        /* Every element occupies at least its type byte, so a count larger
+         * than the bytes left cannot be genuine. Rejecting it here keeps a
+         * crafted header from requesting an enormous allocation. */
+        if (count < 0 || (uint64_t)count > *remaining)
+            return ray_error("corrupt", NULL);
+
+        ray_t* list = ray_list_new(count);
+        if (!list || RAY_IS_ERR(list)) return list;
+        for (int64_t i = 0; i < count; i++) {
+            ray_t* elem = col_read_recursive(pp, remaining);
+            if (!elem || RAY_IS_ERR(elem)) { ray_release(list); return elem; }
+            list = ray_list_append(list, elem);
+            ray_release(elem);
+            if (!list || RAY_IS_ERR(list)) return list;
+        }
+        return list;
+    }
+
+    if (type == RAY_TABLE) {
+        if (*remaining < 16) return ray_error("corrupt", NULL);
+        int64_t ncols, nrows;
+        memcpy(&ncols, *pp, 8);
+        *pp += 8; *remaining -= 8;
+        memcpy(&nrows, *pp, 8);
+        *pp += 8; *remaining -= 8;
+        (void)nrows; /* nrows is reconstructed from columns */
+
+        /* Each column needs an 8-byte name plus at least a type byte. */
+        if (ncols < 0 || (uint64_t)ncols > *remaining / 9)
+            return ray_error("corrupt", NULL);
+        ray_t* tbl = ray_table_new(ncols);
+        if (!tbl || RAY_IS_ERR(tbl)) return tbl;
+
+        for (int64_t c = 0; c < ncols; c++) {
+            if (*remaining < 8) { ray_release(tbl); return ray_error("corrupt", NULL); }
+            int64_t name_sym;
+            memcpy(&name_sym, *pp, 8);
+            *pp += 8; *remaining -= 8;
+
+            ray_t* col = col_read_recursive(pp, remaining);
+            if (!col || RAY_IS_ERR(col)) { ray_release(tbl); return col; }
+            tbl = ray_table_add_col(tbl, name_sym, col);
+            ray_release(col); /* table_add_col retains */
+            if (!tbl || RAY_IS_ERR(tbl)) return tbl;
+        }
+        return tbl;
+    }
+
+    return ray_error("nyi", NULL);
+}
+
+/* --------------------------------------------------------------------------
+ * col_save_list -- serialize a generic RAY_LIST
+ *
+ * Writes the 4-byte "LSTG" magic followed by the recursive encoding.
+ * -------------------------------------------------------------------------- */
+
+static ray_err_t col_save_list(ray_t* list, FILE* f) {
+    uint32_t magic = LIST_MAGIC;
+    if (fwrite(&magic, 4, 1, f) != 1) return RAY_ERR_IO;
+    return col_write_recursive(list, f);
+}
+
+/* --------------------------------------------------------------------------
+ * col_save_table -- serialize a RAY_TABLE
+ *
+ * Writes the 4-byte "TTBL" magic followed by the recursive encoding.
+ * -------------------------------------------------------------------------- */
+
+static ray_err_t col_save_table(ray_t* tbl, FILE* f) {
+    uint32_t magic = TABLE_MAGIC;
+    if (fwrite(&magic, 4, 1, f) != 1) return RAY_ERR_IO;
+    return col_write_recursive(tbl, f);
+}
+
+/* --------------------------------------------------------------------------
+ * try_load_link_sidecar -- attach HAS_LINK to vec from `.link`
+ *
+ * Best-effort: missing sidecar, unreadable file, or empty contents leave
+ * vec as a plain
int column. Only RAY_I32 / RAY_I64 columns are eligible.
+ * The sidecar holds the target table sym name in plain text; we intern it
+ * into the local sym table and write the resulting sym ID + HAS_LINK bit.
+ * Used by both ray_col_load (buddy-copy path) and ray_col_mmap (zero-copy
+ * path) so linked columns survive both load styles.
+ * -------------------------------------------------------------------------- */
+static void try_load_link_sidecar(ray_t* vec, const char* path) {
+    if (!vec || (vec->type != RAY_I32 && vec->type != RAY_I64)) return;
+    char link_path[1024];
+    size_t plen = strlen(path);
+    if (plen + 6 >= sizeof(link_path)) return;
+    memcpy(link_path, path, plen);
+    memcpy(link_path + plen, ".link", 6);   /* 6 = ".link" + NUL */
+    FILE* lf = fopen(link_path, "rb");
+    if (!lf) return;
+    char buf[256];
+    size_t n = fread(buf, 1, sizeof(buf) - 1, lf);
+    fclose(lf);
+    /* Trim trailing whitespace / NULs so a hand-edited sidecar still works. */
+    while (n > 0 && (buf[n-1] == '\n' || buf[n-1] == '\r'
+                     || buf[n-1] == ' ' || buf[n-1] == '\t'
+                     || buf[n-1] == '\0')) n--;
+    if (n == 0) return;
+    int64_t target_sym = ray_sym_intern(buf, n);
+    if (target_sym < 0) return;
+    vec->link_target = target_sym;
+    vec->attrs |= RAY_ATTR_HAS_LINK;
+}
+
+/* --------------------------------------------------------------------------
+ * col_save_raw -- write a fixed-size column as [32B header][data][ext bitmap]
+ *
+ * The caller has already checked is_serializable_type(vec->type) and owns
+ * the FILE (this function never closes it). Returns RAY_ERR_IO on a short
+ * write, an out-of-range slice or a size overflow, RAY_ERR_CORRUPT for a
+ * negative length, and RAY_ERR_TYPE when the element size is unknown.
+ * -------------------------------------------------------------------------- */
+static ray_err_t col_save_raw(ray_t* vec, FILE* f) {
+    /* Write a clean header (mmod=0, rc=0) */
+    ray_t header;
+    memcpy(&header, vec, 32);
+    header.mmod = 0;
+    header.order = 0;
+    /* For RAY_SYM: store sym count in rc field (always 0 on disk otherwise).
+     * This serves as O(1) fast-reject metadata on load. */
+    header.rc = (vec->type == RAY_SYM) ? ray_sym_count() : 0;
+
+    /* HAS_INDEX rebase: an attached accelerator index displaces the
+     * 16-byte nullmap union with an index pointer. Persist the
+     * pre-attach state instead -- strip HAS_INDEX, restore the saved
+     * NULLMAP_EXT bit, and copy the saved bitmap bytes back into the
+     * on-disk header. ext_for_append captures the saved ext-nullmap
+     * pointer so the bitmap append at end-of-write reads from the
+     * right place. */
+    ray_t* ext_for_append = (vec->attrs & RAY_ATTR_NULLMAP_EXT)
+                            ? vec->ext_nullmap : NULL;
+    if (vec->attrs & RAY_ATTR_HAS_INDEX) {
+        ray_index_t* ix = ray_index_payload(vec->index);
+        header.attrs &= ~RAY_ATTR_HAS_INDEX;
+        if (ix->saved_attrs & RAY_ATTR_NULLMAP_EXT) {
+            header.attrs |= RAY_ATTR_NULLMAP_EXT;
+            memcpy(&ext_for_append, &ix->saved_nullmap[0],
+                   sizeof(ext_for_append));
+        } else {
+            header.attrs &= ~RAY_ATTR_NULLMAP_EXT;
+            ext_for_append = NULL;
+        }
+        memcpy(header.nullmap, ix->saved_nullmap, 16);
+    }
+
+    /* HAS_LINK rebase: target sym ID lives at header.nullmap[8..15],
+     * but sym IDs are process-local -- the on-disk file would be
+     * useless across runs. Strip the bit and zero the slot; the
+     * sidecar `.link` file (written by ray_col_save after rename)
+     * carries the target table name in text form. */
+    if (vec->attrs & RAY_ATTR_HAS_LINK) {
+        header.attrs &= (uint8_t)~RAY_ATTR_HAS_LINK;
+        memset(header.nullmap + 8, 0, 8);
+    }
+
+    /* Clear slice field; preserve ext_nullmap flag for bitmap append */
+    header.attrs &= ~RAY_ATTR_SLICE;
+    if (!(header.attrs & RAY_ATTR_HAS_NULLS)) {
+        memset(header.nullmap, 0, 16);
+        header.attrs &= ~RAY_ATTR_NULLMAP_EXT;
+    } else if (header.attrs & RAY_ATTR_NULLMAP_EXT) {
+        /* Ext bitmap appended after data; zero pointer bytes in header */
+        memset(header.nullmap, 0, 16);
+    }
+
+    if (fwrite(&header, 1, 32, f) != 32) return RAY_ERR_IO;
+
+    /* Write data */
+    if (vec->len < 0) return RAY_ERR_CORRUPT;
+    uint8_t esz = ray_sym_elem_size(vec->type, vec->attrs);
+    if (esz == 0 && vec->len > 0) return RAY_ERR_TYPE;
+    /* Overflow check: ensure len*esz fits in size_t with 32-byte header room */
+    if ((uint64_t)vec->len > (SIZE_MAX - 32) / (esz ? esz : 1))
+        return RAY_ERR_IO;
+    size_t data_size = (size_t)vec->len * esz;
+
+    void* data;
+    if (vec->attrs & RAY_ATTR_SLICE) {
+        /* Validate slice bounds before computing data pointer. Written as a
+         * subtraction so offset + len cannot overflow int64_t. */
+        ray_t* parent = vec->slice_parent;
+        if (!parent || vec->slice_offset < 0 ||
+            vec->slice_offset > parent->len ||
+            vec->len > parent->len - vec->slice_offset)
+            return RAY_ERR_IO;
+        data = (char*)ray_data(parent) + vec->slice_offset * esz;
+    } else {
+        data = ray_data(vec);
+    }
+
+    if (data_size > 0 && fwrite(data, 1, data_size, f) != data_size)
+        return RAY_ERR_IO;
+
+    /* Append external nullmap bitmap after data. Use header.attrs
+     * (rebased above for HAS_INDEX) and ext_for_append (the
+     * effective ext_nullmap pointer, possibly extracted from the
+     * index's saved snapshot). */
+    if ((vec->attrs & RAY_ATTR_HAS_NULLS) &&
+        (header.attrs & RAY_ATTR_NULLMAP_EXT) && ext_for_append) {
+        size_t bitmap_len = ((size_t)vec->len + 7) / 8;
+        if (fwrite(ray_data(ext_for_append), 1, bitmap_len, f) != bitmap_len)
+            return RAY_ERR_IO;
+    }
+
+    return RAY_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * ray_col_save -- write a vector to a column file
+ *
+ * Crash-safe sequence: write `<path>.tmp`, close it (checking that the
+ * buffered data was flushed), fsync, then rename over `path`. On any
+ * failure the temp file is removed and `path` is left untouched.
+ *
+ * Returns RAY_ERR_TYPE for a NULL/error vec, RAY_ERR_IO for a NULL or
+ * over-long path and for I/O failures, RAY_ERR_NYI for types with no
+ * on-disk encoding, or whatever the selected encoder reports.
+ * The `.link` sidecar update that follows the rename is best-effort and
+ * never changes the return value.
+ * -------------------------------------------------------------------------- */
+
+ray_err_t ray_col_save(ray_t* vec, const char* path) {
+    if (!vec || RAY_IS_ERR(vec)) return RAY_ERR_TYPE;
+    if (!path) return RAY_ERR_IO;
+
+    /* Build temp path for crash-safe write: write tmp, fsync, atomic rename */
+    char tmp_path[1024];
+    int tmp_len = snprintf(tmp_path, sizeof(tmp_path), "%s.tmp", path);
+    if (tmp_len < 0 || tmp_len >= (int)sizeof(tmp_path))
+        return RAY_ERR_IO;
+
+    /* Pick the encoder. Order matters: a list of string atoms is also a
+     * RAY_LIST, so the string-list test has to come first. */
+    ray_err_t (*save_body)(ray_t*, FILE*);
+    if (is_str_list(vec))                      save_body = col_save_str_list;
+    else if (vec->type == RAY_STR)             save_body = col_save_str_vec;
+    else if (vec->type == RAY_LIST)            save_body = col_save_list;
+    else if (vec->type == RAY_TABLE)           save_body = col_save_table;
+    else if (is_serializable_type(vec->type))  save_body = col_save_raw;
+    else return RAY_ERR_NYI;
+
+    FILE* f = fopen(tmp_path, "wb");
+    if (!f) return RAY_ERR_IO;
+    ray_err_t err = save_body(vec, f);
+    /* fclose flushes stdio's buffer; a failure here means bytes were lost
+     * even though every fwrite reported success. */
+    if (fclose(f) != 0 && err == RAY_OK) err = RAY_ERR_IO;
+    if (err != RAY_OK) { remove(tmp_path); return err; }
+
+    /* Fsync temp file for durability */
+    ray_fd_t tmp_fd = ray_file_open(tmp_path, RAY_OPEN_READ | RAY_OPEN_WRITE);
+    if (tmp_fd == RAY_FD_INVALID) { remove(tmp_path); return RAY_ERR_IO; }
+    err = ray_file_sync(tmp_fd);
+    ray_file_close(tmp_fd);
+    if (err != RAY_OK) { remove(tmp_path); return err; }
+
+    /* Atomic rename: tmp -> final path */
+    err = ray_file_rename(tmp_path, path);
+    if (err != RAY_OK) { remove(tmp_path); return err; }
+
+    /* Linked-column sidecar: write `.link` containing the target
+     * table's sym name (text form) so it survives the per-process
+     * sym-ID re-assignment. Remove any stale `.link` from a previous
+     * save when the current vec is unlinked. */
+    {
+        char link_path[1024];
+        size_t plen = strlen(path);
+        if (plen + 6 < sizeof(link_path)) {
+            memcpy(link_path, path, plen);
+            memcpy(link_path + plen, ".link", 6);
+            if (vec->attrs & RAY_ATTR_HAS_LINK) {
+                ray_t* sym_str = ray_sym_str(vec->link_target);
+                const char* sp = sym_str ? ray_str_ptr(sym_str) : NULL;
+                size_t slen = sym_str ? ray_str_len(sym_str) : 0;
+                if (sp && slen > 0) {
+                    char tmp_link[1024];
+                    memcpy(tmp_link, link_path, plen + 6);
+                    if (plen + 10 < sizeof(tmp_link)) {
+                        /* Overwrite the NUL after ".link" with ".tmp". */
+                        memcpy(tmp_link + plen + 5, ".tmp", 5);
+                        FILE* lf = fopen(tmp_link, "wb");
+                        if (lf) {
+                            bool ok = fwrite(sp, 1, slen, lf) == slen;
+                            if (fclose(lf) != 0) ok = false;
+                            /* Never leave a half-written temp sidecar behind. */
+                            if (!ok || ray_file_rename(tmp_link, link_path) != RAY_OK)
+                                remove(tmp_link);
+                        }
+                    }
+                }
+            } else {
+                /* No link on this column -- clean stale sidecar if any. */
+                remove(link_path);
+            }
+        }
+    }
+
+    return RAY_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * col_validate_mapped -- shared validation for ray_col_load / ray_col_mmap
+ *
+ * Maps the file, validates header/type/bounds, and returns parsed metadata.
+ * On success, the mapping remains open (caller must unmap on error paths).
+ * Returns NULL on success, or an error ray_t* on failure (mapping already
+ * cleaned up in that case).
+ * -------------------------------------------------------------------------- */
+
+/* Result of col_validate_mapped(): the live mapping plus derived sizes. */
+typedef struct {
+    void*   mapped;          /* base of the file mapping */
+    size_t  mapped_size;     /* size of the mapping in bytes */
+    ray_t*  header;          /* pointer into mapped region */
+    uint8_t esz;             /* element size for the header's type/attrs */
+    size_t  data_size;       /* len * esz */
+    bool    has_ext_nullmap; /* HAS_NULLS and NULLMAP_EXT both set */
+    size_t  bitmap_len;      /* (len+7)/8 when has_ext_nullmap, else 0 */
+} col_mapped_t;
+
+static ray_t* col_validate_mapped(const char* path, col_mapped_t* out) {
+    size_t mapped_size = 0;
+    void* ptr = ray_vm_map_file(path, &mapped_size);
+    if (!ptr) return ray_error("io", NULL);
+
+    if (mapped_size < 32) {
+        ray_vm_unmap_file(ptr, mapped_size);
+        return ray_error("corrupt", NULL);
+    }
+
+    ray_t* hdr = (ray_t*)ptr;
+
+    /* Validate type from untrusted file data -- allowlist only */
+    if (!is_serializable_type(hdr->type)) {
+        ray_vm_unmap_file(ptr, mapped_size);
+        return ray_error("nyi", NULL);
+    }
+    if (hdr->len < 0) {
+        ray_vm_unmap_file(ptr, mapped_size);
+        return ray_error("corrupt", NULL);
+    }
+
+    uint8_t esz = ray_sym_elem_size(hdr->type, hdr->attrs);
+    if (esz == 0 && hdr->len > 0) {
+        ray_vm_unmap_file(ptr, mapped_size);
+        return ray_error("type", NULL);
+    }
+    /* Overflow check: ensure len*esz fits in size_t with 32-byte header room */
+    if ((uint64_t)hdr->len > (SIZE_MAX - 32) / (esz ? esz : 1)) {
+        ray_vm_unmap_file(ptr, mapped_size);
+        return ray_error("io", NULL);
+    }
+    size_t data_size = (size_t)hdr->len * esz;
+    if (32 + data_size > mapped_size) {
+        ray_vm_unmap_file(ptr, mapped_size);
+        return ray_error("corrupt", NULL);
+    }
+
+    /* Check for appended ext_nullmap bitmap */
+    bool has_ext_nullmap = (hdr->attrs & RAY_ATTR_HAS_NULLS) &&
+                           (hdr->attrs & RAY_ATTR_NULLMAP_EXT);
+    size_t bitmap_len = has_ext_nullmap ? ((size_t)hdr->len + 7) / 8 : 0;
+    if (has_ext_nullmap && 32 + data_size + bitmap_len > mapped_size) {
+        ray_vm_unmap_file(ptr, mapped_size);
+        return ray_error("corrupt", NULL);
+    }
+
+    /* RAY_SYM: fast-reject via sym count in header rc field.
+     * Use memcpy (not atomic_load) since file data is not atomic storage. */
+    if (hdr->type == RAY_SYM) {
+        uint32_t saved_sc;
+        memcpy(&saved_sc, (const char*)ptr + offsetof(ray_t, rc), sizeof(saved_sc));
+        uint32_t cur_sc = ray_sym_count();
+        if (saved_sc > 0 && cur_sc > 0 && cur_sc < saved_sc) {
+            ray_vm_unmap_file(ptr, mapped_size);
+            return ray_error("corrupt", NULL);
+        }
+    }
+
+    out->mapped = ptr;
+    out->mapped_size = mapped_size;
+    out->header = hdr;
+    out->esz = esz;
+    out->data_size = data_size;
+    out->has_ext_nullmap = has_ext_nullmap;
+    out->bitmap_len = bitmap_len;
+    return NULL; /* success */
+}
+
+/* --------------------------------------------------------------------------
+ * col_restore_ext_nullmap -- allocate buddy-backed copy of ext nullmap
+ *
+ * Shared by ray_col_load and ray_col_mmap. On success, sets vec->ext_nullmap.
+ * Returns NULL on success, or an "oom" error object on failure.
+ * -------------------------------------------------------------------------- */
+
+static ray_t* col_restore_ext_nullmap(ray_t* vec, const col_mapped_t* cm) {
+    ray_t* ext = ray_vec_new(RAY_U8, (int64_t)cm->bitmap_len);
+    if (!ext || RAY_IS_ERR(ext)) return ray_error("oom", NULL);
+    ext->len = (int64_t)cm->bitmap_len;
+    /* The bitmap sits directly after the element data in the file. */
+    memcpy(ray_data(ext), (char*)cm->mapped + 32 + cm->data_size, cm->bitmap_len);
+    vec->ext_nullmap = ext;
+    return NULL; /* success */
+}
+
+/* --------------------------------------------------------------------------
+ * ray_col_load -- load a column file via mmap (zero deserialization)
+ *
+ * Handles the extended formats (STRL / STRV / LSTG / TTBL magic) as well
+ * as raw fixed-size columns. The result is always a buddy-allocated copy;
+ * no mapping outlives the call. Returns the loaded object or an error
+ * object.
+ * -------------------------------------------------------------------------- */
+
+ray_t* ray_col_load(const char* path) {
+    if (!path) return ray_error("io", NULL);
+
+    /* Read file into temp mmap for validation, then copy to buddy block.
+     * This avoids the mmap lifecycle problem (mmod=1 blocks are never freed). */
+    size_t mapped_size = 0;
+    void* ptr = ray_vm_map_file(path, &mapped_size);
+    if (!ptr) return ray_error("io", NULL);
+
+    /* Check for extended format magic numbers (first 4 bytes) */
+    if (mapped_size >= 4) {
+        uint32_t magic;
+        memcpy(&magic, ptr, 4);
+
+        if (magic == STR_LIST_MAGIC) {
+            ray_t* result = col_load_str_list((const uint8_t*)ptr + 4, mapped_size - 4);
+            ray_vm_unmap_file(ptr, mapped_size);
+            return result;
+        }
+        if (magic == STR_VEC_MAGIC) {
+            ray_t* result = col_load_str_vec((const uint8_t*)ptr + 4, mapped_size - 4);
+            ray_vm_unmap_file(ptr, mapped_size);
+            return result;
+        }
+        if (magic == LIST_MAGIC || magic == TABLE_MAGIC) {
+            const uint8_t* p = (const uint8_t*)ptr + 4;
+            size_t rem = mapped_size - 4;
+            ray_t* result = col_read_recursive(&p, &rem);
+            ray_vm_unmap_file(ptr, mapped_size);
+            return result;
+        }
+    }
+    /* Unmap the initial mapping; col_validate_mapped will re-map for validation */
+    ray_vm_unmap_file(ptr, mapped_size);
+
+    col_mapped_t cm = {0};
+    ray_t* err = col_validate_mapped(path, &cm);
+    if (err) return err;
+
+    /* Allocate buddy block and copy file data */
+    ray_t* vec = ray_alloc(cm.data_size);
+    if (!vec || RAY_IS_ERR(vec)) {
+        ray_vm_unmap_file(cm.mapped, cm.mapped_size);
+        return vec ? vec : ray_error("oom", NULL);
+    }
+    uint8_t saved_order = vec->order; /* preserve buddy order */
+    memcpy(vec, cm.mapped, 32 + cm.data_size);
+
+    /* Fix up header for buddy-allocated block. This must happen right after
+     * the memcpy, before any path that can free vec: the copy overwrote
+     * order/mmod/rc with the file's values, and the block must not be handed
+     * back to the allocator in that state. */
+    vec->mmod = 0;
+    vec->order = saved_order;
+    vec->attrs &= ~RAY_ATTR_SLICE;
+    /* ray_col_save never persists HAS_INDEX / HAS_LINK, so a file carrying
+     * them is stale or hostile; the union bytes they describe would be
+     * process-local pointers / sym IDs. Drop the bits. */
+    vec->attrs &= (uint8_t)~(RAY_ATTR_HAS_INDEX | RAY_ATTR_HAS_LINK);
+    if (!cm.has_ext_nullmap)
+        vec->attrs &= ~RAY_ATTR_NULLMAP_EXT;
+    ray_atomic_store(&vec->rc, 1);
+
+    /* Restore external nullmap if present */
+    if (cm.has_ext_nullmap) {
+        ray_t* ext_err = col_restore_ext_nullmap(vec, &cm);
+        if (ext_err) {
+            ray_vm_unmap_file(cm.mapped, cm.mapped_size);
+            /* No ext block was attached; make sure the header does not
+             * advertise one built from raw file bytes before freeing. */
+            vec->attrs &= ~RAY_ATTR_NULLMAP_EXT;
+            memset(vec->nullmap, 0, 16);
+            ray_free(vec);
+            return ext_err;
+        }
+    }
+
+    ray_vm_unmap_file(cm.mapped, cm.mapped_size);
+
+    /* RAY_SYM: bounds-check every stored ID against the live sym table */
+    if (vec->type == RAY_SYM) {
+        ray_err_t sym_err = validate_sym_bounds(ray_data(vec), vec->len,
+                                                vec->attrs, ray_sym_count());
+        if (sym_err != RAY_OK) {
+            ray_release(vec);
+            return ray_error(ray_err_code_str(sym_err), NULL);
+        }
+    }
+
+    try_load_link_sidecar(vec, path);
+
+    return vec;
+}
+
+/* --------------------------------------------------------------------------
+ * ray_col_mmap -- zero-copy column load via mmap (mmod=1)
+ *
+ * Returns a ray_t* backed directly by the file's mmap region.
+ * MAP_PRIVATE gives COW semantics -- only the header page gets a private
+ * copy when we write mmod/rc. All data pages stay shared with page cache.
+ * ray_release -> ray_free -> munmap.
+ *
+ * Only raw fixed-size columns are accepted here; files in the extended
+ * (magic-prefixed) formats fail header validation.
+ * -------------------------------------------------------------------------- */
+
+ray_t* ray_col_mmap(const char* path) {
+    if (!path) return ray_error("io", NULL);
+
+    col_mapped_t cm = {0};
+    ray_t* err = col_validate_mapped(path, &cm);
+    if (err) return err;
+
+    /* Validate that file size matches expected layout exactly.
+     * ray_free() reconstructs the munmap size using the same formula. */
+    size_t expected = 32 + cm.data_size + cm.bitmap_len;
+    if (expected != cm.mapped_size) {
+        ray_vm_unmap_file(cm.mapped, cm.mapped_size);
+        return ray_error("io", NULL);
+    }
+
+    ray_t* vec = cm.header;
+
+    /* RAY_SYM: bounds check on data */
+    if (vec->type == RAY_SYM) {
+        ray_err_t sym_err = validate_sym_bounds(
+            (const char*)cm.mapped + 32, vec->len, vec->attrs, ray_sym_count());
+        if (sym_err != RAY_OK) {
+            ray_vm_unmap_file(cm.mapped, cm.mapped_size);
+            return ray_error(ray_err_code_str(sym_err), NULL);
+        }
+    }
+
+    /* Restore external nullmap: allocate buddy-backed copy
+     * (ext_nullmap must be a proper ray_t for ref counting) */
+    if (cm.has_ext_nullmap) {
+        ray_t* ext_err = col_restore_ext_nullmap(vec, &cm);
+        if (ext_err) {
+            ray_vm_unmap_file(cm.mapped, cm.mapped_size);
+            return ext_err;
+        }
+    }
+
+    /* Patch header -- MAP_PRIVATE COW: only the header page gets copied */
+    vec->mmod = 1;
+    vec->order = 0;
+    vec->attrs &= ~RAY_ATTR_SLICE;
+    /* Same reasoning as ray_col_load: these bits are never written by
+     * ray_col_save, so do not trust them when they come from the file. */
+    vec->attrs &= (uint8_t)~(RAY_ATTR_HAS_INDEX | RAY_ATTR_HAS_LINK);
+    if (!cm.has_ext_nullmap)
+        vec->attrs &= ~RAY_ATTR_NULLMAP_EXT;
+    ray_atomic_store(&vec->rc, 1);
+
+    /* Reattach link sidecar if present. Without this, linked columns
+     * round-tripped through splay-mmap (splay.c:184) lose HAS_LINK
+     * even though ray_col_load restores it. */
+    try_load_link_sidecar(vec, path);
+
+    return vec;
+}
diff --git a/crates/rayforce-sys/vendor/rayforce/src/store/col.h b/crates/rayforce-sys/vendor/rayforce/src/store/col.h
new file mode 100644
index 0000000..55f492b
--- /dev/null
+++ b/crates/rayforce-sys/vendor/rayforce/src/store/col.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2025-2026 Anton Kundenko
+ * All rights reserved.
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_COL_H +#define RAY_COL_H + +#include + +/* Column file I/O */ +ray_err_t ray_col_save(ray_t* vec, const char* path); +ray_t* ray_col_load(const char* path); +ray_t* ray_col_mmap(const char* path); + +#endif /* RAY_COL_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/store/csr.c b/crates/rayforce-sys/vendor/rayforce/src/store/csr.c new file mode 100644 index 0000000..a978cdd --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/store/csr.c @@ -0,0 +1,529 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include "csr.h"
+#include "store/col.h"
+#include "mem/sys.h"
+#include
+#include
+#include
+#include
+#include
+
+/* Forward declaration */
+static void csr_free(ray_csr_t* csr);
+
+/* --------------------------------------------------------------------------
+ * CSR construction helpers
+ * -------------------------------------------------------------------------- */
+
+/* Pair for sorting edges */
+typedef struct {
+    int64_t src;
+    int64_t dst;
+    int64_t row; /* original row index for rowmap */
+} edge_pair_t;
+
+/* Comparison by src then dst */
+static int cmp_edge_by_src(const void* a, const void* b) {
+    const edge_pair_t* ea = (const edge_pair_t*)a;
+    const edge_pair_t* eb = (const edge_pair_t*)b;
+    if (ea->src < eb->src) return -1;
+    if (ea->src > eb->src) return 1;
+    if (ea->dst < eb->dst) return -1;
+    if (ea->dst > eb->dst) return 1;
+    return 0;
+}
+
+/* Comparison by dst then src (for reverse CSR) */
+static int cmp_edge_by_dst(const void* a, const void* b) {
+    const edge_pair_t* ea = (const edge_pair_t*)a;
+    const edge_pair_t* eb = (const edge_pair_t*)b;
+    if (ea->dst < eb->dst) return -1;
+    if (ea->dst > eb->dst) return 1;
+    if (ea->src < eb->src) return -1;
+    if (ea->src > eb->src) return 1;
+    return 0;
+}
+
+/* Sort targets within each adjacency list (for LFTJ).
+ * rowmap, when present, is permuted in lockstep so each target keeps its
+ * original edge row. */
+static void csr_sort_adjacency_lists(ray_csr_t* csr) {
+    int64_t* offsets = (int64_t*)ray_data(csr->offsets);
+    int64_t* targets = (int64_t*)ray_data(csr->targets);
+    int64_t* rowmap = csr->rowmap ? (int64_t*)ray_data(csr->rowmap) : NULL;
+
+    for (int64_t node = 0; node < csr->n_nodes; node++) {
+        int64_t start = offsets[node];
+        int64_t end = offsets[node + 1];
+        int64_t deg = end - start;
+        if (deg <= 1) continue;
+
+        /* Simple insertion sort -- adjacency lists are typically small */
+        for (int64_t i = start + 1; i < end; i++) {
+            int64_t key = targets[i];
+            int64_t row_key = rowmap ? rowmap[i] : 0;
+            int64_t j = i - 1;
+            while (j >= start && targets[j] > key) {
+                targets[j + 1] = targets[j];
+                if (rowmap) rowmap[j + 1] = rowmap[j];
+                j--;
+            }
+            targets[j + 1] = key;
+            if (rowmap) rowmap[j + 1] = row_key;
+        }
+    }
+}
+
+/* Build CSR from sorted edge pairs.
+ * pairs must be sorted by the 'key' field (src for fwd, dst for rev).
+ *
+ * Edges whose key falls outside [0, n_nodes) are dropped; out->n_edges
+ * counts only the edges kept. On failure every vector allocated here is
+ * released and RAY_ERR_OOM is returned. */
+static ray_err_t csr_build_from_pairs(edge_pair_t* pairs, int64_t n_edges,
+                                       int64_t n_nodes, bool is_reverse,
+                                       bool sort_targets, ray_csr_t* out) {
+    out->n_nodes = n_nodes;
+    out->props = NULL;
+
+    /* Count valid edges (those within [0, n_nodes) range) */
+    int64_t valid_edges = 0;
+    for (int64_t i = 0; i < n_edges; i++) {
+        int64_t key = is_reverse ? pairs[i].dst : pairs[i].src;
+        if (key >= 0 && key < n_nodes) valid_edges++;
+    }
+    out->n_edges = valid_edges;
+
+    /* Allocate offsets (n_nodes + 1) */
+    out->offsets = ray_vec_new(RAY_I64, n_nodes + 1);
+    if (!out->offsets || RAY_IS_ERR(out->offsets)) return RAY_ERR_OOM;
+    out->offsets->len = n_nodes + 1;
+    int64_t* off = (int64_t*)ray_data(out->offsets);
+    memset(off, 0, (size_t)(n_nodes + 1) * sizeof(int64_t));
+
+    /* Allocate targets */
+    out->targets = ray_vec_new(RAY_I64, valid_edges > 0 ? valid_edges : 1);
+    if (!out->targets || RAY_IS_ERR(out->targets)) {
+        ray_release(out->offsets); out->offsets = NULL;
+        return RAY_ERR_OOM;
+    }
+    out->targets->len = valid_edges;
+    int64_t* tgt = (int64_t*)ray_data(out->targets);
+
+    /* Allocate rowmap */
+    out->rowmap = ray_vec_new(RAY_I64, valid_edges > 0 ? valid_edges : 1);
+    if (!out->rowmap || RAY_IS_ERR(out->rowmap)) {
+        ray_release(out->offsets); out->offsets = NULL;
+        ray_release(out->targets); out->targets = NULL;
+        return RAY_ERR_OOM;
+    }
+    out->rowmap->len = valid_edges;
+    int64_t* rmap = (int64_t*)ray_data(out->rowmap);
+
+    /* Count degrees */
+    for (int64_t i = 0; i < n_edges; i++) {
+        int64_t key = is_reverse ? pairs[i].dst : pairs[i].src;
+        if (key >= 0 && key < n_nodes) off[key + 1]++;
+    }
+
+    /* Prefix sum */
+    for (int64_t i = 1; i <= n_nodes; i++)
+        off[i] += off[i - 1];
+
+    /* Fill targets + rowmap using a position array.
+     * An error object from the allocator is a failure too, the same way the
+     * column store treats ray_alloc results. */
+    ray_t* pos_hdr = ray_alloc((size_t)(n_nodes > 0 ? n_nodes : 1) * sizeof(int64_t));
+    if (!pos_hdr || RAY_IS_ERR(pos_hdr)) {
+        ray_release(out->offsets); out->offsets = NULL;
+        ray_release(out->targets); out->targets = NULL;
+        ray_release(out->rowmap); out->rowmap = NULL;
+        return RAY_ERR_OOM;
+    }
+    int64_t* pos = (int64_t*)ray_data(pos_hdr);
+    if (n_nodes > 0)
+        memcpy(pos, off, (size_t)n_nodes * sizeof(int64_t));
+
+    for (int64_t i = 0; i < n_edges; i++) {
+        int64_t key = is_reverse ? pairs[i].dst : pairs[i].src;
+        int64_t val = is_reverse ? pairs[i].src : pairs[i].dst;
+        if (key >= 0 && key < n_nodes) {
+            int64_t p = pos[key]++;
+            tgt[p] = val;
+            rmap[p] = pairs[i].row;
+        }
+    }
+    ray_free(pos_hdr);
+
+    /* Sort within adjacency lists if requested */
+    if (sort_targets) {
+        csr_sort_adjacency_lists(out);
+        out->sorted = true;
+    } else {
+        out->sorted = false;
+    }
+
+    return RAY_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * ray_rel_from_edges -- build from explicit edge table
+ *
+ * edge_table must be a RAY_TABLE holding two RAY_I64 columns of equal
+ * length named src_col and dst_col. Builds a forward CSR over n_src_nodes
+ * and a reverse CSR over n_dst_nodes. Returns NULL on any invalid
+ * argument or allocation failure; nothing is leaked in that case.
+ * -------------------------------------------------------------------------- */
+
+ray_rel_t* ray_rel_from_edges(ray_t* edge_table,
+                               const char* src_col, const char* dst_col,
+                               int64_t n_src_nodes, int64_t n_dst_nodes,
+                               bool sort_targets) {
+    if (!edge_table || RAY_IS_ERR(edge_table) || edge_table->type != RAY_TABLE)
+        return NULL;
+    /* strlen() below must not see a NULL column name. */
+    if (!src_col || !dst_col) return NULL;
+
+    int64_t src_sym = ray_sym_intern(src_col, strlen(src_col));
+    int64_t dst_sym = ray_sym_intern(dst_col, strlen(dst_col));
+    if (src_sym < 0 || dst_sym < 0) return NULL; /* sym intern OOM */
+
+    ray_t* src_vec = ray_table_get_col(edge_table, src_sym);
+    ray_t* dst_vec = ray_table_get_col(edge_table, dst_sym);
+    if (!src_vec || !dst_vec) return NULL;
+    if (src_vec->type != RAY_I64 || dst_vec->type != RAY_I64) return NULL;
+
+    int64_t n_edges = src_vec->len;
+    if (n_edges != dst_vec->len) return NULL;
+    if (n_src_nodes < 0 || n_dst_nodes < 0) return NULL;
+    /* Keep the scratch-buffer size computation from wrapping. */
+    if (n_edges < 0 || (uint64_t)n_edges > SIZE_MAX / sizeof(edge_pair_t))
+        return NULL;
+
+    /* Build edge pairs */
+    ray_t* pairs_hdr = ray_alloc((size_t)n_edges * sizeof(edge_pair_t));
+    if (!pairs_hdr || RAY_IS_ERR(pairs_hdr)) return NULL;
+    edge_pair_t* pairs = (edge_pair_t*)ray_data(pairs_hdr);
+
+    int64_t* src_data = (int64_t*)ray_data(src_vec);
+    int64_t* dst_data = (int64_t*)ray_data(dst_vec);
+    for (int64_t i = 0; i < n_edges; i++) {
+        pairs[i].src = src_data[i];
+        pairs[i].dst = dst_data[i];
+        pairs[i].row = i;
+    }
+
+    /* Allocate rel */
+    ray_rel_t* rel = (ray_rel_t*)ray_sys_alloc(sizeof(ray_rel_t));
+    if (!rel) { ray_free(pairs_hdr); return NULL; }
+    memset(rel, 0, sizeof(ray_rel_t));
+    rel->name_sym = -1;
+
+    /* Build forward CSR (sorted by src) */
+    /* qsort is from libc, not an external dep */
+    qsort(pairs, (size_t)n_edges, sizeof(edge_pair_t), cmp_edge_by_src);
+    ray_err_t err = csr_build_from_pairs(pairs, n_edges, n_src_nodes, false,
+                                          sort_targets, &rel->fwd);
+    if (err != RAY_OK) {
+        ray_free(pairs_hdr);
+        ray_sys_free(rel);
+        return NULL;
+    }
+
+    /* Build reverse CSR (sorted by dst) */
+    qsort(pairs, (size_t)n_edges, sizeof(edge_pair_t), cmp_edge_by_dst);
+    err = csr_build_from_pairs(pairs, n_edges, n_dst_nodes, true,
+                                sort_targets, &rel->rev);
+    if (err != RAY_OK) {
+        ray_free(pairs_hdr);
+        csr_free(&rel->fwd);
+        ray_sys_free(rel);
+        return NULL;
+    }
+
+    ray_free(pairs_hdr);
+    return rel;
+}
+
+/* --------------------------------------------------------------------------
+ * ray_rel_build -- build from FK column in source table
+ * -------------------------------------------------------------------------- */
+
+ray_rel_t* ray_rel_build(ray_t* from_table, const char* fk_col,
+                          int64_t n_target_nodes, bool sort_targets) {
+    if (!from_table || RAY_IS_ERR(from_table) || from_table->type != RAY_TABLE)
+        return NULL;
+
+    int64_t fk_sym = ray_sym_intern(fk_col, strlen(fk_col));
+    ray_t* fk_vec =
ray_table_get_col(from_table, fk_sym); + if (!fk_vec || fk_vec->type != RAY_I64) return NULL; + if (n_target_nodes < 0) return NULL; + + int64_t n_edges = fk_vec->len; + int64_t n_src_nodes = ray_table_nrows(from_table); + + /* Build edge pairs: src = row index, dst = fk value */ + ray_t* pairs_hdr = ray_alloc((size_t)n_edges * sizeof(edge_pair_t)); + if (!pairs_hdr) return NULL; + edge_pair_t* pairs = (edge_pair_t*)ray_data(pairs_hdr); + + int64_t* fk_data = (int64_t*)ray_data(fk_vec); + for (int64_t i = 0; i < n_edges; i++) { + pairs[i].src = i; + pairs[i].dst = fk_data[i]; + pairs[i].row = i; + } + + ray_rel_t* rel = (ray_rel_t*)ray_sys_alloc(sizeof(ray_rel_t)); + if (!rel) { ray_free(pairs_hdr); return NULL; } + memset(rel, 0, sizeof(ray_rel_t)); + rel->name_sym = -1; + + /* Build forward CSR */ + qsort(pairs, (size_t)n_edges, sizeof(edge_pair_t), cmp_edge_by_src); + ray_err_t err = csr_build_from_pairs(pairs, n_edges, n_src_nodes, false, + sort_targets, &rel->fwd); + if (err != RAY_OK) { + ray_free(pairs_hdr); + ray_sys_free(rel); + return NULL; + } + + /* Build reverse CSR */ + qsort(pairs, (size_t)n_edges, sizeof(edge_pair_t), cmp_edge_by_dst); + err = csr_build_from_pairs(pairs, n_edges, n_target_nodes, true, + sort_targets, &rel->rev); + if (err != RAY_OK) { + ray_free(pairs_hdr); + csr_free(&rel->fwd); + ray_sys_free(rel); + return NULL; + } + + ray_free(pairs_hdr); + return rel; +} + +/* -------------------------------------------------------------------------- + * CSR free + * -------------------------------------------------------------------------- */ + +static void csr_free(ray_csr_t* csr) { + if (csr->offsets) ray_release(csr->offsets); + if (csr->targets) ray_release(csr->targets); + if (csr->rowmap) ray_release(csr->rowmap); + if (csr->props) ray_release(csr->props); + csr->offsets = NULL; + csr->targets = NULL; + csr->rowmap = NULL; + csr->props = NULL; +} + +void ray_rel_set_props(ray_rel_t* rel, ray_t* props) { + if (!rel || !props) return; + 
/* Retain twice: fwd.props and rev.props both alias the same pointer, + * and csr_free() releases each independently. */ + ray_retain(props); + ray_retain(props); + if (rel->fwd.props) ray_release(rel->fwd.props); + if (rel->rev.props) ray_release(rel->rev.props); + rel->fwd.props = props; + rel->rev.props = props; +} + +void ray_rel_free(ray_rel_t* rel) { + if (!rel) return; + csr_free(&rel->fwd); + csr_free(&rel->rev); + ray_sys_free(rel); +} + +/* --- Public CSR neighbor access ------------------------------------------- */ + +const int64_t* ray_rel_neighbors(ray_rel_t* rel, int64_t node, + uint8_t direction, int64_t* out_count) { + if (!rel) { if (out_count) *out_count = 0; return NULL; } + ray_csr_t* csr = (direction == 1) ? &rel->rev : &rel->fwd; + return ray_csr_neighbors(csr, node, out_count); +} + +int64_t ray_rel_n_nodes(ray_rel_t* rel, uint8_t direction) { + if (!rel) return 0; + ray_csr_t* csr = (direction == 1) ? &rel->rev : &rel->fwd; + return csr->n_nodes; +} + +/* -------------------------------------------------------------------------- + * CSR persistence — save/load/mmap using existing column file format + * -------------------------------------------------------------------------- */ + + +static ray_err_t csr_save(ray_csr_t* csr, const char* dir, const char* prefix) { + char path[1024]; + int len; + + len = snprintf(path, sizeof(path), "%s/%s_offsets", dir, prefix); + if (len < 0 || (size_t)len >= sizeof(path)) return RAY_ERR_IO; + ray_err_t err = ray_col_save(csr->offsets, path); + if (err != RAY_OK) return err; + + len = snprintf(path, sizeof(path), "%s/%s_targets", dir, prefix); + if (len < 0 || (size_t)len >= sizeof(path)) return RAY_ERR_IO; + err = ray_col_save(csr->targets, path); + if (err != RAY_OK) return err; + + if (csr->rowmap) { + len = snprintf(path, sizeof(path), "%s/%s_rowmap", dir, prefix); + if (len < 0 || (size_t)len >= sizeof(path)) return RAY_ERR_IO; + err = ray_col_save(csr->rowmap, path); + if (err != RAY_OK) return err; + 
} + + return RAY_OK; +} + +static ray_err_t csr_load_impl(ray_csr_t* csr, const char* dir, const char* prefix, + bool use_mmap) { + char path[1024]; + int len; + + len = snprintf(path, sizeof(path), "%s/%s_offsets", dir, prefix); + if (len < 0 || (size_t)len >= sizeof(path)) return RAY_ERR_IO; + csr->offsets = use_mmap ? ray_col_mmap(path) : ray_col_load(path); + if (!csr->offsets || RAY_IS_ERR(csr->offsets)) { + csr->offsets = NULL; + return RAY_ERR_IO; + } + + len = snprintf(path, sizeof(path), "%s/%s_targets", dir, prefix); + if (len < 0 || (size_t)len >= sizeof(path)) return RAY_ERR_IO; + csr->targets = use_mmap ? ray_col_mmap(path) : ray_col_load(path); + if (!csr->targets || RAY_IS_ERR(csr->targets)) { + ray_release(csr->offsets); csr->offsets = NULL; + csr->targets = NULL; + return RAY_ERR_IO; + } + + len = snprintf(path, sizeof(path), "%s/%s_rowmap", dir, prefix); + if (len < 0 || (size_t)len >= sizeof(path)) return RAY_ERR_IO; + csr->rowmap = use_mmap ? ray_col_mmap(path) : ray_col_load(path); + if (!csr->rowmap || RAY_IS_ERR(csr->rowmap)) { + /* rowmap is optional — ignore error */ + csr->rowmap = NULL; + } + + if (csr->offsets->len < 1) { + ray_release(csr->offsets); csr->offsets = NULL; + ray_release(csr->targets); csr->targets = NULL; + if (csr->rowmap) { ray_release(csr->rowmap); csr->rowmap = NULL; } + return RAY_ERR_IO; + } + csr->n_nodes = csr->offsets->len - 1; + csr->n_edges = csr->targets->len; + + /* Consistency: offsets[n_nodes] must equal targets->len */ + int64_t* off_data = (int64_t*)ray_data(csr->offsets); + if (off_data[csr->n_nodes] != csr->n_edges) { + ray_release(csr->offsets); csr->offsets = NULL; + ray_release(csr->targets); csr->targets = NULL; + if (csr->rowmap) { ray_release(csr->rowmap); csr->rowmap = NULL; } + return RAY_ERR_IO; + } + + /* Validate offset monotonicity: offsets[i] <= offsets[i+1] */ + for (int64_t i = 0; i < csr->n_nodes; i++) { + if (off_data[i] < 0 || off_data[i] > off_data[i + 1]) { + 
ray_release(csr->offsets); csr->offsets = NULL; + ray_release(csr->targets); csr->targets = NULL; + if (csr->rowmap) { ray_release(csr->rowmap); csr->rowmap = NULL; } + return RAY_ERR_IO; /* corrupt: non-monotonic offsets */ + } + } + + csr->sorted = false; /* caller sets if known */ + csr->props = NULL; + + return RAY_OK; +} + +ray_err_t ray_rel_save(ray_rel_t* rel, const char* dir) { + if (!rel || !dir) return RAY_ERR_IO; + + /* Create directory */ + if (mkdir(dir, 0755) != 0 && errno != EEXIST) return RAY_ERR_IO; + + ray_err_t err = csr_save(&rel->fwd, dir, "fwd"); + if (err != RAY_OK) return err; + + err = csr_save(&rel->rev, dir, "rev"); + if (err != RAY_OK) return err; + + /* Save metadata (from_table, to_table, name_sym, sorted flags) */ + char path[1024]; + int len = snprintf(path, sizeof(path), "%s/meta", dir); + if (len < 0 || (size_t)len >= sizeof(path)) return RAY_ERR_IO; + + /* Pack metadata into an I64 vector: [from_table, to_table, name_sym, fwd_sorted, rev_sorted] */ + int64_t meta_data[5]; + meta_data[0] = (int64_t)rel->from_table; + meta_data[1] = (int64_t)rel->to_table; + meta_data[2] = rel->name_sym; + meta_data[3] = rel->fwd.sorted ? 1 : 0; + meta_data[4] = rel->rev.sorted ? 
1 : 0; + ray_t* meta_vec = ray_vec_from_raw(RAY_I64, meta_data, 5); + if (!meta_vec || RAY_IS_ERR(meta_vec)) return RAY_ERR_OOM; + err = ray_col_save(meta_vec, path); + ray_release(meta_vec); + + return err; +} + +static ray_rel_t* rel_load_impl(const char* dir, bool use_mmap) { + if (!dir) return NULL; + + ray_rel_t* rel = (ray_rel_t*)ray_sys_alloc(sizeof(ray_rel_t)); + if (!rel) return NULL; + memset(rel, 0, sizeof(ray_rel_t)); + + ray_err_t err = csr_load_impl(&rel->fwd, dir, "fwd", use_mmap); + if (err != RAY_OK) { ray_sys_free(rel); return NULL; } + + err = csr_load_impl(&rel->rev, dir, "rev", use_mmap); + if (err != RAY_OK) { + csr_free(&rel->fwd); + ray_sys_free(rel); + return NULL; + } + + /* Load metadata */ + char path[1024]; + int len = snprintf(path, sizeof(path), "%s/meta", dir); + if (len >= 0 && (size_t)len < sizeof(path)) { + ray_t* meta = use_mmap ? ray_col_mmap(path) : ray_col_load(path); + if (meta && !RAY_IS_ERR(meta) && meta->len >= 5) { + int64_t* md = (int64_t*)ray_data(meta); + rel->from_table = (uint16_t)md[0]; + rel->to_table = (uint16_t)md[1]; + rel->name_sym = md[2]; + rel->fwd.sorted = md[3] != 0; + rel->rev.sorted = md[4] != 0; + ray_release(meta); + } else if (meta && !RAY_IS_ERR(meta)) { + ray_release(meta); + } + } + + return rel; +} + +ray_rel_t* ray_rel_load(const char* dir) { + return rel_load_impl(dir, false); +} + +ray_rel_t* ray_rel_mmap(const char* dir) { + return rel_load_impl(dir, true); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/store/csr.h b/crates/rayforce-sys/vendor/rayforce/src/store/csr.h new file mode 100644 index 0000000..ece3a53 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/store/csr.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RAY_CSR_H
+#define RAY_CSR_H
+
+#include
+
+/* Compressed Sparse Row edge index.
+ *
+ * offsets[i]..offsets[i+1] gives the range in targets[] for node i's neighbors.
+ * Stored as ray_t I64 vectors — same allocator, mmap, COW as everything else.
+ *
+ * If sorted == true, targets within each adjacency list are sorted ascending.
+ * Required for OP_WCO_JOIN (Leapfrog Triejoin).
+ *
+ * rowmap and props may be NULL: the loader in csr.c treats the rowmap file as
+ * optional, and props stays NULL until ray_rel_set_props() attaches a table.
+ */
+typedef struct ray_csr {
+    ray_t*    offsets;     /* I64 vec, length = n_nodes + 1 */
+    ray_t*    targets;     /* I64 vec, length = n_edges     */
+    ray_t*    rowmap;      /* I64 vec, length = n_edges (CSR pos -> prop row); may be NULL */
+    ray_t*    props;       /* optional edge property table (ray_t RAY_TABLE) */
+    int64_t   n_nodes;     /* number of nodes indexed by offsets */
+    int64_t   n_edges;     /* number of entries in targets */
+    bool      sorted;      /* targets sorted per adjacency list */
+} ray_csr_t;
+
+/* Relationship: double-indexed CSR (forward + reverse).
+ * + * from_table/to_table are opaque IDs assigned by the caller (planner). + * librayforce does not manage a table registry -- it just stores the IDs + * so the caller can identify which tables this rel connects. + */ +typedef struct ray_rel { + uint16_t from_table; + uint16_t to_table; + int64_t name_sym; /* relationship name as symbol ID */ + ray_csr_t fwd; /* src -> dst */ + ray_csr_t rev; /* dst -> src */ +} ray_rel_t; + +/* O(1) neighbor range lookup — caller must ensure node is in [0, n_nodes). */ +static inline int64_t ray_csr_degree(ray_csr_t* csr, int64_t node) { + if (!csr || !csr->offsets || node < 0 || node >= csr->n_nodes) return 0; + int64_t* o = (int64_t*)ray_data(csr->offsets); + return o[node + 1] - o[node]; +} + +static inline int64_t* ray_csr_neighbors(ray_csr_t* csr, int64_t node, int64_t* out_count) { + if (!csr || !csr->offsets || !csr->targets || node < 0 || node >= csr->n_nodes) { + if (out_count) *out_count = 0; + return NULL; + } + int64_t* o = (int64_t*)ray_data(csr->offsets); + int64_t* t = (int64_t*)ray_data(csr->targets); + *out_count = o[node + 1] - o[node]; + return &t[o[node]]; +} + +#endif /* RAY_CSR_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/store/fileio.c b/crates/rayforce-sys/vendor/rayforce/src/store/fileio.c new file mode 100644 index 0000000..8586c13 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/store/fileio.c @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "fileio.h" + +#include + +/* PATH_MAX is mandated on POSIX (typically 4096 on Linux); Windows + * caps at MAX_PATH = 260 unless long-path support is enabled. Use the + * larger of the two when known so callers passing deep splayed paths + * (e.g. /db/yyyy.mm.dd/table/) don't silently truncate. */ +#ifdef RAY_OS_WINDOWS +# define RAY_PATH_MAX 4096 +#elif defined(PATH_MAX) +# define RAY_PATH_MAX PATH_MAX +#else +# define RAY_PATH_MAX 4096 +#endif + +#ifdef RAY_OS_WINDOWS + +#include + +/* ===== Windows implementation ===== */ + +/* Translate GetLastError() into errno so callers can use errno portably. 
*/ +static void win_set_errno(void) { + DWORD e = GetLastError(); + switch (e) { + case ERROR_FILE_NOT_FOUND: + case ERROR_PATH_NOT_FOUND: errno = ENOENT; break; + case ERROR_ACCESS_DENIED: errno = EACCES; break; + case ERROR_WRITE_PROTECT: errno = EROFS; break; + case ERROR_TOO_MANY_OPEN_FILES: errno = EMFILE; break; + case ERROR_FILE_EXISTS: + case ERROR_ALREADY_EXISTS: errno = EEXIST; break; + default: errno = EIO; break; + } +} + +ray_fd_t ray_file_open(const char* path, int flags) { + if (!path) return RAY_FD_INVALID; + + DWORD access = 0; + DWORD creation = OPEN_EXISTING; + + if (flags & RAY_OPEN_READ) access |= GENERIC_READ; + if (flags & RAY_OPEN_WRITE) access |= GENERIC_WRITE; + if (flags & RAY_OPEN_CREATE) creation = OPEN_ALWAYS; + + HANDLE h = CreateFileA(path, access, FILE_SHARE_READ | FILE_SHARE_WRITE, + NULL, creation, FILE_ATTRIBUTE_NORMAL, NULL); + if (h == INVALID_HANDLE_VALUE) win_set_errno(); + return h; +} + +void ray_file_close(ray_fd_t fd) { + if (fd != RAY_FD_INVALID) CloseHandle(fd); +} + +ray_err_t ray_file_lock_ex(ray_fd_t fd) { + if (fd == RAY_FD_INVALID) return RAY_ERR_IO; + OVERLAPPED ov = {0}; + if (!LockFileEx(fd, LOCKFILE_EXCLUSIVE_LOCK, 0, MAXDWORD, MAXDWORD, &ov)) + return RAY_ERR_IO; + return RAY_OK; +} + +ray_err_t ray_file_lock_sh(ray_fd_t fd) { + if (fd == RAY_FD_INVALID) return RAY_ERR_IO; + OVERLAPPED ov = {0}; + if (!LockFileEx(fd, 0, 0, MAXDWORD, MAXDWORD, &ov)) + return RAY_ERR_IO; + return RAY_OK; +} + +ray_err_t ray_file_unlock(ray_fd_t fd) { + if (fd == RAY_FD_INVALID) return RAY_OK; + OVERLAPPED ov = {0}; + if (!UnlockFileEx(fd, 0, MAXDWORD, MAXDWORD, &ov)) + return RAY_ERR_IO; + return RAY_OK; +} + +ray_err_t ray_file_sync(ray_fd_t fd) { + if (fd == RAY_FD_INVALID) return RAY_ERR_IO; + if (!FlushFileBuffers(fd)) return RAY_ERR_IO; + return RAY_OK; +} + +ray_err_t ray_file_sync_dir(const char* path) { + /* Windows: rename durability is handled by MOVEFILE_WRITE_THROUGH in + * ray_file_rename; no separate directory 
fsync needed. */ + (void)path; + return RAY_OK; +} + +ray_err_t ray_file_rename(const char* old_path, const char* new_path) { + if (!old_path || !new_path) return RAY_ERR_IO; + /* MOVEFILE_WRITE_THROUGH flushes the rename to disk before returning, + * providing crash-safe durability equivalent to POSIX fsync-after-rename. */ + if (!MoveFileExA(old_path, new_path, + MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH)) + return RAY_ERR_IO; + return RAY_OK; +} + +ray_err_t ray_mkdir(const char* path) { + if (!path) return RAY_ERR_IO; + if (!CreateDirectoryA(path, NULL)) { + if (GetLastError() != ERROR_ALREADY_EXISTS) return RAY_ERR_IO; + } + return RAY_OK; +} + +ray_err_t ray_mkdir_p(const char* path) { + if (!path || !*path) return RAY_ERR_IO; + char buf[RAY_PATH_MAX]; + size_t len = strlen(path); + if (len >= sizeof(buf)) return RAY_ERR_IO; + memcpy(buf, path, len + 1); + /* Normalize trailing separator: trim it so the loop creates `buf` itself. */ + while (len > 1 && (buf[len - 1] == '/' || buf[len - 1] == '\\')) buf[--len] = '\0'; + for (size_t i = 1; i < len; i++) { + if (buf[i] == '/' || buf[i] == '\\') { + char saved = buf[i]; + buf[i] = '\0'; + if (!CreateDirectoryA(buf, NULL) && GetLastError() != ERROR_ALREADY_EXISTS) { + buf[i] = saved; + return RAY_ERR_IO; + } + buf[i] = saved; + } + } + if (!CreateDirectoryA(buf, NULL) && GetLastError() != ERROR_ALREADY_EXISTS) return RAY_ERR_IO; + return RAY_OK; +} + +#else + +/* ===== POSIX implementation ===== */ + +#include +#include +#include +#include +#include +#include +#include + +ray_fd_t ray_file_open(const char* path, int flags) { + if (!path) return RAY_FD_INVALID; + + int oflags = 0; + if ((flags & RAY_OPEN_READ) && (flags & RAY_OPEN_WRITE)) + oflags = O_RDWR; + else if (flags & RAY_OPEN_WRITE) + oflags = O_WRONLY; + else + oflags = O_RDONLY; + + if (flags & RAY_OPEN_CREATE) oflags |= O_CREAT; + + return open(path, oflags, 0644); +} + +void ray_file_close(ray_fd_t fd) { + if (fd != RAY_FD_INVALID) close(fd); 
+} + +ray_err_t ray_file_lock_ex(ray_fd_t fd) { + if (fd == RAY_FD_INVALID) return RAY_ERR_IO; + if (flock(fd, LOCK_EX) != 0) return RAY_ERR_IO; + return RAY_OK; +} + +ray_err_t ray_file_lock_sh(ray_fd_t fd) { + if (fd == RAY_FD_INVALID) return RAY_ERR_IO; + if (flock(fd, LOCK_SH) != 0) return RAY_ERR_IO; + return RAY_OK; +} + +ray_err_t ray_file_unlock(ray_fd_t fd) { + if (fd == RAY_FD_INVALID) return RAY_OK; + if (flock(fd, LOCK_UN) != 0) return RAY_ERR_IO; + return RAY_OK; +} + +ray_err_t ray_file_sync(ray_fd_t fd) { + if (fd == RAY_FD_INVALID) return RAY_ERR_IO; + if (fsync(fd) != 0) return RAY_ERR_IO; + return RAY_OK; +} + +ray_err_t ray_file_sync_dir(const char* path) { + if (!path) return RAY_ERR_IO; + /* Extract parent directory from path */ + char dir[1024]; + size_t len = strlen(path); + if (len >= sizeof(dir)) return RAY_ERR_IO; + memcpy(dir, path, len + 1); + /* Find last '/' */ + char* slash = strrchr(dir, '/'); + if (slash) { + if (slash == dir) + dir[1] = '\0'; /* root directory */ + else + *slash = '\0'; + } else { + dir[0] = '.'; dir[1] = '\0'; /* current directory */ + } + int fd = open(dir, O_RDONLY); + if (fd < 0) return RAY_ERR_IO; + int rc = fsync(fd); + close(fd); + return (rc == 0) ? RAY_OK : RAY_ERR_IO; +} + +ray_err_t ray_file_rename(const char* old_path, const char* new_path) { + if (!old_path || !new_path) return RAY_ERR_IO; + if (rename(old_path, new_path) != 0) return RAY_ERR_IO; + return RAY_OK; +} + +ray_err_t ray_mkdir(const char* path) { + if (!path) return RAY_ERR_IO; + if (mkdir(path, 0755) != 0 && errno != EEXIST) return RAY_ERR_IO; + return RAY_OK; +} + +ray_err_t ray_mkdir_p(const char* path) { + if (!path || !*path) return RAY_ERR_IO; + char buf[RAY_PATH_MAX]; + size_t len = strlen(path); + if (len >= sizeof(buf)) return RAY_ERR_IO; + memcpy(buf, path, len + 1); + /* Strip trailing slash so the final mkdir creates `buf` itself. 
*/ + while (len > 1 && buf[len - 1] == '/') buf[--len] = '\0'; + for (size_t i = 1; i < len; i++) { + if (buf[i] == '/') { + buf[i] = '\0'; + if (mkdir(buf, 0755) != 0 && errno != EEXIST) return RAY_ERR_IO; + buf[i] = '/'; + } + } + if (mkdir(buf, 0755) != 0 && errno != EEXIST) return RAY_ERR_IO; + return RAY_OK; +} + +#endif diff --git a/crates/rayforce-sys/vendor/rayforce/src/store/fileio.h b/crates/rayforce-sys/vendor/rayforce/src/store/fileio.h new file mode 100644 index 0000000..658e560 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/store/fileio.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAY_FILEIO_H +#define RAY_FILEIO_H + +#include + +/* Cross-platform file I/O (locking, sync, atomic rename) */ +#ifdef RAY_OS_WINDOWS + #include + typedef HANDLE ray_fd_t; + #define RAY_FD_INVALID INVALID_HANDLE_VALUE +#else + typedef int ray_fd_t; + #define RAY_FD_INVALID (-1) +#endif + +#define RAY_OPEN_READ 0x01 +#define RAY_OPEN_WRITE 0x02 +#define RAY_OPEN_CREATE 0x04 + +ray_fd_t ray_file_open(const char* path, int flags); +void ray_file_close(ray_fd_t fd); +ray_err_t ray_file_lock_ex(ray_fd_t fd); +ray_err_t ray_file_lock_sh(ray_fd_t fd); +ray_err_t ray_file_unlock(ray_fd_t fd); +ray_err_t ray_file_sync(ray_fd_t fd); +ray_err_t ray_file_sync_dir(const char* path); +ray_err_t ray_file_rename(const char* old_path, const char* new_path); +ray_err_t ray_mkdir(const char* path); +ray_err_t ray_mkdir_p(const char* path); /* like `mkdir -p` */ + +#endif /* RAY_FILEIO_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/store/hnsw.c b/crates/rayforce-sys/vendor/rayforce/src/store/hnsw.c new file mode 100644 index 0000000..dc939a4 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/store/hnsw.c @@ -0,0 +1,972 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "hnsw.h" +#include "mem/sys.h" +#include +#include +#include +#include +#include +#include + +/* -------------------------------------------------------------------------- + * Distance dispatch — each metric maps to a scalar where lower = closer, + * as required by the HNSW beam search. + * + * COSINE → 1 - cos(a, b) (range [0, 2]) + * L2 → sqrt(Σ (a_i - b_i)^2) (Euclidean) + * IP → -dot(a, b) (negated so lower=closer) + * + * Note on L2: we keep the sqrt (true Euclidean), even + * though omitting it preserves ordering. The sqrt cost is dominated by + * the inner loop on modern cores, and returning true distances avoids + * surprising callers who compare against thresholds. + * -------------------------------------------------------------------------- */ + +static double hnsw_cosine_dist(const float* a, const float* b, int32_t dim) { + double dot = 0.0, na = 0.0, nb = 0.0; + for (int32_t i = 0; i < dim; i++) { + dot += (double)a[i] * b[i]; + na += (double)a[i] * a[i]; + nb += (double)b[i] * b[i]; + } + double denom = sqrt(na) * sqrt(nb); + return (denom > 0.0) ? 
1.0 - dot / denom : 1.0; +} + +static double hnsw_l2_dist(const float* a, const float* b, int32_t dim) { + double s = 0.0; + for (int32_t i = 0; i < dim; i++) { + double d = (double)a[i] - (double)b[i]; + s += d * d; + } + return sqrt(s); +} + +static double hnsw_ip_dist(const float* a, const float* b, int32_t dim) { + double dot = 0.0; + for (int32_t i = 0; i < dim; i++) { + dot += (double)a[i] * (double)b[i]; + } + return -dot; +} + +static double hnsw_dist(const ray_hnsw_t* idx, const float* a, const float* b) { + switch ((ray_hnsw_metric_t)idx->metric) { + case RAY_HNSW_L2: return hnsw_l2_dist(a, b, idx->dim); + case RAY_HNSW_IP: return hnsw_ip_dist(a, b, idx->dim); + case RAY_HNSW_COSINE: /* fallthrough */ + default: return hnsw_cosine_dist(a, b, idx->dim); + } +} + +/* -------------------------------------------------------------------------- + * Random level assignment (HNSW paper, Section 3.1) + * -------------------------------------------------------------------------- */ + +static _Thread_local uint32_t hnsw_rng_state = 42; + +static uint32_t hnsw_rand(void) { + /* xorshift32 — fast, deterministic, no global state collision */ + uint32_t x = hnsw_rng_state; + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + hnsw_rng_state = x; + return x; +} + +static int32_t hnsw_random_level(int32_t M) { + double ml = 1.0 / log((double)M); + double r = (double)hnsw_rand() / (double)UINT32_MAX; + if (r < 1e-10) r = 1e-10; + int32_t level = (int32_t)floor(-log(r) * ml); + if (level >= HNSW_MAX_LAYERS) level = HNSW_MAX_LAYERS - 1; + return level; +} + +/* -------------------------------------------------------------------------- + * Candidate heap (min-heap by distance for beam search) + * -------------------------------------------------------------------------- */ + +typedef struct { + int64_t id; /* global node id */ + double dist; /* cosine distance to query */ +} hnsw_cand_t; + +static void heap_sift_up(hnsw_cand_t* h, int64_t i) { + while (i > 0) { + int64_t p = (i - 
1) / 2; + if (h[p].dist <= h[i].dist) break; + hnsw_cand_t tmp = h[p]; h[p] = h[i]; h[i] = tmp; + i = p; + } +} + +static void heap_sift_down(hnsw_cand_t* h, int64_t n, int64_t i) { + for (;;) { + int64_t best = i; + int64_t l = 2 * i + 1, r = 2 * i + 2; + if (l < n && h[l].dist < h[best].dist) best = l; + if (r < n && h[r].dist < h[best].dist) best = r; + if (best == i) break; + hnsw_cand_t tmp = h[best]; h[best] = h[i]; h[i] = tmp; + i = best; + } +} + +/* Max-heap: sift keeping largest at top */ +static void maxheap_sift_up(hnsw_cand_t* h, int64_t i) { + while (i > 0) { + int64_t p = (i - 1) / 2; + if (h[p].dist >= h[i].dist) break; + hnsw_cand_t tmp = h[p]; h[p] = h[i]; h[i] = tmp; + i = p; + } +} + +static void maxheap_sift_down(hnsw_cand_t* h, int64_t n, int64_t i) { + for (;;) { + int64_t best = i; + int64_t l = 2 * i + 1, r = 2 * i + 2; + if (l < n && h[l].dist > h[best].dist) best = l; + if (r < n && h[r].dist > h[best].dist) best = r; + if (best == i) break; + hnsw_cand_t tmp = h[best]; h[best] = h[i]; h[i] = tmp; + i = best; + } +} + +/* -------------------------------------------------------------------------- + * Visited set (bitset) + * -------------------------------------------------------------------------- */ + +typedef struct { + uint8_t* bits; + int64_t n_nodes; +} hnsw_visited_t; + +static hnsw_visited_t visited_new(int64_t n_nodes) { + hnsw_visited_t v; + v.n_nodes = n_nodes; + size_t sz = ((size_t)n_nodes + 7) / 8; + v.bits = (uint8_t*)ray_sys_alloc(sz); + if (v.bits) memset(v.bits, 0, sz); + return v; +} + +static void visited_free(hnsw_visited_t* v) { + if (v->bits) ray_sys_free(v->bits); + v->bits = NULL; +} + +static bool visited_test(const hnsw_visited_t* v, int64_t id) { + if (id < 0 || id >= v->n_nodes) return true; + return (v->bits[id / 8] >> (id % 8)) & 1; +} + +static void visited_set(hnsw_visited_t* v, int64_t id) { + if (id >= 0 && id < v->n_nodes) + v->bits[id / 8] |= (uint8_t)(1 << (id % 8)); +} + +/* 
-------------------------------------------------------------------------- + * Layer helper: find index of global node id within a layer + * -------------------------------------------------------------------------- */ + +static int64_t layer_local_idx(const ray_hnsw_layer_t* layer, int64_t global_id) { + /* For layer 0, all nodes are present: local == global */ + /* For higher layers, linear scan (small) or we could build a reverse map */ + for (int64_t i = 0; i < layer->n_nodes; i++) { + if (layer->node_ids[i] == global_id) return i; + } + return -1; +} + +/* Get neighbor list for a node in a layer (by global id) */ +static int64_t* layer_neighbors(const ray_hnsw_layer_t* layer, int64_t global_id, + int64_t* out_M_max) { + int64_t local = layer_local_idx(layer, global_id); + if (local < 0) { *out_M_max = 0; return NULL; } + *out_M_max = layer->M_max; + return &layer->neighbors[local * layer->M_max]; +} + +/* Count actual (non -1) neighbors */ +static int64_t count_neighbors(const int64_t* nb, int64_t M_max) { + int64_t c = 0; + for (int64_t i = 0; i < M_max; i++) { + if (nb[i] < 0) break; + c++; + } + return c; +} + +/* Add a neighbor to a node's list (append if room) */ +static bool add_neighbor(int64_t* nb, int64_t M_max, int64_t new_id) { + for (int64_t i = 0; i < M_max; i++) { + if (nb[i] < 0) { nb[i] = new_id; return true; } + if (nb[i] == new_id) return true; /* already present */ + } + return false; /* full */ +} + +/* -------------------------------------------------------------------------- + * Search layer: beam search on a single layer. + * + * When `accept` is non-NULL, behaves as a filtered iterative scan: + * - Candidate-queue expansion still walks through rejected neighbours so + * accepted descendants remain reachable (preserves graph connectivity). + * - Only nodes passing `accept(node_id, ctx)` enter the result heap. + * - Candidate capacity is widened to n_nodes so pathologically selective + * filters don't silently drop unexplored regions. 
+ * + * When `accept` is NULL, behaviour is identical to the original ef-bounded + * beam search. + * -------------------------------------------------------------------------- */ + +/* Return value convention: non-negative = number of results written. + * -1 = allocation failure (OOM) — callers must surface a distinct error + * rather than treat it as "no matches". */ +static int64_t hnsw_search_layer( + const ray_hnsw_t* idx, + const float* query, + const int64_t* entry_points, int64_t n_entries, + int32_t layer_idx, + int32_t ef, + hnsw_cand_t* results /* pre-allocated, ef entries */, + ray_hnsw_accept_fn accept, void* accept_ctx) +{ + const ray_hnsw_layer_t* layer = &idx->layers[layer_idx]; + + hnsw_visited_t vis = visited_new(idx->n_nodes); + if (!vis.bits) return -1; + + /* Candidate capacity. Unfiltered: tight bound, standard HNSW. + * Filtered: worst case is a full-graph scan, so budget n_nodes. + * Memory: n_nodes * sizeof(hnsw_cand_t) = n_nodes * 16 bytes. */ + int64_t cand_cap = ef * 2 + n_entries + 1; + if (accept && idx->n_nodes > cand_cap) cand_cap = idx->n_nodes; + hnsw_cand_t* candidates = (hnsw_cand_t*)ray_sys_alloc((size_t)cand_cap * sizeof(hnsw_cand_t)); + if (!candidates) { visited_free(&vis); return -1; } + int64_t cand_sz = 0; + int64_t res_sz = 0; + + /* Initialize with entry points. */ + for (int64_t i = 0; i < n_entries; i++) { + int64_t ep = entry_points[i]; + if (visited_test(&vis, ep)) continue; + visited_set(&vis, ep); + + double d = hnsw_dist(idx, query, idx->vectors + ep * idx->dim); + + /* Always add to candidate queue. */ + candidates[cand_sz] = (hnsw_cand_t){ ep, d }; + heap_sift_up(candidates, cand_sz); + cand_sz++; + + /* Add to results only if no filter, or filter accepts. */ + if (!accept || accept(ep, accept_ctx)) { + results[res_sz] = (hnsw_cand_t){ ep, d }; + maxheap_sift_up(results, res_sz); + res_sz++; + } + } + + /* Beam loop. 
*/ + while (cand_sz > 0) { + hnsw_cand_t closest = candidates[0]; + candidates[0] = candidates[cand_sz - 1]; + cand_sz--; + if (cand_sz > 0) heap_sift_down(candidates, cand_sz, 0); + + /* Termination: closest unexpanded is worse than farthest accepted + * AND we already have ef accepted. When filtering, `res_sz` + * counts only accepted nodes, so this naturally delays stopping + * until we've collected ef accepted results. */ + if (res_sz >= ef && closest.dist > results[0].dist) break; + + int64_t M_max; + int64_t* nb = layer_neighbors(layer, closest.id, &M_max); + if (!nb) continue; + + for (int64_t i = 0; i < M_max; i++) { + int64_t nid = nb[i]; + if (nid < 0) break; + if (visited_test(&vis, nid)) continue; + visited_set(&vis, nid); + + double d = hnsw_dist(idx, query, idx->vectors + nid * idx->dim); + + /* Candidate-queue gate. Unfiltered: only push if the neighbour + * could improve the top-ef. Filtered: always push so rejected + * nodes remain pathways to accepted descendants. */ + bool should_explore = accept != NULL || + res_sz < ef || + d < results[0].dist; + if (should_explore && cand_sz < cand_cap) { + candidates[cand_sz] = (hnsw_cand_t){ nid, d }; + heap_sift_up(candidates, cand_sz); + cand_sz++; + } + + /* Result gate: only accepted nodes enter the top-K. 
*/ + if (accept && !accept(nid, accept_ctx)) continue; + + if (res_sz < ef) { + results[res_sz] = (hnsw_cand_t){ nid, d }; + maxheap_sift_up(results, res_sz); + res_sz++; + } else if (d < results[0].dist) { + results[0] = (hnsw_cand_t){ nid, d }; + maxheap_sift_down(results, res_sz, 0); + } + } + } + + ray_sys_free(candidates); + visited_free(&vis); + + /* Sort results by distance ascending (insertion sort, ef is small) */ + for (int64_t i = 1; i < res_sz; i++) { + hnsw_cand_t key = results[i]; + int64_t j = i - 1; + while (j >= 0 && results[j].dist > key.dist) { + results[j + 1] = results[j]; + j--; + } + results[j + 1] = key; + } + + return res_sz; +} + +/* -------------------------------------------------------------------------- + * Greedy closest: find single nearest neighbor in a layer (used during descent) + * -------------------------------------------------------------------------- */ + +static int64_t hnsw_greedy_closest(const ray_hnsw_t* idx, const float* query, + int64_t ep, int32_t layer_idx) { + const ray_hnsw_layer_t* layer = &idx->layers[layer_idx]; + double best_dist = hnsw_dist(idx, query, idx->vectors + ep * idx->dim); + bool changed = true; + + while (changed) { + changed = false; + int64_t M_max; + int64_t* nb = layer_neighbors(layer, ep, &M_max); + if (!nb) break; + + for (int64_t i = 0; i < M_max; i++) { + int64_t nid = nb[i]; + if (nid < 0) break; + double d = hnsw_dist(idx, query, idx->vectors + nid * idx->dim); + if (d < best_dist) { + best_dist = d; + ep = nid; + changed = true; + } + } + } + return ep; +} + +/* -------------------------------------------------------------------------- + * Neighbor pruning: keep M closest neighbors (simple selection) + * -------------------------------------------------------------------------- */ + +static void prune_neighbors(const ray_hnsw_t* idx, int64_t node_id, + int64_t* nb, int64_t M_max, int64_t M_keep) { + /* Count current neighbors */ + int64_t count = count_neighbors(nb, M_max); + if (count <= 
M_keep) return; + + /* Compute distances from node to each neighbor */ + const float* vec = idx->vectors + node_id * idx->dim; + hnsw_cand_t* ranked = (hnsw_cand_t*)ray_sys_alloc((size_t)count * sizeof(hnsw_cand_t)); + if (!ranked) return; + + for (int64_t i = 0; i < count; i++) { + ranked[i].id = nb[i]; + ranked[i].dist = hnsw_dist(idx, vec, idx->vectors + nb[i] * idx->dim); + } + + /* Sort by distance ascending */ + for (int64_t i = 1; i < count; i++) { + hnsw_cand_t key = ranked[i]; + int64_t j = i - 1; + while (j >= 0 && ranked[j].dist > key.dist) { + ranked[j + 1] = ranked[j]; + j--; + } + ranked[j + 1] = key; + } + + /* Keep M_keep closest */ + for (int64_t i = 0; i < M_max; i++) { + nb[i] = (i < M_keep) ? ranked[i].id : -1; + } + + ray_sys_free(ranked); +} + +/* -------------------------------------------------------------------------- + * HNSW Build (Algorithm 1 from HNSW paper) + * -------------------------------------------------------------------------- */ + +ray_hnsw_t* ray_hnsw_build(const float* vectors, int64_t n_nodes, int32_t dim, + ray_hnsw_metric_t metric, + int32_t M, int32_t ef_construction) { + if (!vectors || n_nodes <= 0 || dim <= 0) return NULL; + if (M <= 0) M = HNSW_DEFAULT_M; + if (ef_construction <= 0) ef_construction = HNSW_DEFAULT_EF_C; + if (metric < RAY_HNSW_COSINE || metric > RAY_HNSW_IP) metric = RAY_HNSW_COSINE; + + ray_hnsw_t* idx = (ray_hnsw_t*)ray_sys_alloc(sizeof(ray_hnsw_t)); + if (!idx) return NULL; + memset(idx, 0, sizeof(ray_hnsw_t)); + + idx->n_nodes = n_nodes; + idx->dim = dim; + idx->M = M; + idx->M_max0 = 2 * M; + idx->ef_construction = ef_construction; + idx->metric = (int32_t)metric; + idx->entry_point = 0; + /* Copy vectors so the index owns its data — prevents use-after-free + * if the caller frees the original buffer. 
*/ + size_t vec_bytes = (size_t)n_nodes * (size_t)dim * sizeof(float); + float* vec_copy = (float*)ray_sys_alloc(vec_bytes); + if (!vec_copy) { ray_sys_free(idx); return NULL; } + memcpy(vec_copy, vectors, vec_bytes); + idx->vectors = vec_copy; + idx->owns_data = true; + + /* Allocate node levels */ + idx->node_level = (int8_t*)ray_sys_alloc((size_t)n_nodes * sizeof(int8_t)); + if (!idx->node_level) { ray_hnsw_free(idx); return NULL; } + + /* Assign random levels to all nodes */ + int32_t max_level = 0; + for (int64_t i = 0; i < n_nodes; i++) { + int32_t level = hnsw_random_level(M); + idx->node_level[i] = (int8_t)level; + if (level > max_level) max_level = level; + } + idx->n_layers = max_level + 1; + + /* Allocate layers */ + for (int32_t l = 0; l < idx->n_layers; l++) { + ray_hnsw_layer_t* layer = &idx->layers[l]; + + /* Count nodes at this layer */ + int64_t count = 0; + for (int64_t i = 0; i < n_nodes; i++) { + if (idx->node_level[i] >= l) count++; + } + layer->n_nodes = count; + layer->M_max = (l == 0) ? idx->M_max0 : M; + + /* Allocate neighbor array and node_ids mapping */ + size_t nb_size = (size_t)count * (size_t)layer->M_max * sizeof(int64_t); + layer->neighbors = (int64_t*)ray_sys_alloc(nb_size); + layer->node_ids = (int64_t*)ray_sys_alloc((size_t)count * sizeof(int64_t)); + if (!layer->neighbors || !layer->node_ids) { + ray_hnsw_free(idx); + return NULL; + } + + /* Initialize neighbors to -1 (empty) */ + memset(layer->neighbors, 0xFF, nb_size); + + /* Fill node_ids mapping */ + int64_t j = 0; + for (int64_t i = 0; i < n_nodes; i++) { + if (idx->node_level[i] >= l) { + layer->node_ids[j++] = i; + } + } + } + + /* Temp buffer for search results during construction */ + int64_t max_ef = ef_construction > idx->M_max0 ? 
ef_construction : idx->M_max0; + hnsw_cand_t* search_buf = (hnsw_cand_t*)ray_sys_alloc((size_t)(max_ef + 1) * sizeof(hnsw_cand_t)); + if (!search_buf) { ray_hnsw_free(idx); return NULL; } + + /* Insert nodes one by one */ + for (int64_t i = 1; i < n_nodes; i++) { + const float* vec = vectors + i * dim; + int32_t node_level = idx->node_level[i]; + + /* Phase 1: Greedy descent from top layer to node_level+1 */ + int64_t ep = idx->entry_point; + for (int32_t l = idx->n_layers - 1; l > node_level; l--) { + ep = hnsw_greedy_closest(idx, vec, ep, l); + } + + /* Phase 2: Insert into layers [node_level ... 0] */ + for (int32_t l = node_level; l >= 0; l--) { + ray_hnsw_layer_t* layer = &idx->layers[l]; + int64_t M_max_l = layer->M_max; + int64_t M_keep = (l == 0) ? idx->M_max0 : M; + + /* Search for ef_construction nearest neighbors at this layer */ + int64_t n_found = hnsw_search_layer(idx, vec, &ep, 1, l, + ef_construction, search_buf, + NULL, NULL); + if (n_found < 0) { + /* Allocation failed inside the beam — abort the build + * rather than producing a half-connected index. */ + ray_sys_free(search_buf); + ray_hnsw_free(idx); + return NULL; + } + + /* Connect node i to the M nearest found */ + int64_t local_i = layer_local_idx(layer, i); + if (local_i < 0) continue; + + int64_t* my_nb = &layer->neighbors[local_i * M_max_l]; + int64_t n_connect = (n_found < M_keep) ? 
n_found : M_keep; + for (int64_t j = 0; j < n_connect; j++) { + my_nb[j] = search_buf[j].id; + } + + /* Add bidirectional edges: each neighbor also gets i */ + for (int64_t j = 0; j < n_connect; j++) { + int64_t nb_id = search_buf[j].id; + int64_t nb_local = layer_local_idx(layer, nb_id); + if (nb_local < 0) continue; + + int64_t* their_nb = &layer->neighbors[nb_local * M_max_l]; + if (!add_neighbor(their_nb, M_max_l, i)) { + /* Neighbor list full — prune to make room, then add i */ + prune_neighbors(idx, nb_id, their_nb, M_max_l, M_keep); + add_neighbor(their_nb, M_max_l, i); + } + } + + /* Update ep for next lower layer */ + if (n_found > 0) ep = search_buf[0].id; + } + + /* Update entry point if this node has higher level */ + if (node_level > idx->node_level[idx->entry_point]) { + idx->entry_point = i; + } + } + + ray_sys_free(search_buf); + return idx; +} + +/* -------------------------------------------------------------------------- + * Free + * -------------------------------------------------------------------------- */ + +void ray_hnsw_free(ray_hnsw_t* idx) { + if (!idx) return; + for (int32_t l = 0; l < idx->n_layers; l++) { + if (idx->layers[l].neighbors) ray_sys_free(idx->layers[l].neighbors); + if (idx->layers[l].node_ids) ray_sys_free(idx->layers[l].node_ids); + } + if (idx->node_level) ray_sys_free(idx->node_level); + if (idx->owns_data && idx->vectors) ray_sys_free((void*)idx->vectors); + ray_sys_free(idx); +} + +ray_hnsw_t* ray_hnsw_clone(const ray_hnsw_t* src) { + if (!src) return NULL; + + ray_hnsw_t* dst = (ray_hnsw_t*)ray_sys_alloc(sizeof(ray_hnsw_t)); + if (!dst) return NULL; + memset(dst, 0, sizeof(ray_hnsw_t)); + + /* Scalars — straight copy. 
*/ + dst->n_nodes = src->n_nodes; + dst->dim = src->dim; + dst->n_layers = src->n_layers; + dst->M = src->M; + dst->M_max0 = src->M_max0; + dst->ef_construction = src->ef_construction; + dst->metric = src->metric; + dst->entry_point = src->entry_point; + dst->owns_data = true; + + /* node_level */ + if (src->n_nodes > 0 && src->node_level) { + size_t sz = (size_t)src->n_nodes * sizeof(int8_t); + dst->node_level = (int8_t*)ray_sys_alloc(sz); + if (!dst->node_level) { ray_hnsw_free(dst); return NULL; } + memcpy(dst->node_level, src->node_level, sz); + } + + /* Vectors */ + if (src->n_nodes > 0 && src->dim > 0 && src->vectors) { + size_t vec_bytes = (size_t)src->n_nodes * (size_t)src->dim * sizeof(float); + float* vcopy = (float*)ray_sys_alloc(vec_bytes); + if (!vcopy) { ray_hnsw_free(dst); return NULL; } + memcpy(vcopy, src->vectors, vec_bytes); + dst->vectors = vcopy; + } + + /* Per-layer neighbor + node_id arrays */ + for (int32_t l = 0; l < src->n_layers; l++) { + const ray_hnsw_layer_t* sl = &src->layers[l]; + ray_hnsw_layer_t* dl = &dst->layers[l]; + dl->n_nodes = sl->n_nodes; + dl->M_max = sl->M_max; + + if (sl->n_nodes > 0 && sl->M_max > 0 && sl->neighbors) { + size_t nb = (size_t)sl->n_nodes * (size_t)sl->M_max * sizeof(int64_t); + dl->neighbors = (int64_t*)ray_sys_alloc(nb); + if (!dl->neighbors) { ray_hnsw_free(dst); return NULL; } + memcpy(dl->neighbors, sl->neighbors, nb); + } + if (sl->n_nodes > 0 && sl->node_ids) { + size_t sz = (size_t)sl->n_nodes * sizeof(int64_t); + dl->node_ids = (int64_t*)ray_sys_alloc(sz); + if (!dl->node_ids) { ray_hnsw_free(dst); return NULL; } + memcpy(dl->node_ids, sl->node_ids, sz); + } + } + + return dst; +} + +/* -------------------------------------------------------------------------- + * Search: find K approximate nearest neighbors + * -------------------------------------------------------------------------- */ + +int64_t ray_hnsw_search(const ray_hnsw_t* idx, + const float* query, int32_t dim, + int64_t k, int32_t 
ef_search, + int64_t* out_ids, double* out_dists) { + if (!idx || !query || dim != idx->dim || k <= 0) return 0; + if (ef_search < k) ef_search = (int32_t)k; + if (idx->n_nodes == 0) return 0; + + /* Phase 1: Greedy descent from top layer to layer 1 */ + int64_t ep = idx->entry_point; + for (int32_t l = idx->n_layers - 1; l >= 1; l--) { + ep = hnsw_greedy_closest(idx, query, ep, l); + } + + /* Phase 2: Beam search on layer 0 with ef_search width */ + hnsw_cand_t* results = (hnsw_cand_t*)ray_sys_alloc( + (size_t)ef_search * sizeof(hnsw_cand_t)); + if (!results) return -1; /* OOM — caller must propagate error. */ + + int64_t n_found = hnsw_search_layer(idx, query, &ep, 1, 0, ef_search, results, + NULL, NULL); + if (n_found < 0) { + ray_sys_free(results); + return -1; /* OOM — caller must propagate error. */ + } + + /* Extract top-K from results (already sorted by distance ascending) */ + int64_t result_count = (n_found < k) ? n_found : k; + for (int64_t i = 0; i < result_count; i++) { + out_ids[i] = results[i].id; + out_dists[i] = results[i].dist; + } + + ray_sys_free(results); + return result_count; +} + +/* -------------------------------------------------------------------------- + * Filtered iterative-scan search: only returns nodes passing `accept(node_id, ctx)`. + * + * The beam search explores the graph normally (including through rejected + * nodes, preserving connectivity to accepted descendants); only accepted + * nodes enter the result heap. Falls back to a full-graph walk for + * pathologically selective filters — bounded by n_nodes memory. 
+ * -------------------------------------------------------------------------- */ + +int64_t ray_hnsw_search_filter(const ray_hnsw_t* idx, + const float* query, int32_t dim, + int64_t k, int32_t ef_search, + ray_hnsw_accept_fn accept, void* ctx, + int64_t* out_ids, double* out_dists) { + if (!idx || !query || dim != idx->dim || k <= 0) return 0; + if (!accept) { + /* No predicate — fall through to the plain search so callers get + * zero overhead. */ + return ray_hnsw_search(idx, query, dim, k, ef_search, out_ids, out_dists); + } + if (ef_search < k) ef_search = (int32_t)k; + if (idx->n_nodes == 0) return 0; + + /* Descent through upper layers is filter-unaware — we only use those + * layers to pick the layer-0 entry point. The filter applies on + * layer 0 where the result set is collected. */ + int64_t ep = idx->entry_point; + for (int32_t l = idx->n_layers - 1; l >= 1; l--) { + ep = hnsw_greedy_closest(idx, query, ep, l); + } + + hnsw_cand_t* results = (hnsw_cand_t*)ray_sys_alloc( + (size_t)ef_search * sizeof(hnsw_cand_t)); + if (!results) return -1; /* OOM */ + + int64_t n_found = hnsw_search_layer(idx, query, &ep, 1, 0, ef_search, results, + accept, ctx); + if (n_found < 0) { + ray_sys_free(results); + return -1; + } + + int64_t result_count = (n_found < k) ? n_found : k; + for (int64_t i = 0; i < result_count; i++) { + out_ids[i] = results[i].id; + out_dists[i] = results[i].dist; + } + + ray_sys_free(results); + return result_count; +} + +/* -------------------------------------------------------------------------- + * Accessors + * -------------------------------------------------------------------------- */ + +int32_t ray_hnsw_dim(const ray_hnsw_t* idx) { + return idx ? 
idx->dim : 0; +} + +/* -------------------------------------------------------------------------- + * Persistence: save/load/mmap + * + * File layout in directory: + * hnsw_header.bin — fixed-size header + * hnsw_levels.bin — node_level[n_nodes] + * hnsw_layer_N.bin — per-layer: neighbors + node_ids + * -------------------------------------------------------------------------- */ + +typedef struct { + int64_t n_nodes; + int32_t dim; + int32_t n_layers; + int32_t M; + int32_t M_max0; + int32_t ef_construction; + int32_t metric; /* ray_hnsw_metric_t (was _pad; old files saved 0 = COSINE) */ + int64_t entry_point; +} hnsw_file_header_t; + +ray_err_t ray_hnsw_save(const ray_hnsw_t* idx, const char* dir) { + if (!idx || !dir) return RAY_ERR_IO; + + if (mkdir(dir, 0755) != 0 && errno != EEXIST) return RAY_ERR_IO; + + char path[1024]; + FILE* f; + + /* Write header */ + snprintf(path, sizeof(path), "%s/hnsw_header.bin", dir); + f = fopen(path, "wb"); + if (!f) return RAY_ERR_IO; + hnsw_file_header_t hdr = { + .n_nodes = idx->n_nodes, + .dim = idx->dim, + .n_layers = idx->n_layers, + .M = idx->M, + .M_max0 = idx->M_max0, + .ef_construction = idx->ef_construction, + .metric = idx->metric, + .entry_point = idx->entry_point + }; + if (fwrite(&hdr, sizeof(hdr), 1, f) != 1) { fclose(f); return RAY_ERR_IO; } + fclose(f); + + /* Write node levels */ + snprintf(path, sizeof(path), "%s/hnsw_levels.bin", dir); + f = fopen(path, "wb"); + if (!f) return RAY_ERR_IO; + if (fwrite(idx->node_level, sizeof(int8_t), (size_t)idx->n_nodes, f) != + (size_t)idx->n_nodes) { + fclose(f); return RAY_ERR_IO; + } + fclose(f); + + /* Write each layer */ + for (int32_t l = 0; l < idx->n_layers; l++) { + const ray_hnsw_layer_t* layer = &idx->layers[l]; + snprintf(path, sizeof(path), "%s/hnsw_layer_%d.bin", dir, l); + f = fopen(path, "wb"); + if (!f) return RAY_ERR_IO; + + /* Write layer metadata: n_nodes, M_max */ + if (fwrite(&layer->n_nodes, sizeof(int64_t), 1, f) != 1) { fclose(f); return 
RAY_ERR_IO; } + if (fwrite(&layer->M_max, sizeof(int64_t), 1, f) != 1) { fclose(f); return RAY_ERR_IO; } + + /* Write neighbors */ + size_t nb_count = (size_t)layer->n_nodes * (size_t)layer->M_max; + if (nb_count > 0) { + if (fwrite(layer->neighbors, sizeof(int64_t), nb_count, f) != nb_count) { + fclose(f); return RAY_ERR_IO; + } + } + + /* Write node_ids */ + if (layer->n_nodes > 0) { + if (fwrite(layer->node_ids, sizeof(int64_t), (size_t)layer->n_nodes, f) != + (size_t)layer->n_nodes) { + fclose(f); return RAY_ERR_IO; + } + } + + fclose(f); + } + + /* Write vectors */ + snprintf(path, sizeof(path), "%s/hnsw_vectors.bin", dir); + f = fopen(path, "wb"); + if (!f) return RAY_ERR_IO; + size_t vec_count = (size_t)idx->n_nodes * (size_t)idx->dim; + if (vec_count > 0) { + if (fwrite(idx->vectors, sizeof(float), vec_count, f) != vec_count) { + fclose(f); return RAY_ERR_IO; + } + } + fclose(f); + + return RAY_OK; +} + +static ray_hnsw_t* hnsw_load_impl(const char* dir, bool use_mmap) { + if (!dir) return NULL; + (void)use_mmap; /* mmap optimization deferred — both paths read into memory */ + + char path[1024]; + FILE* f; + + /* Read header */ + snprintf(path, sizeof(path), "%s/hnsw_header.bin", dir); + f = fopen(path, "rb"); + if (!f) return NULL; + hnsw_file_header_t hdr; + if (fread(&hdr, sizeof(hdr), 1, f) != 1) { fclose(f); return NULL; } + fclose(f); + + if (hdr.n_nodes <= 0 || hdr.dim <= 0 || hdr.n_layers <= 0 || + hdr.n_layers > HNSW_MAX_LAYERS || + hdr.M <= 0 || hdr.M_max0 <= 0 || + hdr.entry_point < 0 || hdr.entry_point >= hdr.n_nodes) return NULL; + + ray_hnsw_t* idx = (ray_hnsw_t*)ray_sys_alloc(sizeof(ray_hnsw_t)); + if (!idx) return NULL; + memset(idx, 0, sizeof(ray_hnsw_t)); + + idx->n_nodes = hdr.n_nodes; + idx->dim = hdr.dim; + idx->n_layers = hdr.n_layers; + idx->M = hdr.M; + idx->M_max0 = hdr.M_max0; + idx->ef_construction = hdr.ef_construction; + idx->metric = (hdr.metric >= RAY_HNSW_COSINE && hdr.metric <= RAY_HNSW_IP) + ? 
hdr.metric : RAY_HNSW_COSINE; + idx->entry_point = hdr.entry_point; + idx->vectors = NULL; + idx->owns_data = true; + + /* Read node levels */ + snprintf(path, sizeof(path), "%s/hnsw_levels.bin", dir); + f = fopen(path, "rb"); + if (!f) { ray_hnsw_free(idx); return NULL; } + idx->node_level = (int8_t*)ray_sys_alloc((size_t)hdr.n_nodes * sizeof(int8_t)); + if (!idx->node_level) { fclose(f); ray_hnsw_free(idx); return NULL; } + if (fread(idx->node_level, sizeof(int8_t), (size_t)hdr.n_nodes, f) != + (size_t)hdr.n_nodes) { + fclose(f); ray_hnsw_free(idx); return NULL; + } + fclose(f); + + /* Read each layer */ + for (int32_t l = 0; l < hdr.n_layers; l++) { + ray_hnsw_layer_t* layer = &idx->layers[l]; + snprintf(path, sizeof(path), "%s/hnsw_layer_%d.bin", dir, l); + f = fopen(path, "rb"); + if (!f) { ray_hnsw_free(idx); return NULL; } + + /* Read layer metadata */ + if (fread(&layer->n_nodes, sizeof(int64_t), 1, f) != 1) { fclose(f); ray_hnsw_free(idx); return NULL; } + if (fread(&layer->M_max, sizeof(int64_t), 1, f) != 1) { fclose(f); ray_hnsw_free(idx); return NULL; } + + /* Validate layer metadata against header */ + if (layer->n_nodes <= 0 || layer->n_nodes > hdr.n_nodes) { fclose(f); ray_hnsw_free(idx); return NULL; } + if (layer->M_max <= 0 || layer->M_max > 4096) { fclose(f); ray_hnsw_free(idx); return NULL; } + if ((uint64_t)layer->n_nodes > SIZE_MAX / sizeof(int64_t) / (uint64_t)layer->M_max) { + fclose(f); ray_hnsw_free(idx); return NULL; + } + + /* Allocate and read neighbors */ + size_t nb_count = (size_t)layer->n_nodes * (size_t)layer->M_max; + if (nb_count > 0) { + layer->neighbors = (int64_t*)ray_sys_alloc(nb_count * sizeof(int64_t)); + if (!layer->neighbors) { fclose(f); ray_hnsw_free(idx); return NULL; } + if (fread(layer->neighbors, sizeof(int64_t), nb_count, f) != nb_count) { + fclose(f); ray_hnsw_free(idx); return NULL; + } + } + + /* Allocate and read node_ids */ + if (layer->n_nodes > 0) { + layer->node_ids = 
(int64_t*)ray_sys_alloc((size_t)layer->n_nodes * sizeof(int64_t)); + if (!layer->node_ids) { fclose(f); ray_hnsw_free(idx); return NULL; } + if (fread(layer->node_ids, sizeof(int64_t), (size_t)layer->n_nodes, f) != + (size_t)layer->n_nodes) { + fclose(f); ray_hnsw_free(idx); return NULL; + } + } + + fclose(f); + } + + /* Read vectors */ + snprintf(path, sizeof(path), "%s/hnsw_vectors.bin", dir); + f = fopen(path, "rb"); + if (!f) { ray_hnsw_free(idx); return NULL; } + size_t vec_count = (size_t)hdr.n_nodes * (size_t)hdr.dim; + if (vec_count > 0) { + float* vecs = (float*)ray_sys_alloc(vec_count * sizeof(float)); + if (!vecs) { fclose(f); ray_hnsw_free(idx); return NULL; } + if (fread(vecs, sizeof(float), vec_count, f) != vec_count) { + fclose(f); ray_sys_free(vecs); ray_hnsw_free(idx); return NULL; + } + idx->vectors = vecs; + } + fclose(f); + + return idx; +} + +ray_hnsw_t* ray_hnsw_load(const char* dir) { + return hnsw_load_impl(dir, false); +} + +ray_hnsw_t* ray_hnsw_mmap(const char* dir) { + return hnsw_load_impl(dir, true); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/store/hnsw.h b/crates/rayforce-sys/vendor/rayforce/src/store/hnsw.h new file mode 100644 index 0000000..055a3c7 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/store/hnsw.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_HNSW_H +#define RAY_HNSW_H + +#include + +/* ---------- HNSW Index ---------- + * + * Multi-layer proximity graph for approximate nearest neighbor search. + * + * Memory layout per node: + * - Layer 0: up to M_max0 neighbors (default 2*M) + * - Layers 1+: up to M neighbors each + * + * Neighbor lists stored as flat arrays: + * neighbors[node * M_max + i] = neighbor_id (or -1 if unused) + * + * Each layer stores its own neighbor array for all nodes at that layer. + */ + +#define HNSW_MAX_LAYERS 16 +#define HNSW_DEFAULT_M 16 +#define HNSW_DEFAULT_EF_C 200 +#define HNSW_DEFAULT_EF_S 50 + +/* Distance metric driving beam search. 
HNSW requires lower-is-closer;
+ * we choose the encoding so each metric sorts ascending:
+ *   COSINE → 1 - cos(a, b)        range [0, 2]
+ *   L2     → sqrt(sum(sq diff))   range [0, ∞)
+ *   IP     → -dot(a, b)           range (-∞, ∞)  (negated so lower=closer) */
+typedef enum {
+    RAY_HNSW_COSINE = 0,
+    RAY_HNSW_L2     = 1,
+    RAY_HNSW_IP     = 2
+} ray_hnsw_metric_t;
+
+typedef struct ray_hnsw_layer {
+    int64_t* neighbors;   /* flat array: n_nodes_in_layer * M_max entries */
+    int64_t  n_nodes;     /* number of nodes in this layer */
+    int64_t  M_max;       /* max neighbors per node in this layer */
+    int64_t* node_ids;    /* mapping: layer_idx -> global node id */
+} ray_hnsw_layer_t;
+
+typedef struct ray_hnsw {
+    int64_t  n_nodes;           /* total number of vectors */
+    int32_t  dim;               /* embedding dimension */
+    int32_t  n_layers;          /* number of layers (including layer 0) */
+    int32_t  M;                 /* max neighbors per node (layers 1+) */
+    int32_t  M_max0;            /* max neighbors per node (layer 0) */
+    int32_t  ef_construction;   /* beam width during construction */
+    int32_t  metric;            /* ray_hnsw_metric_t */
+    int64_t  entry_point;       /* entry point node (highest layer) */
+    int8_t*  node_level;        /* max layer for each node (n_nodes entries) */
+    ray_hnsw_layer_t layers[HNSW_MAX_LAYERS];
+    const float* vectors;       /* embedding data, n_nodes * dim floats; freed by ray_hnsw_free when owns_data is set */
+    bool     owns_data;         /* true when the index owns `vectors` (build, clone and load all copy and set this) */
+} ray_hnsw_t;
+
+/* --- Build / Free / Clone --- */
+ray_hnsw_t* ray_hnsw_build(const float* vectors, int64_t n_nodes, int32_t dim,
+                            ray_hnsw_metric_t metric,
+                            int32_t M, int32_t ef_construction);
+void ray_hnsw_free(ray_hnsw_t* idx);
+/* Deep-copy an index: duplicates vectors, node levels, and every layer's
+ * neighbor + node_id arrays.  Returns a new fully-owned index with the
+ * same semantics as the source.  Returns NULL on OOM. */
+ray_hnsw_t* ray_hnsw_clone(const ray_hnsw_t* src);
+
+/* --- Search --- */
+/* Returns top-K nearest neighbors as (node_id, distance) pairs.
+ * out_ids and out_dists must be pre-allocated with k entries. + * + * Return value: + * >= 0 : number of results written (may be < k). + * -1 : allocation failure (OOM) — callers must surface a distinct + * error rather than treat the 0-return as "no matches". + */ +int64_t ray_hnsw_search(const ray_hnsw_t* idx, + const float* query, int32_t dim, + int64_t k, int32_t ef_search, + int64_t* out_ids, double* out_dists); + +/* Predicate callback used by the filtered iterative-scan variant below. + * Return true to accept `node_id` into the result set, false to reject. + * Rejected nodes still participate in candidate-graph exploration so + * connectivity through them is preserved — this is the standard + * "iterative scan" shape. */ +typedef bool (*ray_hnsw_accept_fn)(int64_t node_id, void* ctx); + +/* Like ray_hnsw_search, but only nodes passing `accept(node_id, ctx)` + * enter the top-K result set. Candidate-queue expansion still traverses + * rejected nodes so their accepted descendants remain reachable. + * Falls back to exhaustive graph exploration for pathologically selective + * filters (bounded by idx->n_nodes). + * + * Return value matches ray_hnsw_search: >= 0 = result count, -1 = OOM. 
*/ +int64_t ray_hnsw_search_filter(const ray_hnsw_t* idx, + const float* query, int32_t dim, + int64_t k, int32_t ef_search, + ray_hnsw_accept_fn accept, void* ctx, + int64_t* out_ids, double* out_dists); + +/* --- Accessors --- */ +int32_t ray_hnsw_dim(const ray_hnsw_t* idx); + +/* --- Persistence --- */ +ray_err_t ray_hnsw_save(const ray_hnsw_t* idx, const char* dir); +ray_hnsw_t* ray_hnsw_load(const char* dir); +ray_hnsw_t* ray_hnsw_mmap(const char* dir); + +#endif /* RAY_HNSW_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/store/journal.c b/crates/rayforce-sys/vendor/rayforce/src/store/journal.c new file mode 100644 index 0000000..d16da06 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/store/journal.c @@ -0,0 +1,656 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAY_OS_WINDOWS +# define _GNU_SOURCE /* fileno(), gmtime_r() */ +#endif + +#include "journal.h" +#include "fileio.h" +#include "serde.h" +#include "core/ipc.h" +#include "lang/eval.h" +#include "lang/env.h" +#include "mem/sys.h" + +#include +#include +#include +#include +#include + +#ifndef RAY_OS_WINDOWS +# include +# include +#endif + +#define RAY_JOURNAL_PATH_MAX 1024 + +/* Module-private state. Single-threaded by construction (the IPC + * dispatch loop is single-threaded for eval_payload, and replay runs + * from main before any worker thread spins up). */ +static struct { + ray_journal_mode_t mode; + FILE* fp; + char base[RAY_JOURNAL_PATH_MAX]; + char log_path[RAY_JOURNAL_PATH_MAX]; + bool in_replay; +} g_journal = { + .mode = RAY_JOURNAL_OFF, + .fp = NULL, + .base = {0}, + .log_path = {0}, + .in_replay = false, +}; + +/* ── helpers ──────────────────────────────────────────────────────── */ + +static bool path_join_ext(char* dst, size_t dstsz, const char* base, const char* ext) { + int n = snprintf(dst, dstsz, "%s%s", base, ext); + return n > 0 && (size_t)n < dstsz; +} + +static bool file_exists(const char* path) { +#ifdef RAY_OS_WINDOWS + DWORD attrs = GetFileAttributesA(path); + return attrs != INVALID_FILE_ATTRIBUTES && !(attrs & FILE_ATTRIBUTE_DIRECTORY); +#else + struct stat st; + return stat(path, &st) == 0 && S_ISREG(st.st_mode); +#endif +} + +/* Read fixed-size buffer in a loop — fread can short-read on signals. + * Returns SIZE_MAX on a real I/O error (vs. clean EOF) so the caller + * can distinguish "log ended cleanly" from "torn read mid-frame". The + * difference matters: clean EOF after N entries means N replayed; an + * error mid-frame must abort with RAY_JREPLAY_IO so we don't open the + * log for append on top of a partially-replayed state. 
*/
+static size_t read_full(FILE* f, void* buf, size_t want) {
+    uint8_t* p = (uint8_t*)buf;
+    size_t got = 0;
+    while (got < want) {
+        size_t n = fread(p + got, 1, want - got, f);
+        if (n == 0) {
+            if (ferror(f)) return SIZE_MAX; /* stream error, not end of file */
+            break;                          /* EOF: report the short count */
+        }
+        got += n;
+    }
+    return got;
+}
+
+/* Hand back the uncompressed bytes of an IPC payload.
+ *
+ * Returns true on success, false on failure (the out parameters are then
+ * left untouched).
+ *
+ * Flag RAY_IPC_FLAG_COMPRESSED clear: nothing is copied — *out_buf aliases
+ * payload, *out_len is payload_len and *out_owned is false.
+ *
+ * Flag set: the first 4 payload bytes are read (via memcpy, so in host
+ * byte order) as the uncompressed size, which must be non-zero and at
+ * most 256 MiB. A new buffer of that size is allocated and filled by
+ * ray_ipc_decompress, which must produce exactly that many bytes.
+ * *out_owned is then true and the caller frees *out_buf with
+ * ray_sys_free. */
+static bool decompress_if_needed(const ray_ipc_header_t* hdr,
+                                 const uint8_t* payload, int64_t payload_len,
+                                 uint8_t** out_buf, int64_t* out_len,
+                                 bool* out_owned)
+{
+    if (!(hdr->flags & RAY_IPC_FLAG_COMPRESSED)) {
+        *out_buf = (uint8_t*)payload;
+        *out_len = payload_len;
+        *out_owned = false;
+        return true;
+    }
+    if (payload_len < 4) return false; /* too short to hold the size prefix */
+    uint32_t uncomp_size;
+    memcpy(&uncomp_size, payload, 4);
+    if (uncomp_size == 0 || uncomp_size > 256u * 1024u * 1024u) return false;
+
+    uint8_t* tmp = (uint8_t*)ray_sys_alloc(uncomp_size);
+    if (!tmp) return false;
+    size_t dlen = ray_ipc_decompress(payload + 4, (size_t)payload_len - 4,
+                                     tmp, uncomp_size);
+    if (dlen != uncomp_size) { ray_sys_free(tmp); return false; }
+    *out_buf = tmp;
+    *out_len = (int64_t)uncomp_size;
+    *out_owned = true;
+    return true;
+}
+
+/* Evaluate a deserialized message exactly as eval_payload would: string
+ * payloads run through ray_eval_str, everything else through ray_eval.
*/ +static ray_t* eval_one(ray_t* msg) { + if (!msg || RAY_IS_ERR(msg)) return msg; + if (msg->type == -RAY_STR) { + const char* s = ray_str_ptr(msg); + size_t n = ray_str_len(msg); + if (!s || n == 0) return RAY_NULL_OBJ; + char* tmp = (char*)ray_sys_alloc(n + 1); + if (!tmp) return ray_error("oom", NULL); + memcpy(tmp, s, n); + tmp[n] = '\0'; + ray_t* r = ray_eval_str(tmp); + ray_sys_free(tmp); + return r; + } + return ray_eval(msg); +} + +/* ── public API ───────────────────────────────────────────────────── */ + +bool ray_journal_is_open(void) { return g_journal.fp != NULL; } + +ray_err_t ray_journal_write_bytes(const ray_ipc_header_t* hdr, + const uint8_t* payload, + int64_t payload_len) +{ + if (!g_journal.fp || g_journal.in_replay) return RAY_OK; + if (!hdr || !payload || payload_len < 0) return RAY_ERR_DOMAIN; + + if (fwrite(hdr, 1, sizeof(*hdr), g_journal.fp) != sizeof(*hdr)) + return RAY_ERR_IO; + if (payload_len > 0 && + fwrite(payload, 1, (size_t)payload_len, g_journal.fp) != (size_t)payload_len) + return RAY_ERR_IO; + + if (g_journal.mode == RAY_JOURNAL_SYNC) { + if (fflush(g_journal.fp) != 0) return RAY_ERR_IO; +#ifndef RAY_OS_WINDOWS + if (fsync(fileno(g_journal.fp)) != 0) return RAY_ERR_IO; +#else + FlushFileBuffers((HANDLE)_get_osfhandle(_fileno(g_journal.fp))); +#endif + } + return RAY_OK; +} + +ray_err_t ray_journal_replay(const char* path, + int64_t* out_chunks, + int64_t* out_eval_errors, + ray_jreplay_status_t* out_status) +{ + if (out_chunks) *out_chunks = 0; + if (out_eval_errors) *out_eval_errors = 0; + if (out_status) *out_status = RAY_JREPLAY_OK; + + FILE* f = fopen(path, "rb"); + if (!f) { + if (out_status) *out_status = RAY_JREPLAY_IO; + return RAY_ERR_IO; + } + + bool prev_in_replay = g_journal.in_replay; + g_journal.in_replay = true; + + int64_t chunks = 0; + int64_t errs = 0; + ray_jreplay_status_t status = RAY_JREPLAY_OK; + + for (;;) { + ray_ipc_header_t hdr; + size_t r = read_full(f, &hdr, sizeof(hdr)); + if (r == 0) break; /* 
clean EOF */ + if (r == SIZE_MAX) { status = RAY_JREPLAY_IO; break; } + if (r != sizeof(hdr)) { status = RAY_JREPLAY_BADTAIL; break; } + if (hdr.prefix != RAY_SERDE_PREFIX) { status = RAY_JREPLAY_BADTAIL; break; } + if (hdr.version != RAY_SERDE_WIRE_VERSION) { status = RAY_JREPLAY_BADTAIL; break; } + if (hdr.size <= 0 || hdr.size > 256LL*1024*1024) + { status = RAY_JREPLAY_BADTAIL; break; } + + uint8_t* buf = (uint8_t*)ray_sys_alloc((size_t)hdr.size); + if (!buf) { status = RAY_JREPLAY_OOM; break; } + size_t pr = read_full(f, buf, (size_t)hdr.size); + if (pr == SIZE_MAX) { ray_sys_free(buf); status = RAY_JREPLAY_IO; break; } + if (pr != (size_t)hdr.size) { + ray_sys_free(buf); + status = RAY_JREPLAY_BADTAIL; + break; + } + + uint8_t* payload = NULL; + int64_t pay_len = 0; + bool owned = false; + if (!decompress_if_needed(&hdr, buf, hdr.size, + &payload, &pay_len, &owned)) { + ray_sys_free(buf); + /* Framing was intact (header parsed OK, payload size matched); + * "decode failed" is a content/code bug, NOT a tail truncation, + * so do not point the operator at `truncate to recover`. */ + status = RAY_JREPLAY_DECOMP; + break; + } + + int64_t consumed = pay_len; + ray_t* msg = ray_de_raw(payload, &consumed); + if (owned) ray_sys_free(payload); + ray_sys_free(buf); + + if (!msg || RAY_IS_ERR(msg)) { + if (msg) ray_error_free(msg); /* ray_release is a no-op on errors */ + status = RAY_JREPLAY_DESER; + break; + } + + /* Re-impose the sender's restricted state for THIS frame. Without + * this a `-U` client's writes silently elevate to full privilege + * across crash-restart, since replay runs in the main thread with + * no IPC connection context. 
*/ + bool prev_restricted = ray_eval_get_restricted(); + ray_eval_set_restricted(hdr.flags & RAY_IPC_FLAG_RESTRICTED); + ray_t* result = eval_one(msg); + ray_eval_set_restricted(prev_restricted); + ray_release(msg); + + if (result && RAY_IS_ERR(result)) { + const char* code = ray_err_code(result); + fprintf(stderr, "log: WARN chunk %lld raised: %s (during replay)\n", + (long long)chunks, code ? code : "?"); + errs++; + ray_error_free(result); + } else if (result && result != RAY_NULL_OBJ) { + ray_release(result); + } + + chunks++; + } + + fclose(f); + g_journal.in_replay = prev_in_replay; + + if (out_chunks) *out_chunks = chunks; + if (out_eval_errors) *out_eval_errors = errs; + if (out_status) *out_status = status; + + if (status == RAY_JREPLAY_OK) return RAY_OK; + if (status == RAY_JREPLAY_IO || + status == RAY_JREPLAY_OOM) return RAY_ERR_IO; + return RAY_ERR_DOMAIN; +} + +ray_err_t ray_journal_validate(const char* path, + int64_t* out_chunks, + int64_t* out_valid_bytes) +{ + if (out_chunks) *out_chunks = 0; + if (out_valid_bytes) *out_valid_bytes = 0; + + FILE* f = fopen(path, "rb"); + if (!f) return RAY_ERR_IO; + + int64_t chunks = 0; + int64_t valid_off = 0; + /* Reuse one growing buffer for payload reads — most logs hold + * many small entries plus the occasional large one, so growing on + * demand is simpler than trying to size up front. 
*/ + uint8_t* buf = NULL; + int64_t cap = 0; + + for (;;) { + ray_ipc_header_t hdr; + size_t r = read_full(f, &hdr, sizeof(hdr)); + if (r == 0) break; /* clean EOF */ + if (r != sizeof(hdr)) break; + if (hdr.prefix != RAY_SERDE_PREFIX) break; + if (hdr.version != RAY_SERDE_WIRE_VERSION) break; + if (hdr.size <= 0 || hdr.size > 256LL*1024*1024) break; + + if (hdr.size > cap) { + uint8_t* tmp = (uint8_t*)ray_sys_alloc((size_t)hdr.size); + if (!tmp) break; /* OOM mid-validate */ + if (buf) ray_sys_free(buf); + buf = tmp; + cap = hdr.size; + } + /* Actually consume the payload bytes — fseek would silently + * succeed past EOF and we'd over-count valid frames on a + * truncated log. */ + if (read_full(f, buf, (size_t)hdr.size) != (size_t)hdr.size) break; + + valid_off += (int64_t)sizeof(hdr) + hdr.size; + chunks++; + } + if (buf) ray_sys_free(buf); + fclose(f); + + if (out_chunks) *out_chunks = chunks; + if (out_valid_bytes) *out_valid_bytes = valid_off; + return RAY_OK; +} + +/* Open .log in append mode after replay. */ +static ray_err_t open_log_for_append(void) { + g_journal.fp = fopen(g_journal.log_path, "ab"); + if (!g_journal.fp) return RAY_ERR_IO; + /* Disable stdio buffering: every fwrite must reach the OS buffer + * immediately so a SIGTERM (or any non-clean shutdown) still leaves + * the entry on disk. Without this, the default block-buffered FILE* + * keeps recent writes in user-space until 4 KB accumulates — a + * silent data-loss window that defeats the whole point of -l mode. + * In RAY_JOURNAL_SYNC mode we additionally fsync per write; here + * we just need the bytes to leave the process. 
*/ + setvbuf(g_journal.fp, NULL, _IONBF, 0); + return RAY_OK; +} + +ray_err_t ray_journal_open(const char* base, ray_journal_mode_t mode) { + if (!base || !*base) return RAY_ERR_DOMAIN; + if (g_journal.fp) return RAY_ERR_DOMAIN; /* already open */ + + size_t blen = strlen(base); + if (blen + 5 >= sizeof(g_journal.base)) return RAY_ERR_DOMAIN; + + memcpy(g_journal.base, base, blen + 1); + g_journal.mode = mode; + if (!path_join_ext(g_journal.log_path, sizeof(g_journal.log_path), base, ".log")) + return RAY_ERR_DOMAIN; + + char qdb_path[RAY_JOURNAL_PATH_MAX]; + if (!path_join_ext(qdb_path, sizeof(qdb_path), base, ".qdb")) + return RAY_ERR_DOMAIN; + + /* 1. Snapshot — load .qdb if present. */ + if (file_exists(qdb_path)) { + bool prev_in_replay = g_journal.in_replay; + g_journal.in_replay = true; + ray_t* snap = ray_obj_load(qdb_path); + g_journal.in_replay = prev_in_replay; + + if (!snap || RAY_IS_ERR(snap)) { + const char* code = (snap && RAY_IS_ERR(snap)) ? ray_err_code(snap) : "io"; + fprintf(stderr, "log: ERROR failed to load snapshot %s (%s)\n", + qdb_path, code ? code : "io"); + if (snap) ray_error_free(snap); + return RAY_ERR_IO; + } + if (snap->type != RAY_DICT) { + fprintf(stderr, "log: ERROR snapshot %s is not a dict\n", qdb_path); + ray_release(snap); + return RAY_ERR_DOMAIN; + } + ray_t* keys = ray_dict_keys(snap); + ray_t* vals = ray_dict_vals(snap); + int64_t n = keys ? keys->len : 0; + int64_t bound = 0; + int64_t skipped = 0; + int64_t bind_errs = 0; + for (int64_t i = 0; i < n; i++) { + if (!keys || keys->type != RAY_SYM) { + fprintf(stderr, "log: WARN snapshot key vector has type %d, expected RAY_SYM — dropping %lld bindings\n", + keys ? 
(int)keys->type : -1, (long long)(n - i)); + skipped += n - i; + break; + } + int64_t sym_id = ((int64_t*)ray_data(keys))[i]; + ray_t* v = ray_list_get(vals, i); + if (!v) { + fprintf(stderr, "log: WARN snapshot value missing for sym %lld — skipping\n", + (long long)sym_id); + skipped++; + continue; + } + /* MUST go through ray_env_set, NOT ray_env_bind: the former + * flips the slot's user flag, the latter installs as builtin + * and the value silently drops out of the next snapshot's + * ray_env_list_user filter — silent corruption across two + * restarts. ray_env_set handles its own retain via + * env_bind_global_impl, so do NOT explicitly retain here + * (would leak one ref per binding). */ + ray_err_t e = ray_env_set(sym_id, v); + if (e != RAY_OK) { + fprintf(stderr, "log: WARN snapshot bind for sym %lld failed (%d)\n", + (long long)sym_id, (int)e); + bind_errs++; + continue; + } + bound++; + } + ray_release(snap); + fprintf(stderr, "log: loaded snapshot %s (%lld bound, %lld skipped, %lld errors)\n", + qdb_path, (long long)bound, (long long)skipped, (long long)bind_errs); + if (bind_errs > 0) { + /* Partial state is a footgun. The caller should treat this as + * fatal and either restore from backup or skip the snapshot. */ + fprintf(stderr, "log: ERROR snapshot load left env in a partially-applied state\n"); + return RAY_ERR_DOMAIN; + } + } + + /* 2. Log — replay .log if present. 
*/ + if (file_exists(g_journal.log_path)) { + int64_t chunks = 0, errs = 0; + ray_jreplay_status_t status = RAY_JREPLAY_OK; + ray_journal_replay(g_journal.log_path, &chunks, &errs, &status); + switch (status) { + case RAY_JREPLAY_OK: { + fprintf(stderr, "log: replayed %lld entries (%lld eval errors) from %s\n", + (long long)chunks, (long long)errs, g_journal.log_path); + break; + } + case RAY_JREPLAY_BADTAIL: { + int64_t valid_chunks = 0, valid_bytes = 0; + ray_journal_validate(g_journal.log_path, &valid_chunks, &valid_bytes); + fprintf(stderr, + "log: ERROR badtail in %s after %lld entries (valid bytes = %lld)\n" + "log: hint: truncate the file at offset %lld to recover the\n" + "log: valid prefix, then restart\n", + g_journal.log_path, (long long)chunks, + (long long)valid_bytes, (long long)valid_bytes); + return RAY_ERR_DOMAIN; + } + case RAY_JREPLAY_DESER: + case RAY_JREPLAY_DECOMP: { + fprintf(stderr, + "log: ERROR replay failed at chunk %lld in %s: %s — framing\n" + "log: was intact so this is content/code mismatch, NOT\n" + "log: tail truncation. Do NOT truncate the log; either\n" + "log: fix the version skew or restore from .qdb backup.\n", + (long long)chunks, g_journal.log_path, + status == RAY_JREPLAY_DECOMP ? "decompression failed" : "deserialization failed"); + return RAY_ERR_DOMAIN; + } + case RAY_JREPLAY_OOM: + case RAY_JREPLAY_IO: { + fprintf(stderr, "log: ERROR replay aborted at chunk %lld in %s (%s)\n", + (long long)chunks, g_journal.log_path, + status == RAY_JREPLAY_OOM ? "out of memory" : "I/O failure"); + return RAY_ERR_IO; + } + } + } + + /* 3. Open log for append. */ + return open_log_for_append(); +} + +ray_err_t ray_journal_close(void) { + if (!g_journal.fp) return RAY_OK; + /* Check both fflush and fclose return — buffered ENOSPC slips + * through silently otherwise and the "best-effort durability at + * clean shutdown" promise becomes a lie. Even on failure we null + * the fp so the journal isn't left in a half-open zombie state. 
*/ + int flush_rc = fflush(g_journal.fp); + int close_rc = fclose(g_journal.fp); + g_journal.fp = NULL; + if (flush_rc != 0 || close_rc != 0) { + fprintf(stderr, "log: ERROR journal close (flush rc=%d, close rc=%d)\n", + flush_rc, close_rc); + return RAY_ERR_IO; + } + return RAY_OK; +} + +ray_err_t ray_journal_sync(void) { + if (!g_journal.fp) return RAY_OK; + if (g_journal.mode == RAY_JOURNAL_SYNC) return RAY_OK; + if (fflush(g_journal.fp) != 0) return RAY_ERR_IO; +#ifndef RAY_OS_WINDOWS + if (fsync(fileno(g_journal.fp)) != 0) return RAY_ERR_IO; +#else + FlushFileBuffers((HANDLE)_get_osfhandle(_fileno(g_journal.fp))); +#endif + return RAY_OK; +} + +/* UTC ISO-8601 with safe filename chars (no ':'). */ +static void utc_stamp(char* buf, size_t bufsz) { + time_t t = time(NULL); + struct tm tm_; +#ifdef RAY_OS_WINDOWS + gmtime_s(&tm_, &t); +#else + gmtime_r(&t, &tm_); +#endif + strftime(buf, bufsz, "%Y.%m.%dT%H.%M.%SZ", &tm_); +} + +ray_err_t ray_journal_roll(void) { + if (!g_journal.fp) return RAY_ERR_DOMAIN; + + /* Build the archive name BEFORE closing the fp — if path build + * fails we can return without leaving the journal in a closed + * state that ray_journal_write_bytes silently no-ops on. */ + char stamp[64]; + utc_stamp(stamp, sizeof(stamp)); + char archive[RAY_JOURNAL_PATH_MAX]; + int n = snprintf(archive, sizeof(archive), "%s.%s.log", g_journal.base, stamp); + if (n <= 0 || (size_t)n >= sizeof(archive)) return RAY_ERR_DOMAIN; + + int flush_rc = fflush(g_journal.fp); + int close_rc = fclose(g_journal.fp); + g_journal.fp = NULL; + if (flush_rc != 0 || close_rc != 0) { + /* Don't rename a partial/possibly-corrupt log. Try to reopen + * for append so subsequent writes still land somewhere. 
*/ + fprintf(stderr, "log: ERROR roll: pre-rename flush/close failed (flush=%d close=%d)\n", + flush_rc, close_rc); + (void)open_log_for_append(); /* best-effort restore; fp may stay NULL */ + return RAY_ERR_IO; + } + + if (ray_file_rename(g_journal.log_path, archive) != RAY_OK) { + /* Rename failed but the log file is still on disk under its + * original name. Reopen for append so we don't silently + * disable journaling for the rest of the process. */ + fprintf(stderr, "log: ERROR roll: rename %s -> %s failed\n", + g_journal.log_path, archive); + (void)open_log_for_append(); + return RAY_ERR_IO; + } + + /* Durability: the rename itself is atomic but the directory entry + * may not survive a power loss without a parent fsync. Best- + * effort — log if it fails but don't abort, the rename did + * succeed. */ + (void)ray_file_sync_dir(archive); + + return open_log_for_append(); +} + +ray_err_t ray_journal_snapshot(void) { + if (!g_journal.fp) return RAY_ERR_DOMAIN; + + /* Enumerate ONLY user-defined globals (slots last written via + * ray_env_set). Builtin function objects must NOT enter the + * snapshot — they hold absolute pointers from the prior process + * and would dangle on reload. ray_env_list_user is the one-bit- + * per-slot filter maintained by env.c. 
*/ + int32_t cap = ray_env_global_count(); + if (cap <= 0) cap = 1; + int64_t* sym_ids = (int64_t*)ray_sys_alloc((size_t)cap * sizeof(int64_t)); + ray_t** vals_buf = (ray_t**) ray_sys_alloc((size_t)cap * sizeof(ray_t*)); + if (!sym_ids || !vals_buf) { + if (sym_ids) ray_sys_free(sym_ids); + if (vals_buf) ray_sys_free(vals_buf); + return RAY_ERR_OOM; + } + int32_t kept = ray_env_list_user(sym_ids, vals_buf, cap); + + ray_t* keys = ray_sym_vec_new(RAY_SYM_W64, kept); + if (!keys || RAY_IS_ERR(keys)) { + if (keys && RAY_IS_ERR(keys)) ray_error_free(keys); + ray_sys_free(sym_ids); ray_sys_free(vals_buf); + return RAY_ERR_OOM; + } + ray_t* vals = ray_list_new(kept); + if (!vals || RAY_IS_ERR(vals)) { + if (vals && RAY_IS_ERR(vals)) ray_error_free(vals); + ray_release(keys); + ray_sys_free(sym_ids); ray_sys_free(vals_buf); + return RAY_ERR_OOM; + } + for (int32_t i = 0; i < kept; i++) { + /* ray_vec_append returns an error sentinel on failure but the + * input `keys` was either mutated in place (rc==1, no cow) or + * cow'd and released internally — either way the caller still + * owns the original pointer which is now stale. Take the + * pre-call pointer so we can release whichever survived. 
*/ + ray_t* prev_keys = keys; + keys = ray_vec_append(keys, &sym_ids[i]); + if (RAY_IS_ERR(keys)) { + ray_error_free(keys); + ray_release(prev_keys); + ray_release(vals); + ray_sys_free(sym_ids); ray_sys_free(vals_buf); + return RAY_ERR_OOM; + } + ray_t* prev_vals = vals; + vals = ray_list_append(vals, vals_buf[i]); + if (RAY_IS_ERR(vals)) { + ray_error_free(vals); + ray_release(prev_vals); + ray_release(keys); + ray_sys_free(sym_ids); ray_sys_free(vals_buf); + return RAY_ERR_OOM; + } + } + ray_sys_free(sym_ids); + ray_sys_free(vals_buf); + + ray_t* snap = ray_dict_new(keys, vals); + if (!snap || RAY_IS_ERR(snap)) { + if (snap && RAY_IS_ERR(snap)) ray_error_free(snap); + return RAY_ERR_OOM; + } + + char qdb_path[RAY_JOURNAL_PATH_MAX]; + char qdb_tmp[RAY_JOURNAL_PATH_MAX]; + if (!path_join_ext(qdb_path, sizeof(qdb_path), g_journal.base, ".qdb") || + !path_join_ext(qdb_tmp, sizeof(qdb_tmp), g_journal.base, ".qdb.tmp")) { + ray_release(snap); + return RAY_ERR_DOMAIN; + } + + /* ray_obj_save writes prefix-headered bytes (same wire framing). */ + ray_err_t e = ray_obj_save(snap, qdb_tmp); + ray_release(snap); + if (e != RAY_OK) { + /* Don't leave a half-written .qdb.tmp behind to confuse the + * next snapshot or the operator. */ + remove(qdb_tmp); + return e; + } + + if (ray_file_rename(qdb_tmp, qdb_path) != RAY_OK) { + remove(qdb_tmp); + return RAY_ERR_IO; + } + /* Parent-dir fsync: rename(2) is atomic but the directory entry + * isn't durable across a power loss without it. Best-effort. */ + (void)ray_file_sync_dir(qdb_path); + + return ray_journal_roll(); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/store/journal.h b/crates/rayforce-sys/vendor/rayforce/src/store/journal.h new file mode 100644 index 0000000..1f336c5 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/store/journal.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Transaction-log journaling — q/kdb's `-l` / `-L` ported to Rayforce. + * + * Wire format: every entry is a complete IPC message (16-byte + * ray_ipc_header_t followed by serialized payload), so log frames + * share parser code with the live IPC path. Concatenation by `cat` + * is valid by construction. + * + * Open with ray_journal_open(base, mode): + * 1. If .qdb exists, load it and bind every key into the global env. + * 2. If .log exists, replay it (badtail is fatal, eval errors warn). + * 3. Open .log for append. + * + * After open, the IPC dispatch hook (eval_payload in core/ipc.c) calls + * ray_journal_write_bytes() for every inbound sync message before + * evaluating it. Async messages and responses are not journaled, + * matching q's policy of journaling only the .z.ps stream. 
+ * + * Replay is single-threaded by construction (it runs from main, before + * the poll loop starts) so the module is intentionally not thread-safe; + * the IPC dispatch loop is also single-threaded for eval_payload, so the + * shared file handle does not need a mutex either. + */ +#ifndef RAY_JOURNAL_H +#define RAY_JOURNAL_H + +#include +#include "store/serde.h" + +typedef enum { + RAY_JOURNAL_OFF = 0, + RAY_JOURNAL_ASYNC = 1, /* -l: write, no per-message fsync */ + RAY_JOURNAL_SYNC = 2, /* -L: write + fsync per message */ +} ray_journal_mode_t; + +typedef enum { + RAY_JREPLAY_OK = 0, + RAY_JREPLAY_BADTAIL = 1, /* truncated frame / bad magic / version mismatch — framing broken */ + RAY_JREPLAY_IO = 2, /* file open / read I/O failure */ + RAY_JREPLAY_OOM = 3, /* allocation failed mid-replay — transient, retryable */ + RAY_JREPLAY_DESER = 4, /* header valid but ray_de_raw rejected the payload */ + RAY_JREPLAY_DECOMP = 5, /* compressed payload, but decompression failed */ +} ray_jreplay_status_t; + +/* Open the journal: load .qdb, replay .log, open log for append. + * Returns RAY_OK on success. Prints a one-line summary to stderr + * ("log: replayed N entries (M eval errors)"). Returns RAY_ERR_DOMAIN + * if the log replay hits a badtail; the caller should print a recovery + * hint and exit non-zero. */ +ray_err_t ray_journal_open(const char* base, ray_journal_mode_t mode); + +/* True iff a journal is currently open for append. */ +bool ray_journal_is_open(void); + +/* Append one entry to the active journal. No-op (returns RAY_OK) if + * no journal is open or if a replay is currently in progress (we do + * NOT recursively log replayed messages even if .log.write is called + * from a replayed entry). In RAY_JOURNAL_SYNC mode, fflush + fsync + * before returning. */ +ray_err_t ray_journal_write_bytes(const ray_ipc_header_t* hdr, + const uint8_t* payload, + int64_t payload_len); + +/* Replay a log file, evaluating each entry in order. 
Sets *out_chunks
+ * to the number of entries that were decoded and evaluated — including
+ * the ones whose evaluation raised an error — and *out_eval_errors to
+ * the entries that deserialized cleanly but raised an error during
+ * evaluation (those are reported with a stderr warning, not fatal —
+ * framing was intact).
+ * *out_status is RAY_JREPLAY_OK on a clean tail; otherwise it names why
+ * replay stopped: RAY_JREPLAY_BADTAIL (truncated/corrupt frame),
+ * RAY_JREPLAY_IO, RAY_JREPLAY_OOM, RAY_JREPLAY_DECOMP or
+ * RAY_JREPLAY_DESER. The return value is RAY_OK only for
+ * RAY_JREPLAY_OK, RAY_ERR_IO for the IO and OOM statuses, and
+ * RAY_ERR_DOMAIN for the remaining ones. */
+ray_err_t ray_journal_replay(const char* path,
+                             int64_t* out_chunks,
+                             int64_t* out_eval_errors,
+                             ray_jreplay_status_t* out_status);
+
+/* Validate (parse but don't eval) — q's `-11!(-2; file)` analogue.
+ * *out_chunks counts valid entries; *out_valid_bytes is the byte
+ * offset just past the last complete, well-formed frame, i.e. where the
+ * first bad header or truncated payload begins (== file size on a
+ * clean log). */
+ray_err_t ray_journal_validate(const char* path,
+                               int64_t* out_chunks,
+                               int64_t* out_valid_bytes);
+
+/* Close the active log, rename it to {base}.{UTC-stamp}.log (stamp
+ * format YYYY.MM.DDTHH.MM.SSZ), open a fresh empty {base}.log for
+ * append. Errors if no journal is open. */
+ray_err_t ray_journal_roll(void);
+
+/* Serialize every user (non-reserved) global env binding into a dict and
+ * write it as a single entry to {base}.qdb.tmp, then atomic-rename to
+ * {base}.qdb, then call ray_journal_roll. After this, the {base}.log
+ * file is fresh and a future restart loads {base}.qdb instead of
+ * replaying the old (now archived) log. */
+ray_err_t ray_journal_snapshot(void);
+
+/* Force fflush + fsync on the active journal. No-op (RAY_OK) when no
+ * journal is open or when in RAY_JOURNAL_SYNC mode (where every write
+ * already syncs). */
+ray_err_t ray_journal_sync(void);
+
+/* Close the active journal. No-op if none is open. */
+ray_err_t ray_journal_close(void);
+
+#endif /* RAY_JOURNAL_H */
diff --git a/crates/rayforce-sys/vendor/rayforce/src/store/meta.c b/crates/rayforce-sys/vendor/rayforce/src/store/meta.c
new file mode 100644
index 0000000..d4889f3
--- /dev/null
+++ b/crates/rayforce-sys/vendor/rayforce/src/store/meta.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2025-2026 Anton Kundenko
+ * All rights reserved.
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "meta.h" +#include "store/col.h" +#include +#include + +/* -------------------------------------------------------------------------- + * .d file: serialized I64 vector of column name symbol IDs + * + * ray_meta_save_d: write schema vector to .d file + * ray_meta_load_d: read .d file back as I64 vector + * -------------------------------------------------------------------------- */ + +ray_err_t ray_meta_save_d(ray_t* schema, const char* path) { + if (!schema || RAY_IS_ERR(schema)) return RAY_ERR_TYPE; + return ray_col_save(schema, path); +} + +ray_t* ray_meta_load_d(const char* path) { + return ray_col_load(path); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/store/meta.h b/crates/rayforce-sys/vendor/rayforce/src/store/meta.h new file mode 100644 index 0000000..f76065c --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/store/meta.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_META_H +#define RAY_META_H + +#include + +/* Metadata */ +ray_err_t ray_meta_save_d(ray_t* schema, const char* path); +ray_t* ray_meta_load_d(const char* path); + +#endif /* RAY_META_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/store/part.c b/crates/rayforce-sys/vendor/rayforce/src/store/part.c new file mode 100644 index 0000000..0646ddb --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/store/part.c @@ -0,0 +1,503 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#if defined(__APPLE__) +#define _DARWIN_C_SOURCE +#elif !defined(RAY_OS_WINDOWS) +#define _GNU_SOURCE +#endif +#include "part.h" +#include "core/platform.h" +#include "mem/sys.h" +#include "ops/ops.h" +#include "store/splay.h" +#include "table/sym.h" +#include +#include +#include +#include + +/* Validate YYYY.MM.DD format: exactly 10 chars, dots at pos 4/7, + * month 01-12, day 01-31. */ +static bool is_date_dir(const char* name) { + if (strlen(name) != 10) return false; + if (name[4] != '.' || name[7] != '.') return false; + for (int i = 0; i < 10; i++) { + if (i == 4 || i == 7) continue; + if (name[i] < '0' || name[i] > '9') return false; + } + int month = (name[5] - '0') * 10 + (name[6] - '0'); + int day = (name[8] - '0') * 10 + (name[9] - '0'); + return month >= 1 && month <= 12 && day >= 1 && day <= 31; +} + +/* Check if string is a pure integer (digits only, possibly with leading minus). */ +static bool is_integer_str(const char* s) { + if (!*s) return false; + if (*s == '-') s++; + if (!*s) return false; + for (; *s; s++) + if (*s < '0' || *s > '9') return false; + return true; +} + +/* Infer MAPCOMMON sub-type from partition directory names. */ +static uint8_t infer_mc_type(char** part_dirs, int64_t part_count) { + bool all_date = true, all_int = true; + for (int64_t i = 0; i < part_count; i++) { + if (all_date && !is_date_dir(part_dirs[i])) all_date = false; + if (all_int && !is_integer_str(part_dirs[i])) all_int = false; + if (!all_date && !all_int) break; + } + if (all_date) return RAY_MC_DATE; + if (all_int) return RAY_MC_I64; + return RAY_MC_SYM; +} + +/* Parse "YYYY.MM.DD" → days since 2000-01-01 (Rayforce epoch). + * Uses inverse of Hinnant's civil_from_days algorithm (same as exec.c). 
/* Parse "YYYY.MM.DD" into days since 2000-01-01.
 * Caller guarantees the is_date_dir() shape; no validation is done here.
 * The arithmetic is the days-from-civil computation (March-based year,
 * 400-year eras); 719468 shifts to 1970-01-01 and 10957 further shifts
 * the origin to 2000-01-01. */
static int32_t parse_date_dir(const char* name) {
    int64_t y = (name[0]-'0')*1000 + (name[1]-'0')*100 +
                (name[2]-'0')*10 + (name[3]-'0');
    int64_t m = (name[5]-'0')*10 + (name[6]-'0');
    int64_t d = (name[8]-'0')*10 + (name[9]-'0');
    y -= (m <= 2);                                /* Jan/Feb belong to the previous March-based year */
    int64_t era = (y >= 0 ? y : y - 399) / 400;   /* floor division by 400 */
    uint64_t yoe = (uint64_t)(y - era * 400);     /* year of era: 0..399 */
    uint64_t doy = (153 * (m > 2 ? (uint64_t)m-3 : (uint64_t)m+9) + 2)/5 + (uint64_t)d - 1;
    uint64_t doe = yoe*365 + yoe/4 - yoe/100 + doy;
    return (int32_t)(era * 146097 + (int64_t)doe - 719468 - 10957);
}

/* Parse an integer string into int64_t.  Caller guarantees is_integer_str().
 *
 * is_integer_str() puts no limit on the number of digits, so the value may
 * not fit in 64 bits.  The magnitude is accumulated in an unsigned variable
 * and saturated at INT64_MAX / INT64_MIN, which avoids the signed-overflow
 * undefined behaviour of a plain `v = v * 10 + digit` loop. */
static int64_t parse_int_dir(const char* s) {
    bool neg = (*s == '-');
    if (neg) s++;

    /* Largest representable magnitude: 2^63 for negatives, 2^63-1 otherwise. */
    const uint64_t limit = neg ? (uint64_t)INT64_MAX + 1u : (uint64_t)INT64_MAX;

    uint64_t mag = 0;
    for (; *s; s++) {
        uint64_t digit = (uint64_t)(*s - '0');
        /* mag*10 + digit <= limit  <=>  mag <= (limit - digit) / 10 */
        if (mag > (limit - digit) / 10) {
            mag = limit;   /* saturate; remaining digits cannot change that */
            break;
        }
        mag = mag * 10 + digit;
    }

    if (!neg) return (int64_t)mag;
    /* -(int64_t)mag would overflow for mag == 2^63, so special-case it. */
    return mag == (uint64_t)INT64_MAX + 1u ? INT64_MIN : -(int64_t)mag;
}
+ * -------------------------------------------------------------------------- */ + +static ray_err_t collect_part_dirs(const char* db_root, char*** out_dirs, + int64_t* out_count, bool skip_sym) { + DIR* d = opendir(db_root); + if (!d) return RAY_ERR_IO; + + char** part_dirs = NULL; + int64_t part_count = 0; + int64_t part_cap = 0; + + struct dirent* ent; + while ((ent = readdir(d)) != NULL) { + if (ent->d_name[0] == '.') continue; + if (skip_sym && strcmp(ent->d_name, "sym") == 0) continue; + + /* Partition directory name format validation is intentionally loose: + * accepts any sequence of digits and dots (e.g. "2024.01.15"). + * Invalid entries fail during splay load and are caught there. */ + bool valid = (ent->d_name[0] != '\0'); + for (const char* c = ent->d_name; *c; c++) { + if (*c == '.' || (*c >= '0' && *c <= '9')) continue; + valid = false; break; + } + if (!valid) continue; + + if (part_count >= part_cap) { + part_cap = part_cap == 0 ? 16 : part_cap * 2; + char** tmp = (char**)ray_sys_realloc(part_dirs, (size_t)part_cap * sizeof(char*)); + if (!tmp) break; + part_dirs = tmp; + } + char* dup = ray_sys_strdup(ent->d_name); + if (!dup) break; + part_dirs[part_count++] = dup; + } + closedir(d); + + if (part_count == 0) { + ray_sys_free(part_dirs); + return RAY_ERR_IO; + } + + /* Sort partition names for deterministic order. + * O(n^2) but partition count is typically small (< 1000 daily partitions). */ + for (int64_t i = 0; i < part_count - 1; i++) { + for (int64_t j = i + 1; j < part_count; j++) { + if (strcmp(part_dirs[i], part_dirs[j]) > 0) { + char* tmp = part_dirs[i]; + part_dirs[i] = part_dirs[j]; + part_dirs[j] = tmp; + } + } + } + + *out_dirs = part_dirs; + *out_count = part_count; + return RAY_OK; +} + +/* -------------------------------------------------------------------------- + * ray_part_load — load a partitioned table + * + * Discovers partition directories, loads each splayed table, and + * concatenates columns across partitions. 
+ * -------------------------------------------------------------------------- */ + +ray_t* ray_part_load(const char* db_root, const char* table_name) { + if (!db_root || !table_name) return ray_error("io", NULL); + + /* Validate table_name: no path separators or traversal */ + if (strchr(table_name, '/') || strchr(table_name, '\\') || + strstr(table_name, "..") || table_name[0] == '.') + return ray_error("io", NULL); + + /* Scan db_root for partition directories */ + char** part_dirs = NULL; + int64_t part_count = 0; + ray_err_t collect_err = collect_part_dirs(db_root, &part_dirs, &part_count, false); + if (collect_err != RAY_OK) return ray_error("io", NULL); + + /* Build sym_path for this db_root */ + char sym_path[1024]; + int sn = snprintf(sym_path, sizeof(sym_path), "%s/sym", db_root); + if (sn < 0 || (size_t)sn >= sizeof(sym_path)) { + for (int64_t i = 0; i < part_count; i++) ray_sys_free(part_dirs[i]); + ray_sys_free(part_dirs); + return ray_error("io", NULL); + } + + /* Load first partition to get schema. 
*/ + char path[1024]; + int n = snprintf(path, sizeof(path), "%s/%s/%s", db_root, part_dirs[0], table_name); + if (n < 0 || (size_t)n >= sizeof(path)) { + for (int64_t i = 0; i < part_count; i++) ray_sys_free(part_dirs[i]); + ray_sys_free(part_dirs); + return ray_error("io", NULL); + } + ray_t* first = ray_splay_load(path, sym_path); + if (!first || RAY_IS_ERR(first)) { + for (int64_t i = 0; i < part_count; i++) ray_sys_free(part_dirs[i]); + ray_sys_free(part_dirs); + return first; + } + + if (part_count == 1) { + for (int64_t i = 0; i < part_count; i++) ray_sys_free(part_dirs[i]); + ray_sys_free(part_dirs); + return first; + } + + /* Load remaining partitions and concatenate */ + int64_t ncols = ray_table_ncols(first); + /* Accumulate rows from all partitions */ + ray_t** all_dfs = (ray_t**)ray_sys_alloc((size_t)part_count * sizeof(ray_t*)); + if (!all_dfs) { + ray_release(first); + for (int64_t i = 0; i < part_count; i++) ray_sys_free(part_dirs[i]); + ray_sys_free(part_dirs); + return ray_error("oom", NULL); + } + all_dfs[0] = first; + + int64_t fail_count = 0; + for (int64_t p = 1; p < part_count; p++) { + n = snprintf(path, sizeof(path), "%s/%s/%s", db_root, part_dirs[p], table_name); + if (n < 0 || (size_t)n >= sizeof(path)) { all_dfs[p] = NULL; fail_count++; continue; } + all_dfs[p] = ray_splay_load(path, NULL); + if (!all_dfs[p] || RAY_IS_ERR(all_dfs[p])) { + all_dfs[p] = NULL; + fail_count++; + } + } + if (fail_count > 0) { + /* One or more partition splay loads failed -- abort entire load */ + for (int64_t p = 0; p < part_count; p++) { + if (all_dfs[p] && !RAY_IS_ERR(all_dfs[p])) + ray_release(all_dfs[p]); + ray_sys_free(part_dirs[p]); + } + ray_sys_free(all_dfs); + ray_sys_free(part_dirs); + return ray_error("io", NULL); + } + + /* Build combined table by concatenating columns */ + ray_t* result = ray_table_new(ncols); + for (int64_t c = 0; c < ncols; c++) { + int64_t name_id = ray_table_col_name(first, c); + ray_t* combined = ray_table_get_col_idx(first, 
c); + if (!combined) continue; + ray_retain(combined); + + for (int64_t p = 1; p < part_count; p++) { + if (!all_dfs[p] || RAY_IS_ERR(all_dfs[p])) continue; + ray_t* part_col = ray_table_get_col_idx(all_dfs[p], c); + if (part_col) { + ray_t* new_combined = ray_vec_concat(combined, part_col); + ray_release(combined); + if (!new_combined || RAY_IS_ERR(new_combined)) { + combined = NULL; + break; + } + combined = new_combined; + } + } + + if (!combined) { + ray_release(result); + result = NULL; + break; + } + result = ray_table_add_col(result, name_id, combined); + ray_release(combined); + if (!result || RAY_IS_ERR(result)) break; + } + + /* Cleanup */ + for (int64_t p = 0; p < part_count; p++) { + if (all_dfs[p] && !RAY_IS_ERR(all_dfs[p])) + ray_release(all_dfs[p]); + ray_sys_free(part_dirs[p]); + } + ray_sys_free(all_dfs); + ray_sys_free(part_dirs); + + return result ? result : ray_error("oom", NULL); +} + +/* -------------------------------------------------------------------------- + * ray_read_parted — zero-copy open of a partitioned table + * + * Builds parted columns (RAY_PARTED_BASE + base_type) where each segment + * is an mmap'd vector from ray_read_splayed. Also builds a MAPCOMMON column + * with partition key names and row counts. + * -------------------------------------------------------------------------- */ + +ray_t* ray_read_parted(const char* db_root, const char* table_name) { + if (!db_root || !table_name) return ray_error("io", NULL); + + /* Validate table_name: no path separators or traversal */ + if (strchr(table_name, '/') || strchr(table_name, '\\') || + strstr(table_name, "..") || table_name[0] == '.') + return ray_error("io", NULL); + + /* Build sym_path. */ + char sym_path[1024]; + int sn = snprintf(sym_path, sizeof(sym_path), "%s/sym", db_root); + if (sn < 0 || (size_t)sn >= sizeof(sym_path)) + return ray_error("io", NULL); + + /* Load global symfile if present. 
Tables without RAY_SYM columns + * never produce a global symfile (.db.splayed.set only writes per-table + * sym files inside the leaf splayed dir), so a missing root-level + * symfile is normal — not an error. */ + struct stat sym_st; + if (stat(sym_path, &sym_st) == 0) { + ray_err_t sym_err = ray_sym_load(sym_path); + if (sym_err != RAY_OK) return ray_error(ray_err_code_str(sym_err), NULL); + } + + /* Scan db_root for partition directories (skip "sym" entry) */ + char** part_dirs = NULL; + int64_t part_count = 0; + ray_err_t collect_err = collect_part_dirs(db_root, &part_dirs, &part_count, true); + if (collect_err != RAY_OK) return ray_error("io", NULL); + + /* Open each partition via ray_read_splayed */ + ray_t** part_tables = (ray_t**)ray_sys_alloc((size_t)part_count * sizeof(ray_t*)); + if (!part_tables) goto fail_dirs; + memset(part_tables, 0, (size_t)part_count * sizeof(ray_t*)); + + char path[1024]; + for (int64_t p = 0; p < part_count; p++) { + int pn = snprintf(path, sizeof(path), "%s/%s/%s", db_root, part_dirs[p], table_name); + if (pn < 0 || (size_t)pn >= sizeof(path)) { + part_tables[p] = NULL; + goto fail_tables; + } + part_tables[p] = ray_read_splayed(path, NULL); + if (!part_tables[p] || RAY_IS_ERR(part_tables[p])) { + part_tables[p] = NULL; + goto fail_tables; + } + } + + /* Get schema from first partition */ + int64_t ncols = ray_table_ncols(part_tables[0]); + if (ncols <= 0) goto fail_tables; + + /* Infer MAPCOMMON sub-type from partition directory names */ + uint8_t mc_type = infer_mc_type(part_dirs, part_count); + + /* Build result table: 1 MAPCOMMON + ncols data columns */ + ray_t* result = ray_table_new(ncols + 2); + if (!result || RAY_IS_ERR(result)) goto fail_tables; + + /* ---- MAPCOMMON column (first) ---- */ + { + /* key_values type matches inferred partition key type */ + int8_t kv_type = (mc_type == RAY_MC_DATE) ? RAY_DATE + : (mc_type == RAY_MC_I64) ? 
RAY_I64 + : RAY_SYM; + ray_t* key_values = ray_vec_new(kv_type, part_count); + ray_t* row_counts = ray_vec_new(RAY_I64, part_count); + if (!key_values || RAY_IS_ERR(key_values) || + !row_counts || RAY_IS_ERR(row_counts)) { + if (key_values && !RAY_IS_ERR(key_values)) ray_release(key_values); + if (row_counts && !RAY_IS_ERR(row_counts)) ray_release(row_counts); + ray_release(result); + goto fail_tables; + } + + int64_t* rc_data = (int64_t*)ray_data(row_counts); + if (mc_type == RAY_MC_DATE) { + int32_t* kv_data = (int32_t*)ray_data(key_values); + for (int64_t p = 0; p < part_count; p++) { + kv_data[p] = parse_date_dir(part_dirs[p]); + rc_data[p] = ray_table_nrows(part_tables[p]); + } + } else if (mc_type == RAY_MC_I64) { + int64_t* kv_data = (int64_t*)ray_data(key_values); + for (int64_t p = 0; p < part_count; p++) { + kv_data[p] = parse_int_dir(part_dirs[p]); + rc_data[p] = ray_table_nrows(part_tables[p]); + } + } else { + int64_t* kv_data = (int64_t*)ray_data(key_values); + for (int64_t p = 0; p < part_count; p++) { + kv_data[p] = ray_sym_intern(part_dirs[p], strlen(part_dirs[p])); + rc_data[p] = ray_table_nrows(part_tables[p]); + } + } + key_values->len = part_count; + row_counts->len = part_count; + + ray_t* mapcommon = ray_alloc(2 * sizeof(ray_t*)); + if (!mapcommon || RAY_IS_ERR(mapcommon)) { + ray_release(key_values); + ray_release(row_counts); + ray_release(result); + goto fail_tables; + } + mapcommon->type = RAY_MAPCOMMON; + mapcommon->len = 2; + mapcommon->attrs = mc_type; + memset(mapcommon->nullmap, 0, 16); + + ray_t** mc_ptrs = (ray_t**)ray_data(mapcommon); + mc_ptrs[0] = key_values; ray_retain(key_values); + mc_ptrs[1] = row_counts; ray_retain(row_counts); + + const char* mc_name = (mc_type == RAY_MC_DATE) ? 
"date" : "part"; + int64_t part_name_id = ray_sym_intern(mc_name, strlen(mc_name)); + result = ray_table_add_col(result, part_name_id, mapcommon); + if (!result || RAY_IS_ERR(result)) { + ray_release(mapcommon); + ray_release(key_values); + ray_release(row_counts); + goto fail_tables; + } + + ray_release(mapcommon); + ray_release(key_values); + ray_release(row_counts); + } + + /* ---- Data columns (after MAPCOMMON) ---- */ + for (int64_t c = 0; c < ncols; c++) { + int64_t name_id = ray_table_col_name(part_tables[0], c); + ray_t* first_seg = ray_table_get_col_idx(part_tables[0], c); + if (!first_seg) continue; + + ray_t* parted = ray_alloc((size_t)part_count * sizeof(ray_t*)); + if (!parted || RAY_IS_ERR(parted)) { + ray_release(result); + goto fail_tables; + } + parted->type = RAY_PARTED_BASE + first_seg->type; + parted->len = part_count; + parted->attrs = 0; + memset(parted->nullmap, 0, 16); + + ray_t** segs = (ray_t**)ray_data(parted); + for (int64_t p = 0; p < part_count; p++) { + ray_t* seg = ray_table_get_col_idx(part_tables[p], c); + if (!seg) { + segs[p] = NULL; + continue; + } + ray_retain(seg); + segs[p] = seg; + ray_vm_advise_willneed(ray_data(seg), + (size_t)seg->len * ray_sym_elem_size(seg->type, seg->attrs)); + } + + result = ray_table_add_col(result, name_id, parted); + ray_release(parted); + if (!result || RAY_IS_ERR(result)) goto fail_tables; + } + + /* Release partition sub-tables (segment vectors survive via retain) */ + for (int64_t p = 0; p < part_count; p++) { + if (part_tables[p]) ray_release(part_tables[p]); + ray_sys_free(part_dirs[p]); + } + ray_sys_free(part_tables); + ray_sys_free(part_dirs); + + return result; + +fail_tables: + for (int64_t p = 0; p < part_count; p++) { + if (part_tables[p] && !RAY_IS_ERR(part_tables[p])) + ray_release(part_tables[p]); + } + ray_sys_free(part_tables); + +fail_dirs: + for (int64_t p = 0; p < part_count; p++) + ray_sys_free(part_dirs[p]); + ray_sys_free(part_dirs); + + return ray_error("io", NULL); +} diff 
--git a/crates/rayforce-sys/vendor/rayforce/src/store/part.h b/crates/rayforce-sys/vendor/rayforce/src/store/part.h new file mode 100644 index 0000000..aef3b46 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/store/part.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_PART_H +#define RAY_PART_H + +#include + +/* Partitioned table */ +ray_t* ray_part_load(const char* db_root, const char* table_name); +ray_t* ray_read_parted(const char* db_root, const char* table_name); + +#endif /* RAY_PART_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/store/serde.c b/crates/rayforce-sys/vendor/rayforce/src/store/serde.c new file mode 100644 index 0000000..0c27da1 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/store/serde.c @@ -0,0 +1,984 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef RAY_OS_WINDOWS +# define _GNU_SOURCE /* fileno() for fsync-after-fwrite below */ +#endif + +#include "serde.h" +#include "store/col.h" +#include "store/fileio.h" +#include "core/types.h" +#include "mem/heap.h" +#include "vec/str.h" +#include "vec/vec.h" + +#ifndef RAY_OS_WINDOWS +# include +#endif +#include "table/sym.h" +#include "lang/env.h" +#include +#include + +/* -------------------------------------------------------------------------- + * Wire format: + * + * byte 0: type tag (int8_t — negative = atom, positive = vector/compound) + * + * Atoms (type < 0): + * BOOL/U8: 1 byte value + * I16: 2 bytes + * I32/DATE/TIME: 4 bytes + * F32: 4 bytes + * I64/TIMESTAMP: 8 bytes + * F64: 8 bytes + * SYM: null-terminated string (interned on deserialize) + * GUID: 16 bytes + * STR: i64 length + raw bytes (no null terminator) + * + * Vectors (type > 0): + * attrs byte + i64 length + element data + * SYM vector: each element as null-terminated string + * STR vector: each element as i64 length + raw bytes + * LIST: each element recursively serialized + * + * TABLE/DICT: attrs byte + keys(recursive) + values(recursive) + * LAMBDA: attrs byte + params(recursive) + body(recursive) + * UNARY/BINARY/VARY: function name as null-terminated string + * ERROR: 8-byte sdata (packed error code) + * NULL (type=0 with len=0): just the type byte + * -------------------------------------------------------------------------- */ + +/* Helper: strlen with bounds */ +static size_t safe_strlen(const uint8_t* buf, int64_t max) { + for (int64_t i = 0; i < max; i++) + if (buf[i] == 0) return (size_t)i; + return (size_t)max; +} + +/* Null bitmap size for a vector (0 if no nulls) */ +static int64_t null_bitmap_size(ray_t* v) { + if (!(v->attrs & RAY_ATTR_HAS_NULLS)) return 0; + return (v->len + 7) / 8; +} + +/* Write null bitmap bytes into buf. Returns bytes written. + * Uses ray_vec_nullmap_bytes so HAS_INDEX, slice, ext, and inline storage + * forms all serialize the correct bits. 
bit_offset is non-zero only for + * slices, which (per pre-existing serde behaviour) are saved as if they + * had no nulls — null_bitmap_size returns 0 since the slice's own attrs + * lack HAS_NULLS — so we never reach this with off>0. */ +static int64_t ser_null_bitmap(uint8_t* buf, ray_t* v) { + int64_t bsz = null_bitmap_size(v); + if (bsz <= 0) return 0; + + int64_t bit_off = 0, len_bits = 0; + const uint8_t* bits = ray_vec_nullmap_bytes(v, &bit_off, &len_bits); + if (!bits || bit_off != 0) { + memset(buf, 0, (size_t)bsz); + return bsz; + } + int64_t avail_bytes = (len_bits + 7) / 8; + int64_t copy = bsz < avail_bytes ? bsz : avail_bytes; + memcpy(buf, bits, (size_t)copy); + if (copy < bsz) memset(buf + copy, 0, (size_t)(bsz - copy)); + return bsz; +} + +/* Restore null bitmap from buf into vector. Returns bytes consumed. */ +static int64_t de_null_bitmap(const uint8_t* buf, int64_t avail, ray_t* v) { + int64_t bsz = (v->len + 7) / 8; + if (avail < bsz) return -1; + + v->attrs |= RAY_ATTR_HAS_NULLS; + + if (v->type == RAY_STR || v->len > 128) { + /* Must use external nullmap (STR always, others when > 128 elements) */ + ray_t* ext = ray_vec_new(RAY_U8, bsz); + if (!ext || RAY_IS_ERR(ext)) return -1; + ext->len = bsz; + memcpy(ray_data(ext), buf, (size_t)bsz); + v->attrs |= RAY_ATTR_NULLMAP_EXT; + v->ext_nullmap = ext; + } else { + /* Inline nullmap */ + memcpy(v->nullmap, buf, (size_t)bsz); + } + return bsz; +} + +/* -------------------------------------------------------------------------- + * ray_serde_size — calculate serialized size (excluding IPC header) + * -------------------------------------------------------------------------- */ + +int64_t ray_serde_size(ray_t* obj) { + if (!obj) return 1; /* RAY_SERDE_NULL marker */ + if (RAY_IS_ERR(obj)) return 1 + 8; /* type + sdata */ + if (RAY_IS_NULL(obj)) return 1; /* just the null type byte */ + + int8_t type = obj->type; + + /* Atoms (negative type). Format: type(1) + flags(1) + value-bytes. 
+ * `flags` carries the typed-null bit so a deserialize round-trip + * restores 0Nl/0Nf/0Nd/0Nt etc. instead of decoding the zero-value + * payload as a plain atom (see ray_typed_null / RAY_ATOM_IS_NULL). */ + if (type < 0) { + int8_t base = -type; + switch (base) { + case RAY_BOOL: + case RAY_U8: return 1 + 1 + 1; + case RAY_I16: return 1 + 1 + 2; + case RAY_I32: + case RAY_DATE: + case RAY_TIME: + case RAY_F32: return 1 + 1 + 4; + case RAY_I64: + case RAY_TIMESTAMP: + case RAY_F64: return 1 + 1 + 8; + case RAY_GUID: return 1 + 1 + 16; + case RAY_SYM: { + ray_t* s = ray_sym_str(obj->i64); + return 1 + 1 + (s ? (int64_t)ray_str_len(s) : 0) + 1; /* +1 for null terminator */ + } + case RAY_STR: { + return 1 + 1 + 8 + (int64_t)ray_str_len(obj); + } + default: return 0; + } + } + + /* NULL object: type=LIST with len=0, but we check for actual NULL semantics */ + + /* Vectors — format: type(1) + attrs(1) + len(8) + data + nullmap */ + int64_t nbm = null_bitmap_size(obj); + + /* Overflow guard: worst case is GUID at 16 bytes/elem */ + if (obj->len > (INT64_MAX - 32) / 16) return -1; + + switch (type) { + case RAY_BOOL: + case RAY_U8: return 1 + 1 + 8 + obj->len + nbm; + case RAY_I16: return 1 + 1 + 8 + obj->len * 2 + nbm; + case RAY_I32: + case RAY_DATE: + case RAY_TIME: + case RAY_F32: return 1 + 1 + 8 + obj->len * 4 + nbm; + case RAY_I64: + case RAY_TIMESTAMP: + case RAY_F64: return 1 + 1 + 8 + obj->len * 8 + nbm; + case RAY_GUID: return 1 + 1 + 8 + obj->len * 16 + nbm; + case RAY_SYM: { + int64_t size = 1 + 1 + 8; + int64_t* ids = (int64_t*)ray_data(obj); + for (int64_t i = 0; i < obj->len; i++) { + ray_t* s = ray_sym_str(ids[i]); + size += (s ? 
(int64_t)ray_str_len(s) : 0) + 1; + } + return size + nbm; + } + case RAY_STR: { + int64_t size = 1 + 1 + 8; + ray_str_t* elems = (ray_str_t*)ray_data(obj); + for (int64_t i = 0; i < obj->len; i++) + size += 8 + elems[i].len; /* i64 length + raw bytes */ + return size + nbm; + } + case RAY_LIST: { + int64_t size = 1 + 1 + 8; + ray_t** elems = (ray_t**)ray_data(obj); + for (int64_t i = 0; i < obj->len; i++) + size += ray_serde_size(elems[i]); + return size; + } + case RAY_TABLE: { + /* type + attrs + schema(recursive) + cols(recursive RAY_LIST) */ + ray_t** slots = (ray_t**)ray_data(obj); + return 1 + 1 + ray_serde_size(slots[0]) + ray_serde_size(slots[1]); + } + case RAY_DICT: { + /* type + attrs + keys(recursive) + vals(recursive) */ + ray_t** slots = (ray_t**)ray_data(obj); + return 1 + 1 + ray_serde_size(slots[0]) + ray_serde_size(slots[1]); + } + case RAY_LAMBDA: { + ray_t** slots = (ray_t**)ray_data(obj); + return 1 + 1 + ray_serde_size(slots[0]) + ray_serde_size(slots[1]); + } + case RAY_UNARY: + case RAY_BINARY: + case RAY_VARY: { + /* Serialize by name (null-terminated string in nullmap) */ + const char* name = ray_fn_name(obj); + size_t nlen = strlen(name); if (nlen > 15) nlen = 15; + return 1 + (int64_t)nlen + 1; /* type + name + null terminator */ + } + case RAY_ERROR: + return 1 + 8; /* sdata */ + default: + return 0; + } +} + +/* -------------------------------------------------------------------------- + * ray_ser_raw — serialize into buffer, returns bytes written + * -------------------------------------------------------------------------- */ + +int64_t ray_ser_raw(uint8_t* buf, ray_t* obj) { + if (!obj) { + buf[0] = RAY_SERDE_NULL; + return 1; + } + if (RAY_IS_ERR(obj)) { + buf[0] = (uint8_t)RAY_ERROR; + memcpy(buf + 1, obj->sdata, 7); + buf[8] = 0; + return 1 + 8; + } + + int8_t type = obj->type; + buf[0] = (uint8_t)type; + buf++; + + /* Atoms — format: type(1) + flags(1) + value-bytes. 
`flags` bit 0 + * carries the typed-null marker (nullmap[0] & 1 on the source atom) + * so (de (ser 0Nl)) roundtrips instead of decoding as plain 0. */ + if (type < 0) { + uint8_t aflags = (uint8_t)(obj->nullmap[0] & 1); + buf[0] = aflags; + buf++; + int8_t base = -type; + switch (base) { + case RAY_BOOL: + case RAY_U8: + buf[0] = obj->u8; + return 1 + 1 + 1; + case RAY_I16: + memcpy(buf, &obj->i16, 2); + return 1 + 1 + 2; + case RAY_I32: + case RAY_DATE: + case RAY_TIME: + memcpy(buf, &obj->i32, 4); + return 1 + 1 + 4; + case RAY_F32: + memcpy(buf, &obj->i32, 4); /* same 4-byte slot */ + return 1 + 1 + 4; + case RAY_I64: + case RAY_TIMESTAMP: + memcpy(buf, &obj->i64, 8); + return 1 + 1 + 8; + case RAY_F64: + memcpy(buf, &obj->f64, 8); + return 1 + 1 + 8; + case RAY_GUID: { + /* GUID atom stored via obj pointer to 16-byte data */ + ray_t* gv = obj->obj; + if (gv) memcpy(buf, ray_data(gv), 16); + else memset(buf, 0, 16); + return 1 + 1 + 16; + } + case RAY_SYM: { + ray_t* s = ray_sym_str(obj->i64); + if (s) { + size_t slen = ray_str_len(s); + memcpy(buf, ray_str_ptr(s), slen); + buf[slen] = '\0'; + return 1 + 1 + (int64_t)slen + 1; + } + buf[0] = '\0'; + return 1 + 1 + 1; + } + case RAY_STR: { + size_t slen = ray_str_len(obj); + const char* p = ray_str_ptr(obj); + if (!p) { p = ""; slen = 0; } + int64_t n = (int64_t)slen; + memcpy(buf, &n, 8); + memcpy(buf + 8, p, slen); + return 1 + 1 + 8 + (int64_t)slen; + } + default: return 0; + } + } + + /* Vectors and compound types */ + int64_t c; + + /* Attrs byte: preserve HAS_NULLS, clear SLICE/NULLMAP_EXT/ARENA (internal flags) */ + uint8_t wire_attrs = obj->attrs & (RAY_ATTR_HAS_NULLS); + + switch (type) { + case RAY_BOOL: + case RAY_U8: { + buf[0] = wire_attrs; buf++; + memcpy(buf, &obj->len, 8); buf += 8; + memcpy(buf, ray_data(obj), obj->len); + c = 1 + 1 + 8 + obj->len; + c += ser_null_bitmap(buf + obj->len, obj); + return c; + } + case RAY_I16: { + buf[0] = wire_attrs; buf++; + memcpy(buf, &obj->len, 8); buf += 8; + 
int64_t dsz = obj->len * 2; + memcpy(buf, ray_data(obj), dsz); + c = 1 + 1 + 8 + dsz; + c += ser_null_bitmap(buf + dsz, obj); + return c; + } + case RAY_I32: + case RAY_DATE: + case RAY_TIME: + case RAY_F32: { + buf[0] = wire_attrs; buf++; + memcpy(buf, &obj->len, 8); buf += 8; + int64_t dsz = obj->len * 4; + memcpy(buf, ray_data(obj), dsz); + c = 1 + 1 + 8 + dsz; + c += ser_null_bitmap(buf + dsz, obj); + return c; + } + case RAY_I64: + case RAY_TIMESTAMP: + case RAY_F64: { + buf[0] = wire_attrs; buf++; + memcpy(buf, &obj->len, 8); buf += 8; + int64_t dsz = obj->len * 8; + memcpy(buf, ray_data(obj), dsz); + c = 1 + 1 + 8 + dsz; + c += ser_null_bitmap(buf + dsz, obj); + return c; + } + case RAY_GUID: { + buf[0] = wire_attrs; buf++; + memcpy(buf, &obj->len, 8); buf += 8; + int64_t dsz = obj->len * 16; + memcpy(buf, ray_data(obj), dsz); + c = 1 + 1 + 8 + dsz; + c += ser_null_bitmap(buf + dsz, obj); + return c; + } + case RAY_SYM: { + buf[0] = wire_attrs; buf++; + memcpy(buf, &obj->len, 8); buf += 8; + int64_t* ids = (int64_t*)ray_data(obj); + c = 0; + for (int64_t i = 0; i < obj->len; i++) { + ray_t* s = ray_sym_str(ids[i]); + if (s) { + size_t slen = ray_str_len(s); + memcpy(buf + c, ray_str_ptr(s), slen); + c += (int64_t)slen; + } + buf[c] = '\0'; + c++; + } + c += ser_null_bitmap(buf + c, obj); + return 1 + 1 + 8 + c; + } + + case RAY_STR: { + buf[0] = wire_attrs; buf++; + memcpy(buf, &obj->len, 8); buf += 8; + ray_str_t* elems = (ray_str_t*)ray_data(obj); + const char* pool = obj->str_pool ? 
(const char*)ray_data(obj->str_pool) : NULL; + c = 0; + for (int64_t i = 0; i < obj->len; i++) { + int64_t slen = (int64_t)elems[i].len; + memcpy(buf + c, &slen, 8); + c += 8; + const char* p = ray_str_t_ptr(&elems[i], pool); + memcpy(buf + c, p, (size_t)slen); + c += slen; + } + c += ser_null_bitmap(buf + c, obj); + return 1 + 1 + 8 + c; + } + + case RAY_LIST: { + buf[0] = obj->attrs; + buf++; + memcpy(buf, &obj->len, 8); + buf += 8; + ray_t** elems = (ray_t**)ray_data(obj); + c = 0; + for (int64_t i = 0; i < obj->len; i++) + c += ray_ser_raw(buf + c, elems[i]); + return 1 + 1 + 8 + c; + } + + case RAY_TABLE: { + /* Layout: type + attrs + schema(recursive) + cols(recursive RAY_LIST) */ + buf[0] = obj->attrs; + buf++; + ray_t** slots = (ray_t**)ray_data(obj); + c = ray_ser_raw(buf, slots[0]); /* schema (RAY_I64 vector) */ + c += ray_ser_raw(buf + c, slots[1]); /* cols (RAY_LIST) */ + return 1 + 1 + c; + } + + case RAY_DICT: { + buf[0] = obj->attrs; + buf++; + ray_t** slots = (ray_t**)ray_data(obj); + c = ray_ser_raw(buf, slots[0]); + c += ray_ser_raw(buf + c, slots[1]); + return 1 + 1 + c; + } + + case RAY_LAMBDA: { + buf[0] = obj->attrs; + buf++; + ray_t** slots = (ray_t**)ray_data(obj); + c = ray_ser_raw(buf, slots[0]); /* params */ + c += ray_ser_raw(buf + c, slots[1]); /* body */ + return 1 + 1 + c; + } + + case RAY_UNARY: + case RAY_BINARY: + case RAY_VARY: { + /* Serialize builtin by name (null-terminated) */ + const char* name = ray_fn_name(obj); + size_t nlen = strlen(name); if (nlen > 15) nlen = 15; + memcpy(buf, name, nlen); + buf[nlen] = 0; + return 1 + (int64_t)nlen + 1; + } + + case RAY_ERROR: + memcpy(buf, obj->sdata, 7); + buf[7] = 0; + return 1 + 8; + + default: + return 0; + } +} + +/* -------------------------------------------------------------------------- + * ray_de_raw — deserialize from buffer + * -------------------------------------------------------------------------- */ + +ray_t* ray_de_raw(uint8_t* buf, int64_t* len) { + if (*len < 1) 
return NULL; + + int8_t type = (int8_t)buf[0]; + buf++; + (*len)--; + + /* Null */ + if ((uint8_t)type == RAY_SERDE_NULL) return NULL; + + /* Atoms — read 1-byte flags (typed-null bit) before the value. If + * the null bit is set we always return ray_typed_null(type) regardless + * of the value bytes, which are still read/skipped to keep the buffer + * position in sync with the serialized length. */ + if (type < 0) { + if (*len < 1) return ray_error("domain", NULL); + uint8_t aflags = buf[0]; + buf++; (*len)--; + bool is_null = (aflags & 1) != 0; + int8_t base = -type; + switch (base) { + case RAY_BOOL: + if (*len < 1) return ray_error("domain", NULL); + (*len)--; + return is_null ? ray_typed_null(type) : ray_bool(buf[0]); + case RAY_U8: + if (*len < 1) return ray_error("domain", NULL); + (*len)--; + return is_null ? ray_typed_null(type) : ray_u8(buf[0]); + case RAY_I16: + if (*len < 2) return ray_error("domain", NULL); + { int16_t v; memcpy(&v, buf, 2); *len -= 2; + return is_null ? ray_typed_null(type) : ray_i16(v); } + case RAY_I32: + if (*len < 4) return ray_error("domain", NULL); + { int32_t v; memcpy(&v, buf, 4); *len -= 4; + return is_null ? ray_typed_null(type) : ray_i32(v); } + case RAY_DATE: + if (*len < 4) return ray_error("domain", NULL); + { int32_t v; memcpy(&v, buf, 4); *len -= 4; + return is_null ? ray_typed_null(type) : ray_date((int64_t)v); } + case RAY_TIME: + if (*len < 4) return ray_error("domain", NULL); + { int32_t v; memcpy(&v, buf, 4); *len -= 4; + return is_null ? ray_typed_null(type) : ray_time((int64_t)v); } + case RAY_F32: + if (*len < 4) return ray_error("domain", NULL); + { float v; memcpy(&v, buf, 4); *len -= 4; + return is_null ? ray_typed_null(-RAY_F64) + : ray_f64((double)v); /* promote to f64 atom */ } + case RAY_I64: + if (*len < 8) return ray_error("domain", NULL); + { int64_t v; memcpy(&v, buf, 8); *len -= 8; + return is_null ? 
ray_typed_null(type) : ray_i64(v); } + case RAY_TIMESTAMP: + if (*len < 8) return ray_error("domain", NULL); + { int64_t v; memcpy(&v, buf, 8); *len -= 8; + return is_null ? ray_typed_null(type) : ray_timestamp(v); } + case RAY_F64: + if (*len < 8) return ray_error("domain", NULL); + { double v; memcpy(&v, buf, 8); *len -= 8; + return is_null ? ray_typed_null(type) : ray_f64(v); } + case RAY_GUID: + if (*len < 16) return ray_error("domain", NULL); + *len -= 16; + return is_null ? ray_typed_null(type) : ray_guid(buf); + case RAY_SYM: { + size_t slen = safe_strlen(buf, *len); + if ((int64_t)slen >= *len) return ray_error("domain", NULL); + *len -= (int64_t)slen + 1; + if (is_null) return ray_typed_null(type); + int64_t id = ray_sym_intern((const char*)buf, slen); + return ray_sym(id); + } + case RAY_STR: { + if (*len < 8) return ray_error("domain", NULL); + int64_t slen; memcpy(&slen, buf, 8); + buf += 8; *len -= 8; + if (*len < slen || slen < 0) return ray_error("domain", NULL); + *len -= slen; + if (is_null) return ray_typed_null(type); + return ray_str((const char*)buf, (size_t)slen); + } + default: + return ray_error("type", NULL); + } + } + + /* Vectors and compounds */ + int64_t l; + + switch (type) { + case RAY_BOOL: + case RAY_U8: + case RAY_I16: + case RAY_I32: + case RAY_DATE: + case RAY_TIME: + case RAY_F32: + case RAY_I64: + case RAY_TIMESTAMP: + case RAY_F64: + case RAY_GUID: { + if (*len < 9) return ray_error("domain", NULL); + uint8_t attrs = buf[0]; + buf++; + memcpy(&l, buf, 8); + buf += 8; + *len -= 9; + + if (l < 0 || l > 1000000000) return ray_error("domain", NULL); + + uint8_t esz = ray_type_sizes[type]; + int64_t data_bytes = l * esz; + if (*len < data_bytes) return ray_error("domain", NULL); + + ray_t* vec = ray_vec_from_raw(type, buf, l); + if (!vec || RAY_IS_ERR(vec)) return vec; + buf += data_bytes; + *len -= data_bytes; + + /* Restore null bitmap if present */ + if (attrs & RAY_ATTR_HAS_NULLS) { + int64_t consumed = de_null_bitmap(buf, 
*len, vec); + if (consumed < 0) { ray_release(vec); return ray_error("domain", NULL); } + buf += consumed; + *len -= consumed; + } + return vec; + } + + case RAY_SYM: { + if (*len < 9) return ray_error("domain", NULL); + uint8_t attrs = buf[0]; + buf++; + memcpy(&l, buf, 8); + buf += 8; + *len -= 9; + + if (l < 0 || l > 1000000000 || l > *len) return ray_error("domain", NULL); /* each symbol needs at least its NUL byte, so a larger count can never decode; reject before allocating */ + + ray_t* vec = ray_vec_new(RAY_SYM, l); + if (!vec || RAY_IS_ERR(vec)) return vec; + vec->len = l; + int64_t* ids = (int64_t*)ray_data(vec); + for (int64_t i = 0; i < l; i++) { + size_t slen = safe_strlen(buf, *len); + if ((int64_t)slen >= *len) { + vec->len = i; + ray_release(vec); + return ray_error("domain", NULL); + } + ids[i] = ray_sym_intern((const char*)buf, slen); + buf += slen + 1; + *len -= (int64_t)slen + 1; + } + + if (attrs & RAY_ATTR_HAS_NULLS) { + int64_t consumed = de_null_bitmap(buf, *len, vec); + if (consumed < 0) { ray_release(vec); return ray_error("domain", NULL); } + buf += consumed; + *len -= consumed; + } + return vec; + } + + case RAY_STR: { + if (*len < 9) return ray_error("domain", NULL); + uint8_t attrs = buf[0]; + buf++; + memcpy(&l, buf, 8); + buf += 8; + *len -= 9; + + if (l < 0 || l > 1000000000 || l > *len / 8) return ray_error("domain", NULL); /* each string needs an 8-byte length prefix; reject impossible counts before allocating */ + + /* Build STR vector by appending each string via ray_str_vec_append */ + ray_t* vec = ray_vec_new(RAY_STR, l); + if (!vec || RAY_IS_ERR(vec)) return vec; + vec->len = 0; + for (int64_t i = 0; i < l; i++) { + if (*len < 8) { ray_release(vec); return ray_error("domain", NULL); } + int64_t slen; memcpy(&slen, buf, 8); + buf += 8; *len -= 8; + if (*len < slen || slen < 0) { ray_release(vec); return ray_error("domain", NULL); } + ray_t* nv = ray_str_vec_append(vec, (const char*)buf, (size_t)slen); + if (!nv || RAY_IS_ERR(nv)) { ray_release(vec); return nv ? 
nv : ray_error("oom", NULL); } + vec = nv; + buf += slen; + *len -= slen; + } + + if (attrs & RAY_ATTR_HAS_NULLS) { + int64_t consumed = de_null_bitmap(buf, *len, vec); + if (consumed < 0) { ray_release(vec); return ray_error("domain", NULL); } + buf += consumed; + *len -= consumed; + } + return vec; + } + + case RAY_LIST: { + if (*len < 9) return ray_error("domain", NULL); + uint8_t list_attrs = buf[0]; + buf++; + memcpy(&l, buf, 8); + buf += 8; + *len -= 9; + + if (l < 0 || l > 1000000000 || l > *len) return ray_error("domain", NULL); /* each element needs at least its type byte; reject impossible counts before allocating */ + + ray_t* list = ray_alloc(l * sizeof(ray_t*)); + if (!list || RAY_IS_ERR(list)) return list; + list->type = RAY_LIST; + list->attrs = list_attrs; + list->len = l; + ray_t** elems = (ray_t**)ray_data(list); + + int64_t saved = *len; + for (int64_t i = 0; i < l; i++) { + elems[i] = ray_de_raw(buf + (saved - *len), len); + if (!elems[i] || RAY_IS_ERR(elems[i])) { + ray_t* bad = elems[i]; /* capture before the list block (which owns elems[]) is released, then clean up the already-deserialized prefix */ + for (int64_t j = 0; j < i; j++) ray_release(elems[j]); + list->len = 0; + ray_release(list); + return bad ? 
bad : ray_error("domain", NULL); + } + } + return list; + } + + case RAY_TABLE: { + if (*len < 1) return ray_error("domain", NULL); + /* uint8_t tbl_attrs = buf[0]; — tables rebuild attrs via ray_table_add_col */ + buf++; + *len -= 1; + + int64_t saved = *len; + /* Deserialize schema (I64 vector of sym IDs) */ + ray_t* schema = ray_de_raw(buf, len); + if (!schema || RAY_IS_ERR(schema)) return schema; + + /* Deserialize columns (as LIST) */ + ray_t* cols = ray_de_raw(buf + (saved - *len), len); + if (!cols || RAY_IS_ERR(cols)) { + ray_release(schema); + return cols; + } + + /* Reconstruct table */ + if (cols->type != RAY_LIST || schema->type != RAY_I64) { + ray_release(schema); + ray_release(cols); + return ray_error("domain", NULL); + } + + int64_t ncols = cols->len; + ray_t* tbl = ray_table_new(ncols); + if (!tbl || RAY_IS_ERR(tbl)) { + ray_release(schema); + ray_release(cols); + return tbl; + } + + int64_t* name_ids = (int64_t*)ray_data(schema); + ray_t** col_ptrs = (ray_t**)ray_data(cols); + for (int64_t i = 0; i < ncols && i < schema->len; i++) { + ray_t* new_tbl = ray_table_add_col(tbl, name_ids[i], col_ptrs[i]); + if (!new_tbl || RAY_IS_ERR(new_tbl)) { + ray_release(tbl); + ray_release(schema); + ray_release(cols); + return new_tbl; + } + tbl = new_tbl; + } + + ray_release(schema); + ray_release(cols); + return tbl; + } + + case RAY_DICT: { + if (*len < 1) return ray_error("domain", NULL); + uint8_t dict_attrs = buf[0]; + buf++; + *len -= 1; + + int64_t saved = *len; + ray_t* keys = ray_de_raw(buf, len); + if (!keys || RAY_IS_ERR(keys)) return keys; + + ray_t* vals = ray_de_raw(buf + (saved - *len), len); + if (!vals || RAY_IS_ERR(vals)) { + ray_release(keys); + return vals; + } + + /* Build dict: alloc with 2 slots */ + ray_t* dict = ray_alloc(2 * sizeof(ray_t*)); + if (!dict || RAY_IS_ERR(dict)) { + ray_release(keys); + ray_release(vals); + return dict; + } + dict->type = RAY_DICT; + dict->attrs = dict_attrs; + dict->len = 2; + 
((ray_t**)ray_data(dict))[0] = keys; + ((ray_t**)ray_data(dict))[1] = vals; + return dict; + } + + case RAY_LAMBDA: { + if (*len < 1) return ray_error("domain", NULL); + uint8_t lam_attrs = buf[0]; + buf++; + *len -= 1; + + int64_t saved = *len; + ray_t* params = ray_de_raw(buf, len); + if (!params || RAY_IS_ERR(params)) return params; + + ray_t* body = ray_de_raw(buf + (saved - *len), len); + if (!body || RAY_IS_ERR(body)) { + ray_release(params); + return body; + } + + /* Build lambda: allocate with 7 slots (same as eval.c) */ + ray_t* lambda = ray_alloc(7 * sizeof(ray_t*)); + if (!lambda || RAY_IS_ERR(lambda)) { + ray_release(params); + ray_release(body); + return lambda; + } + lambda->type = RAY_LAMBDA; + lambda->attrs = lam_attrs; + lambda->len = 0; + memset(ray_data(lambda), 0, 7 * sizeof(ray_t*)); + ((ray_t**)ray_data(lambda))[0] = params; + ((ray_t**)ray_data(lambda))[1] = body; + return lambda; + } + + case RAY_UNARY: + case RAY_BINARY: + case RAY_VARY: { + /* Deserialize builtin by name: read null-terminated string, + * look up in the global environment. */ + size_t nlen = safe_strlen(buf, *len); + if ((int64_t)nlen >= *len) return ray_error("domain", NULL); + int64_t sym = ray_sym_intern((const char*)buf, nlen); + *len -= (int64_t)nlen + 1; + ray_t* fn = ray_env_get(sym); + if (!fn) return ray_error("name", NULL); + ray_retain(fn); + return fn; + } + + case RAY_ERROR: { + if (*len < 8) return ray_error("domain", NULL); + char code[8]; memcpy(code, buf, 7); code[7] = 0; /* wire bytes are untrusted: force NUL termination so the code string cannot run past the 8-byte field */ + *len -= 8; + return ray_error(code, NULL); + } + + default: + return ray_error("type", NULL); + } +} + +/* -------------------------------------------------------------------------- + * ray_ser — top-level: serialize with IPC header + * -------------------------------------------------------------------------- */ + +ray_t* ray_ser(ray_t* obj) { + int64_t payload = ray_serde_size(obj); + if (payload <= 0) return ray_error("domain", payload < 0 ? 
"serialization overflow" : NULL); + + int64_t total = (int64_t)sizeof(ray_ipc_header_t) + payload; + ray_t* buf = ray_vec_new(RAY_U8, total); + if (!buf || RAY_IS_ERR(buf)) return buf; + buf->len = total; + + ray_ipc_header_t* hdr = (ray_ipc_header_t*)ray_data(buf); + hdr->prefix = RAY_SERDE_PREFIX; + hdr->version = RAY_SERDE_WIRE_VERSION; + hdr->flags = 0; + hdr->endian = 0; + hdr->msgtype = 0; + hdr->size = payload; + + int64_t written = ray_ser_raw((uint8_t*)ray_data(buf) + sizeof(ray_ipc_header_t), obj); + if (written == 0) { + ray_release(buf); + return ray_error("domain", NULL); + } + + return buf; +} + +/* -------------------------------------------------------------------------- + * ray_de — top-level: deserialize from U8 vector + * -------------------------------------------------------------------------- */ + +ray_t* ray_de(ray_t* bytes) { + if (!bytes || RAY_IS_ERR(bytes)) return ray_error("type", NULL); + if (bytes->type != RAY_U8 && bytes->type != -RAY_U8) + return ray_error("type", NULL); + + int64_t total = bytes->len; + uint8_t* buf = (uint8_t*)ray_data(bytes); + + if (total < (int64_t)sizeof(ray_ipc_header_t)) + return ray_error("domain", NULL); + + ray_ipc_header_t* hdr = (ray_ipc_header_t*)buf; + if (hdr->prefix != RAY_SERDE_PREFIX) + return ray_error("domain", NULL); + if (hdr->version != RAY_SERDE_WIRE_VERSION) + return ray_error("version", "serde wire version mismatch"); + if (hdr->size < 0 || hdr->size > 1000000000) + return ray_error("domain", NULL); + if (hdr->size + (int64_t)sizeof(ray_ipc_header_t) != total) + return ray_error("domain", NULL); + + int64_t len = hdr->size; + return ray_de_raw(buf + sizeof(ray_ipc_header_t), &len); +} + +/* -------------------------------------------------------------------------- + * File I/O: save/load any object in binary format + * -------------------------------------------------------------------------- */ + +ray_err_t ray_obj_save(ray_t* obj, const char* path) { + ray_t* bytes = ray_ser(obj); + if 
(!bytes || RAY_IS_ERR(bytes)) { + if (bytes && RAY_IS_ERR(bytes)) ray_error_free(bytes); + return RAY_ERR_DOMAIN; + } + + FILE* f = fopen(path, "wb"); + if (!f) { ray_release(bytes); return RAY_ERR_IO; } + + size_t total = (size_t)bytes->len; + size_t n = fwrite(ray_data(bytes), 1, total, f); + if (n != total) { + fclose(f); ray_release(bytes); + return RAY_ERR_IO; + } + + /* Durability: fflush + fsync BEFORE fclose so a buffered write + * hitting ENOSPC inside fclose doesn't slip through silently. + * Callers (esp. ray_journal_snapshot) write to a .tmp then rename + * — without this fsync the .tmp may be empty/partial on disk + * when the rename atomically swaps it in. */ + if (fflush(f) != 0) { + fclose(f); ray_release(bytes); + return RAY_ERR_IO; + } +#ifndef RAY_OS_WINDOWS + if (fsync(fileno(f)) != 0) { + fclose(f); ray_release(bytes); + return RAY_ERR_IO; + } +#endif + /* fclose itself can fail (final flush of any platform-level + * buffer). Check it. */ + int close_rc = fclose(f); + ray_release(bytes); + return close_rc == 0 ? RAY_OK : RAY_ERR_IO; +} + +ray_t* ray_obj_load(const char* path) { + FILE* f = fopen(path, "rb"); + if (!f) return ray_error("io", NULL); + + /* Check fseek/ftell return values — silent failures here let a + * truncated read through as "valid empty file" or worse. 
*/ + if (fseek(f, 0, SEEK_END) != 0) { fclose(f); return ray_error("io", "fseek end"); } + long sz = ftell(f); + if (sz < 0) { fclose(f); return ray_error("io", "ftell"); } + if (fseek(f, 0, SEEK_SET) != 0) { fclose(f); return ray_error("io", "fseek set"); } + + if (sz == 0) { fclose(f); return ray_error("io", "empty file"); } + + ray_t* buf = ray_vec_new(RAY_U8, sz); + if (!buf || RAY_IS_ERR(buf)) { fclose(f); return buf; } + buf->len = sz; + + size_t n = fread(ray_data(buf), 1, (size_t)sz, f); + fclose(f); + + if ((long)n != sz) { ray_release(buf); return ray_error("io", "short read"); } + + ray_t* result = ray_de(buf); + ray_release(buf); + return result; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/store/serde.h b/crates/rayforce-sys/vendor/rayforce/src/store/serde.h new file mode 100644 index 0000000..d0d6a11 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/store/serde.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_SERDE_H +#define RAY_SERDE_H + +#include + +/* Wire format prefix */ +#define RAY_SERDE_PREFIX 0xcefadefa + +/* Wire format version. Bumped whenever the on-the-wire layout of any + * serialized value changes (e.g. a new field is added to the atom + * record) so a peer running older code detects the mismatch and + * rejects the payload instead of silently mis-parsing. Decoupled from + * RAY_VERSION_MAJOR on purpose: API version and wire version evolve + * independently. + * + * Version 2 — atoms: type(1) + value-bytes. + * Version 3 — atoms: type(1) + flags(1) + value-bytes. `flags` bit 0 + * carries the typed-null marker so (de (ser 0Nl)) round- + * trips (previously decoded as ray_i64(0) and dropped the + * null bit). */ +#define RAY_SERDE_WIRE_VERSION 3 + +/* Wire-only null marker (not a valid ray_t type) */ +#define RAY_SERDE_NULL 126 + +typedef struct ray_ipc_header_t { + uint32_t prefix; /* RAY_SERDE_PREFIX */ + uint8_t version; /* RAY_SERDE_WIRE_VERSION (wire version, not the API major version) */ + uint8_t flags; /* 0 */ + uint8_t endian; /* 0 = little */ + uint8_t msgtype; /* 0 = async, 1 = sync, 2 = response */ + int64_t size; /* payload size in bytes */ +} ray_ipc_header_t; + +_Static_assert(sizeof(ray_ipc_header_t) == 16, "ipc header must be 16 bytes"); + +/* Calculate serialized size of an object (excluding IPC header) */ +int64_t ray_serde_size(ray_t* obj); + +/* Serialize object into buffer. Returns bytes written, 0 on error. + * Buffer must have at least ray_serde_size(obj) bytes. */ +int64_t ray_ser_raw(uint8_t* buf, ray_t* obj); + +/* Deserialize object from buffer. Returns reconstructed ray_t* (NULL for the RAY_SERDE_NULL marker or an empty buffer, an error object on malformed input). + * On entry *len is the number of bytes available at buf; it is decremented as bytes are consumed, so on return it holds the bytes remaining. 
*/ +ray_t* ray_de_raw(uint8_t* buf, int64_t* len); + +/* Top-level: serialize to U8 vector with IPC header */ +ray_t* ray_ser(ray_t* obj); + +/* Top-level: deserialize from U8 vector (validates IPC header) */ +ray_t* ray_de(ray_t* bytes); + +/* File I/O: save/load any object in binary format */ +ray_err_t ray_obj_save(ray_t* obj, const char* path); +ray_t* ray_obj_load(const char* path); + +#endif /* RAY_SERDE_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/store/splay.c b/crates/rayforce-sys/vendor/rayforce/src/store/splay.c new file mode 100644 index 0000000..32ce082 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/store/splay.c @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "splay.h" +#include "store/col.h" +#include "store/fileio.h" +#include +#include + +/* -------------------------------------------------------------------------- + * Splayed table: directory of column files + .d schema file + * + * Format: + * dir/.d — I64 vector of column name symbol IDs + * dir/ — column file per column + * + * No symlink check: local-trust file format; path traversal checks + * (rejecting '/', '\\', '..', leading '.') cover main attack vector. + * -------------------------------------------------------------------------- */ + +/* Post-load validation: reject if sym table is empty but table has RAY_SYM + * columns, or if schema expected columns but none could be loaded. */ +static ray_err_t validate_sym_columns(ray_t* tbl, int64_t schema_ncols) { + if (ray_sym_count() != 0) return RAY_OK; + + int64_t nc = ray_table_ncols(tbl); + if (schema_ncols > 0 && nc == 0) return RAY_ERR_CORRUPT; + + for (int64_t c = 0; c < nc; c++) { + ray_t* col = ray_table_get_col_idx(tbl, c); + if (col && col->type == RAY_SYM) return RAY_ERR_CORRUPT; + } + return RAY_OK; +} + +/* -------------------------------------------------------------------------- + * ray_splay_save — save a table to a splayed table directory + * -------------------------------------------------------------------------- */ + +ray_err_t ray_splay_save(ray_t* tbl, const char* dir, const char* sym_path) { + if (!tbl || RAY_IS_ERR(tbl)) return RAY_ERR_TYPE; + if (!dir) return RAY_ERR_IO; + + /* Create directory and any missing parents (mkdir -p semantics). + * Required for partitioned layouts like "/db/2024.01.01/t/" where the + * caller hasn't pre-created the date partition. 
*/ + ray_err_t mkdir_err = ray_mkdir_p(dir); + if (mkdir_err != RAY_OK) return mkdir_err; + + /* Save symbol table if sym_path provided */ + if (sym_path) { + ray_err_t sym_err = ray_sym_save(sym_path); + if (sym_err != RAY_OK) return sym_err; + } + + int64_t ncols = ray_table_ncols(tbl); + + /* Save .d schema file */ + ray_t* schema = ray_table_schema(tbl); + if (schema) { + char path[1024]; + int path_len = snprintf(path, sizeof(path), "%s/.d", dir); + if (path_len < 0 || (size_t)path_len >= sizeof(path)) return RAY_ERR_RANGE; + ray_err_t err = ray_col_save(schema, path); + if (err != RAY_OK) return err; + } + + /* Save each column */ + for (int64_t c = 0; c < ncols; c++) { + ray_t* col = ray_table_get_col_idx(tbl, c); + int64_t name_id = ray_table_col_name(tbl, c); + if (!col) continue; + + /* Get column name string */ + ray_t* name_atom = ray_sym_str(name_id); + if (!name_atom) continue; + + const char* name = ray_str_ptr(name_atom); + size_t name_len = ray_str_len(name_atom); + + /* Reject names with path separators, traversal, or starting with '.' */ + if (name_len == 0 || name[0] == '.' || + memchr(name, '/', name_len) || memchr(name, '\\', name_len) || + memchr(name, '\0', name_len)) + continue; + + char path[1024]; + int path_len = snprintf(path, sizeof(path), "%s/%.*s", dir, (int)name_len, name); + if (path_len < 0 || (size_t)path_len >= sizeof(path)) return RAY_ERR_RANGE; + + ray_err_t err = ray_col_save(col, path); + /* On partial failure, columns 0..c-1 remain on disk. + * Caller should clean up or use atomic rename for safe writes. */ + if (err != RAY_OK) return err; + } + + return RAY_OK; +} + +/* -------------------------------------------------------------------------- + * splay_load_impl — shared implementation for ray_splay_load / ray_read_splayed + * + * When use_mmap is false, columns are loaded via ray_col_load (buddy copy). + * When use_mmap is true, columns are loaded via ray_col_mmap (zero-copy). 
+ * The .d schema is always loaded via ray_col_load (small, buddy copy). + * -------------------------------------------------------------------------- */ + +static ray_t* splay_load_impl(const char* dir, const char* sym_path, bool use_mmap) { + if (!dir) return ray_error("io", NULL); + + /* Load symbol table if sym_path provided */ + if (sym_path) { + ray_err_t sym_err = ray_sym_load(sym_path); + if (sym_err != RAY_OK) return ray_error(ray_err_code_str(sym_err), NULL); + } + + /* Load .d schema */ + char path[1024]; + int path_len = snprintf(path, sizeof(path), "%s/.d", dir); + if (path_len < 0 || (size_t)path_len >= sizeof(path)) + return ray_error("range", NULL); + ray_t* schema = ray_col_load(path); + if (!schema || RAY_IS_ERR(schema)) return schema; + if (schema->type != RAY_I64) { ray_release(schema); return ray_error("corrupt", NULL); } /* .d must be an I64 vector: it is read as int64_t[schema->len] below, so any other element type would read out of bounds */ + int64_t ncols = schema->len; + int64_t* name_ids = (int64_t*)ray_data(schema); + + ray_t* tbl = ray_table_new(ncols); + if (!tbl || RAY_IS_ERR(tbl)) { + ray_release(schema); + return tbl; + } + + /* Load each column */ + for (int64_t c = 0; c < ncols; c++) { + int64_t name_id = name_ids[c]; + ray_t* name_atom = ray_sym_str(name_id); + if (!name_atom) { + /* Schema references a sym ID that doesn't exist — sym table + * is stale or wrong for this data. */ + ray_release(schema); + ray_release(tbl); + return ray_error("corrupt", NULL); + } + + const char* name = ray_str_ptr(name_atom); + size_t name_len = ray_str_len(name_atom); + + /* Reject names with path separators, traversal, or starting with '.' + * — these indicate a stale/wrong sym file, not a column to skip. */ + if (name_len == 0 || name[0] == '.' 
|| + memchr(name, '/', name_len) || memchr(name, '\\', name_len) || + memchr(name, '\0', name_len)) { + ray_release(schema); + ray_release(tbl); + return ray_error("corrupt", NULL); + } + + path_len = snprintf(path, sizeof(path), "%s/%.*s", dir, (int)name_len, name); + if (path_len < 0 || (size_t)path_len >= sizeof(path)) { + ray_release(schema); + ray_release(tbl); + return ray_error("range", NULL); + } + + ray_t* col = use_mmap ? ray_col_mmap(path) : ray_col_load(path); + if (use_mmap && col && RAY_IS_ERR(col) && + strcmp(ray_err_code(col), "nyi") == 0) { + /* ray_release on an error object is a no-op (rayforce.h:180); + * must use ray_error_free to actually reclaim the error + * before retrying with the non-mmap loader. */ + ray_error_free(col); + col = ray_col_load(path); + } + if (!col || RAY_IS_ERR(col)) { + ray_release(schema); + ray_release(tbl); + return col ? col : ray_error("io", NULL); + } + + ray_t* new_df = ray_table_add_col(tbl, name_id, col); + if (!new_df || RAY_IS_ERR(new_df)) { + ray_release(col); + ray_release(schema); + ray_release(tbl); + return new_df ? new_df : ray_error("oom", NULL); + } + ray_release(col); /* table_add_col retains; drop our ref */ + tbl = new_df; + } + + ray_release(schema); + + ray_err_t sym_check = validate_sym_columns(tbl, ncols); + if (sym_check != RAY_OK) { + ray_release(tbl); + return ray_error(ray_err_code_str(sym_check), NULL); + } + + return tbl; +} + +ray_t* ray_splay_load(const char* dir, const char* sym_path) { + return splay_load_impl(dir, sym_path, false); +} + +ray_t* ray_read_splayed(const char* dir, const char* sym_path) { + return splay_load_impl(dir, sym_path, true); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/store/splay.h b/crates/rayforce-sys/vendor/rayforce/src/store/splay.h new file mode 100644 index 0000000..8648bf1 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/store/splay.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_SPLAY_H +#define RAY_SPLAY_H + +#include + +/* Splayed table I/O */ +ray_err_t ray_splay_save(ray_t* tbl, const char* dir, const char* sym_path); +ray_t* ray_splay_load(const char* dir, const char* sym_path); +ray_t* ray_read_splayed(const char* dir, const char* sym_path); + +#endif /* RAY_SPLAY_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/table/dict.c b/crates/rayforce-sys/vendor/rayforce/src/table/dict.c new file mode 100644 index 0000000..9d58412 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/table/dict.c @@ -0,0 +1,609 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "dict.h" +#include "table.h" +#include "table/sym.h" +#include "lang/internal.h" /* atom_eq for RAY_LIST key compares */ +#include + +/* -------------------------------------------------------------------------- + * Layout + * + * Block header (32B) | slot[0] = keys (ray_t*) | slot[1] = vals (ray_t*) + * + * d->type = RAY_DICT + * d->len = 2 (slot count, kept consistent with table block convention) + * keys: any vector type; pair count = keys->len + * vals: typed vector when homogeneous, RAY_LIST otherwise + * -------------------------------------------------------------------------- */ + +#define DICT_DATA_SIZE (2 * sizeof(ray_t*)) + +static ray_t* dict_alloc_block(ray_t* keys, ray_t* vals) { + ray_t* d = ray_alloc(DICT_DATA_SIZE); + if (!d || RAY_IS_ERR(d)) return d; + d->type = RAY_DICT; + d->attrs = 0; + d->len = 2; + memset(d->nullmap, 0, 16); + ray_t** slots = ray_dict_slots(d); + slots[0] = keys; + slots[1] = vals; + return d; +} + +/* -------------------------------------------------------------------------- + * ray_dict_new — wrap two refs into a fresh RAY_DICT block. + * + * Ownership: consumes one ref each of `keys` and `vals` (transferred into + * the dict). On error, both refs are released. Returns rc=1 dict. + * -------------------------------------------------------------------------- */ + +ray_t* ray_dict_new(ray_t* keys, ray_t* vals) { + if (!keys || RAY_IS_ERR(keys)) { + if (vals && !RAY_IS_ERR(vals)) ray_release(vals); + return keys ? keys : ray_error("type", NULL); + } + if (!vals || RAY_IS_ERR(vals)) { + ray_release(keys); + return vals ? vals : ray_error("type", NULL); + } + ray_t* d = dict_alloc_block(keys, vals); + if (!d || RAY_IS_ERR(d)) { + ray_release(keys); + ray_release(vals); + return d ? d : ray_error("oom", NULL); + } + return d; +} + +/* -------------------------------------------------------------------------- + * ray_dict_keys / ray_dict_vals — borrowed pointers; do not release. 
+ * -------------------------------------------------------------------------- */ + +ray_t* ray_dict_keys(ray_t* d) { + if (!d || RAY_IS_ERR(d) || d->type != RAY_DICT) return NULL; + return ray_dict_slots(d)[0]; +} + +ray_t* ray_dict_vals(ray_t* d) { + if (!d || RAY_IS_ERR(d) || d->type != RAY_DICT) return NULL; + return ray_dict_slots(d)[1]; +} + +int64_t ray_dict_len(ray_t* d) { + ray_t* keys = ray_dict_keys(d); + return keys ? keys->len : 0; +} + +/* -------------------------------------------------------------------------- + * ray_dict_find_sym — fast sym-only probe (no atom boxing). + * -------------------------------------------------------------------------- */ + +int64_t ray_dict_find_sym(ray_t* d, int64_t sym_id) { + if (!d || RAY_IS_ERR(d) || d->type != RAY_DICT) return -1; + ray_t* keys = ray_dict_slots(d)[0]; + if (!keys || RAY_IS_ERR(keys) || keys->type != RAY_SYM) return -1; + void* base = ray_data(keys); + int64_t n = keys->len; + uint8_t aw = keys->attrs & RAY_SYM_W_MASK; + switch (aw) { + case RAY_SYM_W8: { + const uint8_t* a = (const uint8_t*)base; + for (int64_t i = 0; i < n; i++) if ((int64_t)a[i] == sym_id) return i; + return -1; + } + case RAY_SYM_W16: { + const uint16_t* a = (const uint16_t*)base; + for (int64_t i = 0; i < n; i++) if ((int64_t)a[i] == sym_id) return i; + return -1; + } + case RAY_SYM_W32: { + const uint32_t* a = (const uint32_t*)base; + for (int64_t i = 0; i < n; i++) if ((int64_t)a[i] == sym_id) return i; + return -1; + } + default: { + const int64_t* a = (const int64_t*)base; + for (int64_t i = 0; i < n; i++) if (a[i] == sym_id) return i; + return -1; + } + } +} + +ray_t* ray_dict_probe_sym_borrowed(ray_t* d, int64_t sym_id) { + int64_t idx = ray_dict_find_sym(d, sym_id); + if (idx < 0) return NULL; + ray_t* vals = ray_dict_slots(d)[1]; + if (!vals || RAY_IS_ERR(vals) || vals->type != RAY_LIST || idx >= vals->len) return NULL; /* idx is a position in keys; a dict whose vals list is shorter than its keys must not be indexed past the end */ + return ((ray_t**)ray_data(vals))[idx]; +} + +ray_t* ray_container_probe_sym(ray_t* v, int64_t sym_id) { + if (!v || 
RAY_IS_ERR(v)) return NULL; + if (v->type == RAY_DICT) return ray_dict_probe_sym_borrowed(v, sym_id); + if (v->type == RAY_TABLE) return ray_table_get_col(v, sym_id); + return NULL; +} + +/* -------------------------------------------------------------------------- + * ray_dict_find_idx — locate key index in keys vector; -1 if missing. + * + * Dispatches on keys->type; the key atom must have matching atom type + * (e.g. -RAY_SYM key for a RAY_SYM keys vector). Returns -1 on type + * mismatch rather than erroring; the caller surfaces the error. + * -------------------------------------------------------------------------- */ + +int64_t ray_dict_find_idx(ray_t* d, ray_t* key_atom) { + if (!d || RAY_IS_ERR(d) || d->type != RAY_DICT) return -1; + if (!key_atom || RAY_IS_ERR(key_atom)) return -1; + + ray_t* keys = ray_dict_slots(d)[0]; + if (!keys || RAY_IS_ERR(keys)) return -1; + int8_t kt = keys->type; + int64_t n = keys->len; + if (n <= 0) return -1; + + /* RAY_LIST keys: heterogeneous, compare via atom_eq. */ + if (kt == RAY_LIST) { + ray_t** ks = (ray_t**)ray_data(keys); + for (int64_t i = 0; i < n; i++) + if (atom_eq(ks[i], key_atom)) return i; + return -1; + } + + /* Typed-vector keys: atom type must match. */ + if (key_atom->type != -kt) return -1; + + /* Null-aware probe: a null key atom matches only null slots; a non-null + * key atom must match a non-null slot of equal value. Without this, a + * group dict containing both `0Nl` and `0` keys (now produced as + * distinct buckets by ray_group_fn) would still resolve `(at d 0Nl)` + * to the first non-null zero — re-introducing the conflation we just + * fixed in grouping. 
*/ + bool key_is_null = RAY_ATOM_IS_NULL(key_atom); + bool keys_have_nulls = (keys->attrs & RAY_ATTR_HAS_NULLS) != 0 + || (keys->attrs & RAY_ATTR_SLICE); + if (key_is_null) { + if (!keys_have_nulls) return -1; + for (int64_t i = 0; i < n; i++) + if (ray_vec_is_null(keys, i)) return i; + return -1; + } + + void* base = ray_data(keys); +#define DICT_FIND_LOOP(EQ_EXPR) do { \ + if (keys_have_nulls) { \ + for (int64_t i = 0; i < n; i++) { \ + if (ray_vec_is_null(keys, i)) continue; \ + if (EQ_EXPR) return i; \ + } \ + } else { \ + for (int64_t i = 0; i < n; i++) \ + if (EQ_EXPR) return i; \ + } \ + return -1; \ + } while (0) + + switch (kt) { + case RAY_SYM: { + int64_t key_id = key_atom->i64; + uint8_t aw = keys->attrs; + switch (aw & RAY_SYM_W_MASK) { + case RAY_SYM_W8: { + const uint8_t* a = (const uint8_t*)base; + DICT_FIND_LOOP((int64_t)a[i] == key_id); + } + case RAY_SYM_W16: { + const uint16_t* a = (const uint16_t*)base; + DICT_FIND_LOOP((int64_t)a[i] == key_id); + } + case RAY_SYM_W32: { + const uint32_t* a = (const uint32_t*)base; + DICT_FIND_LOOP((int64_t)a[i] == key_id); + } + default: { + const int64_t* a = (const int64_t*)base; + DICT_FIND_LOOP(a[i] == key_id); + } + } + } + case RAY_I64: + case RAY_TIMESTAMP: { + const int64_t* a = (const int64_t*)base; + int64_t v = key_atom->i64; + DICT_FIND_LOOP(a[i] == v); + } + case RAY_I32: + case RAY_DATE: + case RAY_TIME: { + const int32_t* a = (const int32_t*)base; + int32_t v = key_atom->i32; + DICT_FIND_LOOP(a[i] == v); + } + case RAY_I16: { + const int16_t* a = (const int16_t*)base; + int16_t v = key_atom->i16; + DICT_FIND_LOOP(a[i] == v); + } + case RAY_BOOL: + case RAY_U8: { + const uint8_t* a = (const uint8_t*)base; + uint8_t v = key_atom->u8; + DICT_FIND_LOOP(a[i] == v); + } + case RAY_F32: { + const float* a = (const float*)base; + float v = (float)key_atom->f64; + DICT_FIND_LOOP(a[i] == v); + } + case RAY_F64: { + const double* a = (const double*)base; + double v = key_atom->f64; + DICT_FIND_LOOP(a[i] == 
v); + } + case RAY_STR: { + const char* kp = ray_str_ptr(key_atom); + size_t klen = ray_str_len(key_atom); + for (int64_t i = 0; i < n; i++) { + if (keys_have_nulls && ray_vec_is_null(keys, i)) continue; + size_t vlen = 0; + const char* vp = ray_str_vec_get(keys, i, &vlen); + if (!vp) continue; + if (vlen == klen && (klen == 0 || memcmp(vp, kp, klen) == 0)) + return i; + } + return -1; + } + case RAY_GUID: { + const uint8_t* a = (const uint8_t*)base; + const uint8_t* kp = key_atom->obj ? (const uint8_t*)ray_data(key_atom->obj) : NULL; + if (!kp) return -1; + for (int64_t i = 0; i < n; i++) { + if (keys_have_nulls && ray_vec_is_null(keys, i)) continue; + if (memcmp(a + i * 16, kp, 16) == 0) return i; + } + return -1; + } + default: + return -1; + } +#undef DICT_FIND_LOOP +} + +/* -------------------------------------------------------------------------- + * Internal: read element at index out of a vals container as a ray_t*. + * For RAY_LIST that's the stored slot pointer (borrowed). For typed vectors we + * synthesize a fresh atom (rc=1) — caller owns the returned ref. + * Returns NULL for a NULL/error `vals`, an out-of-range idx, or an unhandled type. + * `*owned_out` is set true if the caller must release the result, false if + * it's borrowed (must NOT be released by the caller). + * -------------------------------------------------------------------------- */ + +static ray_t* dict_vals_at(ray_t* vals, int64_t idx, bool* owned_out) { + *owned_out = false; + if (!vals || RAY_IS_ERR(vals)) return NULL; + if (idx < 0 || idx >= vals->len) return NULL; + + if (vals->type == RAY_LIST) { + ray_t** slots = (ray_t**)ray_data(vals); + return slots[idx]; + } + + /* Typed vector — box element into a fresh atom so the caller has a + * uniform ray_t* contract. Mark as owned so the caller releases.
*/ + ray_t* atom = NULL; + void* base = ray_data(vals); + switch (vals->type) { + case RAY_BOOL: atom = ray_bool(((uint8_t*)base)[idx]); break; + case RAY_U8: atom = ray_u8(((uint8_t*)base)[idx]); break; + case RAY_I16: atom = ray_i16(((int16_t*)base)[idx]); break; + case RAY_I32: atom = ray_i32(((int32_t*)base)[idx]); break; + case RAY_I64: atom = ray_i64(((int64_t*)base)[idx]); break; + case RAY_F32: atom = ray_f32(((float*)base)[idx]); break; + case RAY_F64: atom = ray_f64(((double*)base)[idx]); break; + case RAY_DATE: atom = ray_date(((int32_t*)base)[idx]); break; + case RAY_TIME: atom = ray_time(((int32_t*)base)[idx]); break; + case RAY_TIMESTAMP: atom = ray_timestamp(((int64_t*)base)[idx]); break; + case RAY_SYM: { + int64_t id = ray_read_sym(base, idx, vals->type, vals->attrs); + atom = ray_sym(id); + break; + } + case RAY_STR: { + size_t slen = 0; + const char* sp = ray_str_vec_get(vals, idx, &slen); + atom = sp ? ray_str(sp, slen) : ray_str("", 0); + break; + } + case RAY_GUID: + atom = ray_guid(((uint8_t*)base) + idx * 16); + break; + default: + return NULL; + } + if (atom && !RAY_IS_ERR(atom)) *owned_out = true; + return atom; +} + +/* -------------------------------------------------------------------------- + * ray_dict_get — return value for `key_atom`, or NULL if missing. + * + * The returned pointer is owned by the caller (rc=1) — callers must + * `ray_release` it after use. This makes the contract uniform whether + * vals is a typed vector (boxed atom) or a RAY_LIST (retained slot). 
+ * -------------------------------------------------------------------------- */ + +ray_t* ray_dict_get(ray_t* d, ray_t* key_atom) { + int64_t i = ray_dict_find_idx(d, key_atom); + if (i < 0) return NULL; + ray_t* vals = ray_dict_slots(d)[1]; + bool owned = false; + ray_t* out = dict_vals_at(vals, i, &owned); + if (!out || RAY_IS_ERR(out)) return out; + if (!owned) ray_retain(out); + return out; +} + +/* -------------------------------------------------------------------------- + * promote_vals_to_list — return a RAY_LIST equivalent to `vals`. + * + * If `vals` is already a RAY_LIST we retain it and return the same pointer. If + * `vals` is a typed vector we materialize each element into a fresh atom + * inside a new RAY_LIST. Either way the caller owns the returned ref and must release + * it (and the original `vals` separately if it owns that). + * + * Used by upsert/remove to keep mutation paths simple regardless of the + * incoming vals shape. A NULL/error `vals` is returned as-is. + * -------------------------------------------------------------------------- */ + +static ray_t* promote_vals_to_list(ray_t* vals) { + if (!vals || RAY_IS_ERR(vals)) return vals; + if (vals->type == RAY_LIST) { + ray_retain(vals); + return vals; + } + int64_t n = vals->len; + ray_t* lst = ray_list_new(n); + if (!lst || RAY_IS_ERR(lst)) return lst ? lst : ray_error("oom", NULL); + for (int64_t i = 0; i < n; i++) { + bool owned = false; + ray_t* a = dict_vals_at(vals, i, &owned); + if (!a || RAY_IS_ERR(a)) { + ray_release(lst); + return a ? a : ray_error("oom", NULL); + } + ray_t* lst2 = ray_list_append(lst, a); + if (owned) ray_release(a); + if (!lst2 || RAY_IS_ERR(lst2)) { + if (lst2 == NULL) ray_release(lst); + return lst2 ? lst2 : ray_error("oom", NULL); + } + lst = lst2; + } + return lst; +} + +/* -------------------------------------------------------------------------- + * ray_dict_upsert — set d[key_atom] = val. + * + * Ownership: consumes `d`; on success the ref is transferred into the + * returned dict (rc=1).
On error `d` is released. Does NOT consume + * `key_atom` or `val` — both are retained internally as needed. + * + * Existing-key fast path: COW the dict, replace val slot in place when + * vals is a RAY_LIST; if vals is a typed vector matching val's atom type + * we COW vals and rewrite the element; otherwise promote vals to RAY_LIST + * first. Missing-key path: append key & val (always promoting vals to + * RAY_LIST so the homogeneous typed-vec invariant is not silently broken). + * -------------------------------------------------------------------------- */ + +ray_t* ray_dict_upsert(ray_t* d, ray_t* key_atom, ray_t* val) { + if (!d || RAY_IS_ERR(d)) return d ? d : ray_error("type", NULL); + if (!val || RAY_IS_ERR(val)) { + ray_release(d); + return val ? val : ray_error("type", NULL); + } + + /* Empty-target special case: build a fresh dict. Keys vector type + * mirrors the key atom's atom type. */ + if (d->type != RAY_DICT) { + ray_release(d); + if (!key_atom || RAY_IS_ERR(key_atom)) return ray_error("type", NULL); + int8_t kt = (int8_t)-key_atom->type; + ray_t* keys = (kt == RAY_SYM) + ? ray_sym_vec_new(RAY_SYM_W64, 1) + : ray_vec_new(kt, 1); + if (!keys || RAY_IS_ERR(keys)) return keys ? keys : ray_error("oom", NULL); + ray_t* vals = ray_list_new(1); + if (!vals || RAY_IS_ERR(vals)) { ray_release(keys); return vals ? vals : ray_error("oom", NULL); } + ray_t* d2 = ray_dict_new(keys, vals); + if (!d2 || RAY_IS_ERR(d2)) return d2; + return ray_dict_upsert(d2, key_atom, val); + } + + int64_t idx = ray_dict_find_idx(d, key_atom); + + /* COW the dict; the slots remain shared until we COW them too. */ + d = ray_cow(d); + if (!d || RAY_IS_ERR(d)) return d; + + ray_t** slots = ray_dict_slots(d); + ray_t* keys = slots[0]; + ray_t* vals = slots[1]; + + /* The append/set helpers consume the input ref and return an owned ref + * (possibly the same pointer, or a fresh one after grow — in which case + * the OLD block is already freed inside the helper). 
So we always + * overwrite slots[*] with the helper return and never release the old + * pointer ourselves — that would double-free on grow. */ + if (idx >= 0) { + /* Replace existing slot. */ + if (vals->type == RAY_LIST) { + ray_t* new_vals = ray_list_set(vals, idx, val); + if (!new_vals || RAY_IS_ERR(new_vals)) { ray_release(d); return new_vals ? new_vals : ray_error("oom", NULL); } + slots[1] = new_vals; + } else { + /* Typed vector path: promote to LIST first, then update. */ + ray_t* lst = promote_vals_to_list(vals); + if (!lst || RAY_IS_ERR(lst)) { ray_release(d); return lst ? lst : ray_error("oom", NULL); } + ray_release(vals); + slots[1] = lst; + ray_t* new_lst = ray_list_set(lst, idx, val); + if (!new_lst || RAY_IS_ERR(new_lst)) { ray_release(d); return new_lst ? new_lst : ray_error("oom", NULL); } + slots[1] = new_lst; + } + return d; + } + + /* Missing key — append to both vectors. Promote vals to LIST first. */ + if (vals->type != RAY_LIST) { + ray_t* lst = promote_vals_to_list(vals); + if (!lst || RAY_IS_ERR(lst)) { ray_release(d); return lst ? lst : ray_error("oom", NULL); } + ray_release(vals); + slots[1] = lst; + vals = lst; + } + + /* Append key — helper consumes `keys`, returns owned (possibly new) ref. */ + ray_t* new_keys = NULL; + if (keys->type == RAY_SYM) { + int64_t kid = key_atom->i64; + new_keys = ray_vec_append(keys, &kid); + } else if (keys->type == RAY_STR && key_atom->type == -RAY_STR) { + new_keys = ray_str_vec_append(keys, ray_str_ptr(key_atom), ray_str_len(key_atom)); + } else if (keys->type == RAY_GUID && key_atom->type == -RAY_GUID) { + const void* src = key_atom->obj ? 
ray_data(key_atom->obj) : NULL; + if (!src) { ray_release(d); return ray_error("type", NULL); } + new_keys = ray_vec_append(keys, src); + } else if (keys->type == RAY_F32 && key_atom->type == -RAY_F32) { + /* F32 atoms keep their value in the f64 union slot; the keys vec + * stores narrower 4-byte floats, so narrow before append (the + * generic &u8 fallback below would copy the wrong half of the + * double bit pattern). */ + float f = (float)key_atom->f64; + new_keys = ray_vec_append(keys, &f); + } else if (keys->type == -key_atom->type) { + new_keys = ray_vec_append(keys, &key_atom->u8); + } else { + ray_release(d); + return ray_error("type", NULL); + } + if (!new_keys || RAY_IS_ERR(new_keys)) { ray_release(d); return new_keys ? new_keys : ray_error("oom", NULL); } + slots[0] = new_keys; + + /* Append val — list_append consumes vals, returns owned (possibly new). */ + ray_t* new_vals = ray_list_append(vals, val); + if (!new_vals || RAY_IS_ERR(new_vals)) { ray_release(d); return new_vals ? new_vals : ray_error("oom", NULL); } + slots[1] = new_vals; + + return d; +} + +/* -------------------------------------------------------------------------- + * ray_dict_remove — drop the (key, val) pair if present. + * + * Ownership: consumes `d`; transferred into the returned dict. If the + * key isn't present, returns the input unchanged (one-ref transferred). + * -------------------------------------------------------------------------- */ + +ray_t* ray_dict_remove(ray_t* d, ray_t* key_atom) { + if (!d || RAY_IS_ERR(d)) return d ? d : ray_error("type", NULL); + if (d->type != RAY_DICT) { ray_release(d); return ray_error("type", NULL); } + + int64_t idx = ray_dict_find_idx(d, key_atom); + if (idx < 0) return d; + + d = ray_cow(d); + if (!d || RAY_IS_ERR(d)) return d; + + ray_t** slots = ray_dict_slots(d); + ray_t* keys = slots[0]; + ray_t* vals = slots[1]; + + /* Promote typed-vector vals to LIST so we can remove uniformly. 
*/ + if (vals->type != RAY_LIST) { + ray_t* lst = promote_vals_to_list(vals); + if (!lst || RAY_IS_ERR(lst)) { ray_release(d); return lst ? lst : ray_error("oom", NULL); } + ray_release(vals); + slots[1] = lst; + vals = lst; + } + + /* Drop key element by slicing (build a smaller vec without idx). */ + int64_t n = keys->len; + ray_t* new_keys = NULL; + if (keys->type == RAY_SYM) { + new_keys = ray_sym_vec_new(keys->attrs & RAY_SYM_W_MASK, n - 1); + } else if (keys->type == RAY_STR) { + new_keys = ray_vec_new(RAY_STR, n - 1); + } else { + new_keys = ray_vec_new(keys->type, n - 1); + } + if (!new_keys || RAY_IS_ERR(new_keys)) { ray_release(d); return new_keys ? new_keys : ray_error("oom", NULL); } + + /* Copy keys[0..idx-1] then keys[idx+1..n-1] into new_keys. */ + if (keys->type == RAY_STR) { + for (int64_t i = 0; i < n; i++) { + if (i == idx) continue; + size_t slen = 0; + const char* sp = ray_str_vec_get(keys, i, &slen); + ray_t* nk = ray_str_vec_append(new_keys, sp ? sp : "", sp ? slen : 0); + if (!nk || RAY_IS_ERR(nk)) { ray_release(new_keys); ray_release(d); return nk ? nk : ray_error("oom", NULL); } + new_keys = nk; + } + } else if (keys->type == RAY_SYM) { + uint8_t aw = keys->attrs & RAY_SYM_W_MASK; + void* sb = ray_data(keys); + for (int64_t i = 0; i < n; i++) { + if (i == idx) continue; + int64_t sid = ray_read_sym(sb, i, RAY_SYM, aw); + ray_t* nk = ray_vec_append(new_keys, &sid); + if (!nk || RAY_IS_ERR(nk)) { ray_release(new_keys); ray_release(d); return nk ? nk : ray_error("oom", NULL); } + new_keys = nk; + } + } else { + uint8_t esz = ray_sym_elem_size(keys->type, keys->attrs); + const uint8_t* base = (const uint8_t*)ray_data(keys); + for (int64_t i = 0; i < n; i++) { + if (i == idx) continue; + ray_t* nk = ray_vec_append(new_keys, base + (size_t)i * esz); + if (!nk || RAY_IS_ERR(nk)) { ray_release(new_keys); ray_release(d); return nk ? 
nk : ray_error("oom", NULL); } + new_keys = nk; + } + } + slots[0] = new_keys; + ray_release(keys); + + /* Drop vals[idx] from the LIST. */ + vals = ray_cow(vals); + if (!vals || RAY_IS_ERR(vals)) { ray_release(d); return vals; } + slots[1] = vals; + ray_t** vslots = (ray_t**)ray_data(vals); + if (vslots[idx]) ray_release(vslots[idx]); + for (int64_t i = idx; i + 1 < vals->len; i++) vslots[i] = vslots[i + 1]; + vals->len -= 1; + + return d; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/table/dict.h b/crates/rayforce-sys/vendor/rayforce/src/table/dict.h new file mode 100644 index 0000000..a6e1d0d --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/table/dict.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_DICT_H +#define RAY_DICT_H + +/* + * dict.h -- Dict operations. + * + * A dict has type = RAY_DICT (99), len = 2. 
Data region holds two + * ray_t* slots: + * slot[0] = keys vector (any vector type) + * slot[1] = vals — typed vector when homogeneous, RAY_LIST otherwise + * + * Layout mirrors RAY_TABLE's + * (keys, vals) shape. + * + * Lookup dispatches on keys->type so polymorphic keys (sym, i64, str, …) + * all use the same probe path. Pair count == keys->len. + */ + +#include +#include "mem/heap.h" + +/* Internal slot accessors — keys/vals slots in the 2-pointer block. */ +static inline ray_t** ray_dict_slots(ray_t* d) { + return (ray_t**)ray_data(d); +} + +/* Lookup index of `key_atom` in `keys` vector, or -1 if not found. + * `key_atom` may be of any atom type matching the keys vector. */ +int64_t ray_dict_find_idx(ray_t* d, ray_t* key_atom); + +/* Find sym key index without atom boxing. Returns -1 if d is not a + * RAY_DICT, keys is not RAY_SYM, or sym_id is missing. */ +int64_t ray_dict_find_sym(ray_t* d, int64_t sym_id); + +/* Borrowed-ref probe for sym key. Returns the slot pointer when vals is + * RAY_LIST; returns NULL otherwise (typed-vec dicts require boxing — use + * ray_dict_get for those). Used by env-path resolution where dict vals + * are always callables/atoms/sub-dicts, never typed columns. */ +ray_t* ray_dict_probe_sym_borrowed(ray_t* d, int64_t sym_id); + +/* Borrowed sym-key probe for either RAY_DICT or RAY_TABLE — returns the + * value slot for dicts and the column vector for tables. NULL on miss. */ +ray_t* ray_container_probe_sym(ray_t* v, int64_t sym_id); + +#endif /* RAY_DICT_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/table/sym.c b/crates/rayforce-sys/vendor/rayforce/src/table/sym.c new file mode 100644 index 0000000..02d1e1a --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/table/sym.c @@ -0,0 +1,1251 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "sym.h" +#include "core/platform.h" +#include "store/col.h" +#include "store/fileio.h" +#include "mem/heap.h" +#include "mem/sys.h" +#include "mem/arena.h" +#include +#include +#include +#include +#include "ops/hash.h" + +/* -------------------------------------------------------------------------- + * Symbol table structure (static global, sequential mode only). + * NOT thread-safe: all interning must happen before ray_parallel_begin(). + * -------------------------------------------------------------------------- */ + +#define SYM_INIT_CAP 256 +#define SYM_LOAD_FACTOR 0.7 + +/* Cached segment list for a dotted sym: nsegs sym_ids that together make up + * the dotted path. segs is arena-allocated (same lifetime as sym table). 
*/ +typedef struct { + uint8_t nsegs; + int64_t* segs; /* length nsegs; NULL for non-dotted entries */ +} sym_segs_t; + +typedef struct { + /* Hash table: each bucket stores (hash32 << 32) | (id + 1), 0 = empty */ + uint64_t* buckets; + uint32_t bucket_cap; /* always power of 2 */ + + /* String array: strings[id] = ray_t* string atom */ + ray_t** strings; + uint32_t str_count; + uint32_t str_cap; + + /* Per-sym dotted-path metadata, parallel to strings[]. + * `dotted` is a bitmap (1 bit per sym_id); bit set = name is dotted + * and segment sym_ids are cached in `segments`. + * `scanned` is a bitmap; bit set = sym_cache_segments has settled this + * sym (either cached successfully, or decided it is a plain name). + * Unset = needs to be (re-)scanned on the next intern call, which is + * how we recover from a transient cache OOM on first intern: the + * bit stays clear, so future interns of the same name retry. + * `segments` holds cached segment sym_ids; segs == NULL when dotted + * bit is clear. */ + uint64_t* dotted; /* (str_cap + 63) / 64 words */ + uint64_t* scanned; /* (str_cap + 63) / 64 words */ + sym_segs_t* segments; /* length str_cap */ + + /* Persistence: entries [0..persisted_count-1] are known on disk */ + uint32_t persisted_count; + + /* Arena for string atoms — avoids per-string buddy allocator calls */ + ray_arena_t* arena; +} sym_table_t; + +static sym_table_t g_sym; +static _Atomic(bool) g_sym_inited = false; + +/* Spinlock protecting g_sym mutations in ray_sym_intern */ +static _Atomic(int) g_sym_lock = 0; +static inline void sym_lock(void) { + while (atomic_exchange_explicit(&g_sym_lock, 1, memory_order_acquire)) { +#if defined(__x86_64__) || defined(__i386__) + __builtin_ia32_pause(); +#endif + } +} +static inline void sym_unlock(void) { + atomic_store_explicit(&g_sym_lock, 0, memory_order_release); +} + +/* Arena-backed ray_str equivalent. Same logic as ray_str() in atom.c + * but allocates from the sym arena instead of the buddy allocator. 
*/ +static ray_t* sym_str_arena(ray_arena_t* arena, const char* s, size_t len) { + if (len < 7) { + /* SSO path: inline in header */ + ray_t* v = ray_arena_alloc(arena, 0); + if (!v) return NULL; + v->type = -RAY_STR; + v->slen = (uint8_t)len; + if (len > 0) memcpy(v->sdata, s, len); + v->sdata[len] = '\0'; + return v; + } + /* Long string: fused single allocation for U8 vector + STR header. + * Layout: [CHAR ray_t header (32B) | string data (len+1) | padding | STR ray_t header (32B)] + * This halves arena_alloc calls for long strings. */ + size_t data_size = len + 1; + size_t chars_block = ((32 + data_size) + 31) & ~(size_t)31; /* align up to 32 */ + ray_t* chars = ray_arena_alloc(arena, chars_block + 32 - 32); /* chars_block - 32 (header) + 32 (str header) */ + if (!chars) return NULL; + chars->type = RAY_U8; + chars->len = (int64_t)len; + memcpy(ray_data(chars), s, len); + ((char*)ray_data(chars))[len] = '\0'; + + /* STR header sits right after the CHAR block */ + ray_t* v = (ray_t*)((char*)chars + chars_block); + memset(v, 0, 32); + v->attrs = RAY_ATTR_ARENA; + ray_atomic_store(&v->rc, 1); + v->type = -RAY_STR; + v->obj = chars; + return v; +} + +/* -------------------------------------------------------------------------- + * ray_sym_init + * -------------------------------------------------------------------------- */ + +ray_err_t ray_sym_init(void) { + bool expected = false; + if (!atomic_compare_exchange_strong_explicit(&g_sym_inited, &expected, true, + memory_order_acq_rel, memory_order_acquire)) + return RAY_OK; /* already initialized by another thread */ + + g_sym.bucket_cap = SYM_INIT_CAP; + /* ray_sys_alloc uses mmap(MAP_ANONYMOUS) which zero-initializes. 
*/ + g_sym.buckets = (uint64_t*)ray_sys_alloc(g_sym.bucket_cap * sizeof(uint64_t)); + if (!g_sym.buckets) { + atomic_store_explicit(&g_sym_inited, false, memory_order_release); + return RAY_ERR_OOM; + } + + g_sym.str_cap = SYM_INIT_CAP; + g_sym.str_count = 0; + g_sym.strings = (ray_t**)ray_sys_alloc(g_sym.str_cap * sizeof(ray_t*)); + if (!g_sym.strings) { + ray_sys_free(g_sym.buckets); + g_sym.buckets = NULL; + atomic_store_explicit(&g_sym_inited, false, memory_order_release); + return RAY_ERR_OOM; + } + + g_sym.arena = ray_arena_new(1024 * 1024); /* 1MB chunks */ + if (!g_sym.arena) { + ray_sys_free(g_sym.strings); + ray_sys_free(g_sym.buckets); + g_sym.strings = NULL; + g_sym.buckets = NULL; + atomic_store_explicit(&g_sym_inited, false, memory_order_release); + return RAY_ERR_OOM; + } + + /* Dotted-path sidecars sized to str_cap. ray_sys_alloc is MAP_ANONYMOUS + * so memory is zero-initialised — bitmaps start all-zero, segments[i] + * structs start {nsegs:0, segs:NULL}. Failures free prior allocations + * and roll the sym table back to uninitialised. 
*/ + uint32_t bm_words = (g_sym.str_cap + 63) / 64; + g_sym.dotted = (uint64_t*)ray_sys_alloc((size_t)bm_words * sizeof(uint64_t)); + g_sym.scanned = (uint64_t*)ray_sys_alloc((size_t)bm_words * sizeof(uint64_t)); + g_sym.segments = (sym_segs_t*)ray_sys_alloc((size_t)g_sym.str_cap * sizeof(sym_segs_t)); + if (!g_sym.dotted || !g_sym.scanned || !g_sym.segments) { + if (g_sym.dotted) ray_sys_free(g_sym.dotted); + if (g_sym.scanned) ray_sys_free(g_sym.scanned); + if (g_sym.segments) ray_sys_free(g_sym.segments); + g_sym.dotted = NULL; + g_sym.scanned = NULL; + g_sym.segments = NULL; + ray_arena_destroy(g_sym.arena); + g_sym.arena = NULL; + ray_sys_free(g_sym.strings); + ray_sys_free(g_sym.buckets); + g_sym.strings = NULL; + g_sym.buckets = NULL; + atomic_store_explicit(&g_sym_inited, false, memory_order_release); + return RAY_ERR_OOM; + } + + /* g_sym_inited already set to true by CAS above */ + return RAY_OK; +} + +/* -------------------------------------------------------------------------- + * ray_sym_destroy + * -------------------------------------------------------------------------- */ + +void ray_sym_destroy(void) { + if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) return; + + /* Arena-backed strings: ray_release is a no-op (RAY_ATTR_ARENA). + * Destroy the arena to free all string atoms at once. + * segments[i].segs pointers are arena-allocated too, freed with it. 
*/ + if (g_sym.arena) { + ray_arena_destroy(g_sym.arena); + g_sym.arena = NULL; + } + + if (g_sym.segments) ray_sys_free(g_sym.segments); + if (g_sym.scanned) ray_sys_free(g_sym.scanned); + if (g_sym.dotted) ray_sys_free(g_sym.dotted); + ray_sys_free(g_sym.strings); + ray_sys_free(g_sym.buckets); + + memset(&g_sym, 0, sizeof(g_sym)); + atomic_store_explicit(&g_sym_inited, false, memory_order_release); +} + +/* -------------------------------------------------------------------------- + * Hash table helpers + * -------------------------------------------------------------------------- */ + +static void ht_insert(uint64_t* buckets, uint32_t cap, uint32_t hash, uint32_t id) { + uint32_t mask = cap - 1; + uint32_t slot = hash & mask; + uint64_t entry = ((uint64_t)hash << 32) | ((uint64_t)(id + 1)); + + for (;;) { + if (buckets[slot] == 0) { + buckets[slot] = entry; + return; + } + slot = (slot + 1) & mask; + } +} + +/* Grow hash table to new_cap (must be power of 2 and > current cap). */ +static bool ht_grow_to(uint32_t new_cap) { + uint64_t* new_buckets = (uint64_t*)ray_sys_alloc((size_t)new_cap * sizeof(uint64_t)); + if (!new_buckets) return false; + + /* Re-insert all existing entries */ + for (uint32_t i = 0; i < g_sym.bucket_cap; i++) { + uint64_t e = g_sym.buckets[i]; + if (e == 0) continue; + uint32_t h = (uint32_t)(e >> 32); + uint32_t id = (uint32_t)(e & 0xFFFFFFFF) - 1; + ht_insert(new_buckets, new_cap, h, id); + } + + ray_sys_free(g_sym.buckets); + g_sym.buckets = new_buckets; + g_sym.bucket_cap = new_cap; + return true; +} + +static bool ht_grow(void) { + /* Overflow guard: bucket_cap is always power of 2. + * At 2^31, doubling overflows uint32_t. */ + if (g_sym.bucket_cap >= (UINT32_MAX / 2 + 1)) return false; + return ht_grow_to(g_sym.bucket_cap * 2); +} + +/* -------------------------------------------------------------------------- + * sym_grow_str_cap — grow strings[], the dotted[]/scanned[] bitmaps, and + * segments[] to hold at least new_cap entries.
Must be called with sym_lock held + * (or from within single-threaded prehashed intern). Zero-fills the new + * portion of segments[] explicitly (realloc of a mapped region may return + * pages that weren't touched but we don't want to rely on virgin mmap). + * -------------------------------------------------------------------------- */ +static bool sym_grow_str_cap(uint32_t new_cap) { + uint32_t old_cap = g_sym.str_cap; + if (new_cap <= old_cap) return true; + + ray_t** new_strings = (ray_t**)ray_sys_realloc(g_sym.strings, + (size_t)new_cap * sizeof(ray_t*)); + if (!new_strings) return false; + g_sym.strings = new_strings; + + uint32_t old_bm_words = (old_cap + 63) / 64; + uint32_t new_bm_words = (new_cap + 63) / 64; + if (new_bm_words > old_bm_words) { + uint64_t* new_dotted = (uint64_t*)ray_sys_realloc(g_sym.dotted, + (size_t)new_bm_words * sizeof(uint64_t)); + if (!new_dotted) return false; + memset(new_dotted + old_bm_words, 0, + (size_t)(new_bm_words - old_bm_words) * sizeof(uint64_t)); + g_sym.dotted = new_dotted; + + uint64_t* new_scanned = (uint64_t*)ray_sys_realloc(g_sym.scanned, + (size_t)new_bm_words * sizeof(uint64_t)); + if (!new_scanned) return false; + memset(new_scanned + old_bm_words, 0, + (size_t)(new_bm_words - old_bm_words) * sizeof(uint64_t)); + g_sym.scanned = new_scanned; + } + + sym_segs_t* new_segments = (sym_segs_t*)ray_sys_realloc(g_sym.segments, + (size_t)new_cap * sizeof(sym_segs_t)); + if (!new_segments) return false; + memset(new_segments + old_cap, 0, + (size_t)(new_cap - old_cap) * sizeof(sym_segs_t)); + g_sym.segments = new_segments; + + g_sym.str_cap = new_cap; + return true; +} + +/* Forward declarations — sym_cache_segments (below) needs these helpers + * that are defined further down in the file. ray_sym_bytes_upper is + * declared in sym.h as a public inline so both the intern path and the + * test suite can refer to the same formula. 
*/
+static int64_t sym_intern_nolock(uint32_t hash, const char* str, size_t len);
+static int64_t sym_probe(uint32_t hash, const char* str, size_t len);
+static int64_t sym_commit_new(uint32_t hash, const char* str, size_t len);
+static bool    sym_reserve_capacity(uint32_t new_sym_count, size_t arena_bytes);
+
+/* --------------------------------------------------------------------------
+ * sym_cache_segments — idempotent cache-and-apply for an EXISTING sym.
+ * Used by ray_sym_rebuild_segments (after bulk persistence loads) and by
+ * the probe-found branch of sym_intern_nolock (a prior intern via
+ * ray_sym_intern_no_split may have committed the sym without ever
+ * running the cache prep).
+ *
+ * Atomic: same inspect + reserve + commit pattern as sym_intern_nolock,
+ * so a failure here leaves no orphan segment syms and no half-applied
+ * cache state.  Returns false only on real OOM — scanned stays clear
+ * in that case so future retries pick up where we left off.
+ *
+ * Parameters:
+ *   new_id — id of the already-committed sym whose cache is being built.
+ *   str    — the sym's bytes (not NUL-terminated; `len` bytes long).
+ *   len    — byte length of `str`.
+ *
+ * A name that repeats a not-yet-interned segment (`a.a`, `x.y.x`) probes
+ * as "missing" once per occurrence during inspection.  The commit loop
+ * therefore re-probes right before committing, so each distinct segment
+ * string is interned exactly once and every occurrence shares one id.
+ * -------------------------------------------------------------------------- */
+static bool sym_cache_segments(uint32_t new_id, const char* str, size_t len) {
+    uint64_t bit = (uint64_t)1 << (new_id & 63);
+    uint32_t word = new_id >> 6;
+    if (g_sym.scanned[word] & bit) return true;
+
+    const char* first_dot = (const char*)memchr(str, '.', len);
+    if (!first_dot) {
+        /* Plain — mark settled. */
+        g_sym.scanned[word] |= bit;
+        return true;
+    }
+
+    /* Validate structure.  Trailing dot → not dotted.  Leading `.` is
+     * allowed ONLY when followed by another dot (e.g. `.sys.gc`) —
+     * in that case segment 0 includes the leading dot (`.sys`), so
+     * reserved-namespace names resolve against their root dict via
+     * the regular segment walk.  (len >= 1 here: memchr found a dot.) */
+    if (str[len - 1] == '.') {
+        g_sym.scanned[word] |= bit;
+        return true;
+    }
+    bool leading_dot = (str[0] == '.');
+    if (leading_dot) {
+        /* `.sys` alone (no second dot) is a plain name. */
+        const char* second = (const char*)memchr(str + 1, '.', len - 1);
+        if (!second) { g_sym.scanned[word] |= bit; return true; }
+    }
+    size_t sep_dots = 0;
+    for (size_t i = (leading_dot ? 1 : 0); i < len; i++)
+        if (str[i] == '.') sep_dots++;
+    if (sep_dots + 1 > 255) {
+        /* Too many segments for the uint8_t count — treat as plain. */
+        g_sym.scanned[word] |= bit;
+        return true;
+    }
+    uint8_t nsegs = (uint8_t)(sep_dots + 1);
+
+    /* Inspect: split into segments and probe each one.  No side effects. */
+    struct { const char* p; size_t len; uint32_t hash; int64_t id; } descs[256];
+    uint32_t new_seg_count = 0;
+    size_t   new_seg_bytes = 0;
+    {
+        const char* p = str;
+        size_t remaining = len;
+        uint8_t i = 0;
+        while (remaining && i < nsegs) {
+            /* Segment 0 starts at str[0] but skips the leading `.` when
+             * searching for the segment-terminating dot — so seg 0 of
+             * `.sys.gc` is `.sys`, not `` (empty). */
+            size_t skip = (i == 0 && leading_dot) ? 1 : 0;
+            const char* dot = remaining > skip
+                ? (const char*)memchr(p + skip, '.', remaining - skip)
+                : NULL;
+            size_t seg_len = dot ? (size_t)(dot - p) : remaining;
+            /* Empty segment (`a..b`) → not a valid dotted name. */
+            if (seg_len == 0) { g_sym.scanned[word] |= bit; return true; }
+            uint32_t h = (uint32_t)ray_hash_bytes(p, seg_len);
+            descs[i].p    = p;
+            descs[i].len  = seg_len;
+            descs[i].hash = h;
+            descs[i].id   = sym_probe(h, p, seg_len);
+            if (descs[i].id < 0) {
+                /* Repeated new segments are counted once per occurrence;
+                 * that only over-reserves, which is harmless. */
+                new_seg_count++;
+                new_seg_bytes += ray_sym_bytes_upper(seg_len);
+            }
+            i++;
+            if (!dot) break;
+            remaining -= (seg_len + 1);
+            p = dot + 1;
+        }
+    }
+
+    /* Reserve capacity for new segments + segs array. */
+    size_t segs_payload = (size_t)nsegs * sizeof(int64_t);
+    size_t arena_bytes  = new_seg_bytes
+                        + (((size_t)32 + segs_payload + 31) & ~(size_t)31);
+    if (!sym_reserve_capacity(new_seg_count, arena_bytes)) return false;
+
+    /* Commit.  Allocations covered by reservation above. */
+    for (uint8_t i = 0; i < nsegs; i++) {
+        if (descs[i].id < 0) {
+            /* An earlier iteration may have just committed this very
+             * string (repeated segment) — re-probe so we never insert a
+             * duplicate entry for the same name. */
+            int64_t sid = sym_probe(descs[i].hash, descs[i].p, descs[i].len);
+            if (sid < 0) {
+                sid = sym_commit_new(descs[i].hash, descs[i].p, descs[i].len);
+                if (sid < 0) return false;  /* reservation should have prevented */
+                /* A segment contains no dot, so it is settled as plain. */
+                g_sym.scanned[sid >> 6] |= ((uint64_t)1 << (sid & 63));
+            }
+            descs[i].id = sid;
+        }
+    }
+
+    int64_t* segs = (int64_t*)ray_arena_alloc(g_sym.arena, segs_payload);
+    if (!segs) return false;  /* reservation should have prevented */
+    for (uint8_t i = 0; i < nsegs; i++) segs[i] = descs[i].id;
+
+    g_sym.segments[new_id].nsegs = nsegs;
+    g_sym.segments[new_id].segs  = segs;
+    g_sym.dotted[word]  |= bit;
+    g_sym.scanned[word] |= bit;
+    return true;
+}
+
+/* --------------------------------------------------------------------------
+ * sym_probe — hash-table lookup only.  Returns sym_id for an existing
+ * entry or -1 if not present.  No side effects.
+ *
+ * Buckets hold (hash32 << 32) | (id + 1); a zero bucket ends the probe
+ * sequence.  Termination relies on the table never being completely full.
+ * -------------------------------------------------------------------------- */
+static int64_t sym_probe(uint32_t hash, const char* str, size_t len) {
+    uint32_t mask = g_sym.bucket_cap - 1;
+    uint32_t slot = hash & mask;
+    for (;;) {
+        uint64_t e = g_sym.buckets[slot];
+        if (e == 0) return -1;
+        uint32_t e_hash = (uint32_t)(e >> 32);
+        if (e_hash == hash) {
+            uint32_t e_id = (uint32_t)(e & 0xFFFFFFFF) - 1;
+            ray_t* existing = g_sym.strings[e_id];
+            if (ray_str_len(existing) == len &&
+                memcmp(ray_str_ptr(existing), str, len) == 0) {
+                return (int64_t)e_id;
+            }
+        }
+        slot = (slot + 1) & mask;
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * sym_commit_new — insert a NEW sym (caller must have confirmed it does
+ * not already exist).  Grows the hash/strings tables as needed, allocates
+ * the string atom from the arena, inserts into the hash table.  Returns
+ * new sym_id or -1 on OOM.  No cache side effect.
+ * -------------------------------------------------------------------------- */
+static int64_t sym_commit_new(uint32_t hash, const char* str, size_t len) {
+    /* Past 70% load we try to grow the bucket array.  A failed grow is
+     * tolerated until the table is 95% full; beyond that the insert is
+     * refused so linear probing can never spin on a full table.
+     * Products are widened to 64 bits: bucket_cap may be >= 2^26. */
+    if ((uint64_t)g_sym.str_count * 100 >= (uint64_t)g_sym.bucket_cap * 70
+        && !ht_grow()) {
+        bool critically_full =
+            (uint64_t)g_sym.str_count * 100 >= (uint64_t)g_sym.bucket_cap * 95;
+        if (critically_full) return -1;
+    }
+
+    /* The next id is simply the current count. */
+    const uint32_t fresh_id = g_sym.str_count;
+
+    /* Double strings[] (and its sidecars) when the id would not fit. */
+    if (fresh_id >= g_sym.str_cap) {
+        bool can_double = g_sym.str_cap < UINT32_MAX / 2;
+        if (!can_double) return -1;
+        if (!sym_grow_str_cap(g_sym.str_cap * 2)) return -1;
+    }
+
+    /* The string atom lives in the sym arena rather than the buddy
+     * allocator; arena blocks carry rc=1 and RAY_ATTR_ARENA. */
+    ray_t* atom = sym_str_arena(g_sym.arena, str, len);
+    if (atom == NULL) return -1;
+    g_sym.strings[fresh_id] = atom;
+    g_sym.str_count++;
+
+    /* ht_insert probes from hash & mask for an empty slot, so it is
+     * correct whether or not ht_grow replaced the bucket array above. */
+    ht_insert(g_sym.buckets, g_sym.bucket_cap, hash, fresh_id);
+
+    return (int64_t)fresh_id;
+}
+
+/* --------------------------------------------------------------------------
+ * sym_intern_nolock_noseg — intern WITHOUT the segment-caching side
+ * effect.  Persistence paths (ray_sym_load, ray_sym_save's merge phase)
+ * use this variant because segment sub-interning during load would
+ * append new ids mid-sequence and break the disk-position==sym_id
+ * invariant.  After the bulk op, call ray_sym_rebuild_segments to
+ * populate the dotted bitmap + segments cache.  Assumes caller holds
+ * sym_lock (or is in the single-threaded prehashed caller contract).
+ * -------------------------------------------------------------------------- */
+static int64_t sym_intern_nolock_noseg(uint32_t hash, const char* str, size_t len) {
+    /* Look up first; only a miss creates a new entry. */
+    int64_t found = sym_probe(hash, str, len);
+    return (found >= 0) ? found : sym_commit_new(hash, str, len);
+}
+
+/* Pre-grow everything a multi-sym commit will need, so the commit itself
+ * cannot fail part-way through:
+ *   - the hash table, kept under 70% load after `new_sym_count` inserts;
+ *   - strings[] and its sidecar arrays, sized for the new total;
+ *   - the arena, with `arena_bytes` of headroom (string atoms plus the
+ *     segs array of a dotted name).
+ * Returns true when all reservations succeeded.  On false no sym has been
+ * committed, although some tables may already have been enlarged. */
+static bool sym_reserve_capacity(uint32_t new_sym_count, size_t arena_bytes) {
+    const uint64_t total = (uint64_t)g_sym.str_count + new_sym_count;
+
+    /* Hash table: keep doubling until `total` sits below the 70% mark. */
+    uint32_t buckets = g_sym.bucket_cap;
+    for (; total * 100 >= (uint64_t)buckets * 70; buckets *= 2) {
+        if (buckets >= (UINT32_MAX / 2 + 1)) return false;  /* cannot double */
+    }
+    if (buckets > g_sym.bucket_cap && !ht_grow_to(buckets)) return false;
+
+    /* strings[] and sidecars: double until `total` ids fit. */
+    if (total > g_sym.str_cap) {
+        uint32_t cap = g_sym.str_cap;
+        do {
+            if (cap >= UINT32_MAX / 2) return false;
+            cap *= 2;
+        } while (cap < total);
+        if (!sym_grow_str_cap(cap)) return false;
+    }
+
+    /* Arena: one reservation covering every forthcoming allocation. */
+    if (arena_bytes != 0 && !ray_arena_reserve(g_sym.arena, arena_bytes))
+        return false;
+
+    return true;
+}
+
+/* --------------------------------------------------------------------------
+ * sym_intern_nolock — fully atomic intern.
+ *
+ * Three phases:
+ *   A. Inspect: probe the main name, validate its dotted shape, probe
+ *      every segment.  No side effects.
+ *   B. Reserve: pre-grow hash/strings/arena to accommodate everything
+ *      we might need to commit.  Can fail → return -1 with no state
+ *      change (no orphan segment syms, no cache fragments).
+ *   C.
Commit: all allocations in this phase are guaranteed by the
+ *      reservations above, so they cannot fail.  Creates any new
+ *      segment syms, creates the main sym, fills the segs cache, sets
+ *      scanned + dotted bits.
+ *
+ * This closes two prior traps:
+ *   - A committed main sym whose dotted bit disagrees with its name's
+ *     structure (env silently routing dotted-path writes/reads through
+ *     the flat path).
+ *   - Orphan segment syms persisting when the main-sym commit fails.
+ *
+ * For an existing sym found in phase A, we still opportunistically try
+ * the cache — that path is the lazy fallback for ray_sym_intern_no_split,
+ * which commits the main sym without a cache on purpose.  A cache-OOM
+ * there is tolerated (scanned bit stays clear → future interns retry).
+ *
+ * Repeated segments: a name such as `a.a` or `x.y.x` whose repeated
+ * segment is not yet interned probes as "missing" once per occurrence in
+ * phase A.  Phase C re-probes immediately before each commit so the
+ * segment is interned once and every occurrence shares the same id.
+ *
+ * Returns the sym id, or -1 on OOM.
+ * -------------------------------------------------------------------------- */
+static int64_t sym_intern_nolock(uint32_t hash, const char* str, size_t len) {
+    /* Phase A.1: probe main. */
+    int64_t existing = sym_probe(hash, str, len);
+    if (existing >= 0) {
+        (void)sym_cache_segments((uint32_t)existing, str, len);
+        return existing;
+    }
+
+    /* Phase A.2: structural validation + per-segment probe. */
+    struct { const char* p; size_t len; uint32_t hash; int64_t id; } descs[256];
+    uint8_t  nsegs = 0;
+    uint32_t new_seg_count = 0;
+    size_t   new_seg_bytes = 0;
+    bool     is_dotted = false;
+
+    const char* first_dot = (const char*)memchr(str, '.', len);
+    if (first_dot) {
+        /* Dotted-name rules (parallel to sym_cache_segments):
+         *   - Trailing dot            → plain (not dotted).
+         *   - Leading dot alone       → plain (`.sys` with no inner dot).
+         *   - Leading dot + inner dot → segment 0 is `.name`, including
+         *                               the leading dot.  This is how
+         *                               reserved-namespace names like
+         *                               `.sys.gc` resolve against the
+         *                               `.sys` root dict.
+         * (len >= 1 here: memchr found a dot.) */
+        bool valid = str[len - 1] != '.';
+        bool leading_dot = (str[0] == '.');
+        if (valid && leading_dot) {
+            const char* second = (const char*)memchr(str + 1, '.', len - 1);
+            if (!second) valid = false;
+        }
+        size_t sep_dots = 0;
+        if (valid) {
+            for (size_t i = (leading_dot ? 1 : 0); i < len; i++)
+                if (str[i] == '.') sep_dots++;
+            /* nsegs is a uint8_t and descs[] holds 256 entries. */
+            if (sep_dots + 1 > 255) valid = false;
+        }
+        if (valid) {
+            nsegs = (uint8_t)(sep_dots + 1);
+            const char* p = str;
+            size_t remaining = len;
+            uint8_t i = 0;
+            while (remaining && i < nsegs) {
+                size_t skip = (i == 0 && leading_dot) ? 1 : 0;
+                const char* dot = remaining > skip
+                    ? (const char*)memchr(p + skip, '.', remaining - skip)
+                    : NULL;
+                size_t seg_len = dot ? (size_t)(dot - p) : remaining;
+                /* Empty segment (`a..b`) → fall back to a plain name. */
+                if (seg_len == 0) { valid = false; break; }
+                uint32_t seg_hash = (uint32_t)ray_hash_bytes(p, seg_len);
+                descs[i].p    = p;
+                descs[i].len  = seg_len;
+                descs[i].hash = seg_hash;
+                descs[i].id   = sym_probe(seg_hash, p, seg_len);
+                if (descs[i].id < 0) {
+                    /* Repeated new segments are counted per occurrence;
+                     * that only over-reserves, which is harmless. */
+                    new_seg_count++;
+                    new_seg_bytes += ray_sym_bytes_upper(seg_len);
+                }
+                i++;
+                if (!dot) break;
+                remaining -= (seg_len + 1);
+                p = dot + 1;
+            }
+            if (valid) is_dotted = true;
+        }
+    }
+
+    /* Phase B: reserve capacity for main + new segments + segs array. */
+    size_t arena_bytes = ray_sym_bytes_upper(len);
+    if (is_dotted) {
+        arena_bytes += new_seg_bytes;
+        /* segs array is arena-allocated via ray_arena_alloc(_, nsegs*8). */
+        size_t segs_payload = (size_t)nsegs * sizeof(int64_t);
+        arena_bytes += ((size_t)32 + segs_payload + 31) & ~(size_t)31;
+    }
+    if (!sym_reserve_capacity(1 + new_seg_count, arena_bytes)) return -1;
+
+    /* Phase C: commit.  Every allocation below is covered by the
+     * reservation above, so nothing here can fail. */
+    if (is_dotted) {
+        for (uint8_t i = 0; i < nsegs; i++) {
+            if (descs[i].id < 0) {
+                /* An earlier iteration may have just committed this very
+                 * string (repeated segment) — re-probe so the table never
+                 * gets two entries for one name. */
+                int64_t sid = sym_probe(descs[i].hash, descs[i].p, descs[i].len);
+                if (sid < 0) {
+                    sid = sym_commit_new(descs[i].hash, descs[i].p, descs[i].len);
+                    /* Reservation guarantees success; defensive check kept. */
+                    if (sid < 0) return -1;
+                    /* Segment is itself a plain name (no dot inside). */
+                    g_sym.scanned[sid >> 6] |= ((uint64_t)1 << (sid & 63));
+                }
+                descs[i].id = sid;
+            }
+        }
+    }
+
+    int64_t main_id = sym_commit_new(hash, str, len);
+    if (main_id < 0) return -1;
+
+    if (is_dotted) {
+        int64_t* segs = (int64_t*)ray_arena_alloc(g_sym.arena,
+            (size_t)nsegs * sizeof(int64_t));
+        /* Scanned stays clear on this path so a later intern retries. */
+        if (!segs) return main_id;  /* reservation should have prevented this */
+        for (uint8_t i = 0; i < nsegs; i++) segs[i] = descs[i].id;
+        g_sym.segments[main_id].nsegs = nsegs;
+        g_sym.segments[main_id].segs  = segs;
+        g_sym.dotted[main_id >> 6] |= ((uint64_t)1 << (main_id & 63));
+    }
+    g_sym.scanned[main_id >> 6] |= ((uint64_t)1 << (main_id & 63));
+
+    return main_id;
+}
+
+/* --------------------------------------------------------------------------
+ * ray_sym_intern — locked public API
+ *
+ * Hashes `str` (len bytes), then interns it under sym_lock.  Returns the
+ * sym id, or -1 when the table is not initialised or on OOM.
+ * -------------------------------------------------------------------------- */
+
+int64_t ray_sym_intern(const char* str, size_t len) {
+    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) return -1;
+    uint32_t hash = (uint32_t)ray_hash_bytes(str, len);
+    sym_lock();
+    int64_t id = sym_intern_nolock(hash, str, len);
+    sym_unlock();
+    return id;
+}
+
+/* --------------------------------------------------------------------------
+ * ray_sym_intern_prehashed -- intern with pre-computed hash, no lock.
+ *
+ * CALLER CONTRACT: must only be called when no other thread is interning
+ * (e.g., after ray_pool_dispatch returns during CSV merge).
+ * -------------------------------------------------------------------------- */
+
+int64_t ray_sym_intern_prehashed(uint32_t hash, const char* str, size_t len) {
+    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) return -1;
+    return sym_intern_nolock(hash, str, len);
+}
+
+/* --------------------------------------------------------------------------
+ * ray_sym_intern_no_split — persistence-only bulk intern
+ *
+ * Interns `str` under sym_lock WITHOUT building the dotted-segment cache,
+ * so no extra ids are appended.  Follow a batch with
+ * ray_sym_rebuild_segments.  Returns the sym id or -1.
+ * -------------------------------------------------------------------------- */
+
+int64_t ray_sym_intern_no_split(const char* str, size_t len) {
+    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) return -1;
+    uint32_t hash = (uint32_t)ray_hash_bytes(str, len);
+    sym_lock();
+    int64_t id = sym_intern_nolock_noseg(hash, str, len);
+    sym_unlock();
+    return id;
+}
+
+/* --------------------------------------------------------------------------
+ * ray_sym_rebuild_segments — populate dotted cache for any not-yet-cached
+ * entries.  Must follow a batch of ray_sym_intern_no_split calls.
+ *
+ * Propagates the first allocation/sub-intern failure as RAY_ERR_OOM so
+ * persistence callers (ray_sym_load / ray_sym_save merge) can abort
+ * cleanly rather than silently leaving dotted names un-cached — that
+ * would degrade them to flat-sym semantics and break env lookup for any
+ * name the user wrote with a '.' in it.
+ * -------------------------------------------------------------------------- */
+
+ray_err_t ray_sym_rebuild_segments(void) {
+    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) return RAY_ERR_IO;
+    sym_lock();
+    /* Snapshot upper bound — sym_cache_segments may append segment entries
+     * beyond the original range, but those new entries themselves are
+     * non-dotted segment names and so produce no further work.  Use the
+     * scanned bitmap to skip: anything already settled (plain or dotted)
+     * avoids even the memchr inside sym_cache_segments. */
+    uint32_t count = g_sym.str_count;
+    for (uint32_t i = 0; i < count; i++) {
+        if (g_sym.scanned[i >> 6] & ((uint64_t)1 << (i & 63))) continue;
+        ray_t* s = g_sym.strings[i];
+        if (!s) continue;
+        if (!sym_cache_segments(i, ray_str_ptr(s), ray_str_len(s))) {
+            sym_unlock();
+            return RAY_ERR_OOM;
+        }
+    }
+    sym_unlock();
+    return RAY_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * Dotted-name accessors
+ *
+ * Both accessors read the sidecar arrays without taking sym_lock.
+ * NOTE(review): presumably they are only used while no thread is
+ * interning (a concurrent grow reallocs dotted[]/segments[]) — confirm
+ * against callers.
+ *
+ * The range check compares the full 64-bit id against str_count.  The
+ * previous `(uint32_t)sym_id` comparison let ids >= 2^32 alias a valid
+ * low-32-bit id, and ray_sym_segs then indexed with the unmasked value.
+ * -------------------------------------------------------------------------- */
+
+bool ray_sym_is_dotted(int64_t sym_id) {
+    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) return false;
+    if (sym_id < 0 || (uint64_t)sym_id >= g_sym.str_count) return false;
+    uint64_t word = g_sym.dotted[(uint32_t)sym_id >> 6];
+    return (word >> ((uint32_t)sym_id & 63)) & 1;
+}
+
+/* Returns the segment count of a dotted sym and stores the segment-id
+ * array in *out_segs (when non-NULL); returns 0 for plain, uncached or
+ * out-of-range ids, leaving *out_segs untouched. */
+int ray_sym_segs(int64_t sym_id, const int64_t** out_segs) {
+    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) return 0;
+    if (sym_id < 0 || (uint64_t)sym_id >= g_sym.str_count) return 0;
+    sym_segs_t s = g_sym.segments[sym_id];
+    if (s.nsegs == 0 || !s.segs) return 0;
+    if (out_segs) *out_segs = s.segs;
+    return (int)s.nsegs;
+}
+
+/* --------------------------------------------------------------------------
+ * ray_sym_find
+ *
+ * Read-only lookup: returns the id of an already-interned name or -1.
+ * Never interns.  Shares the probe loop with the intern path (sym_probe).
+ * -------------------------------------------------------------------------- */
+
+int64_t ray_sym_find(const char* str, size_t len) {
+    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) return -1;
+
+    /* Hashing touches no shared state, so it stays outside the lock
+     * (same shape as ray_sym_intern). */
+    uint32_t hash = (uint32_t)ray_hash_bytes(str, len);
+
+    /* Lock required: concurrent ray_sym_intern may trigger ht_grow which
+     * frees and replaces g_sym.buckets -- reading without lock is UAF. */
+    sym_lock();
+    int64_t id = sym_probe(hash, str, len);
+    sym_unlock();
+    return id;
+}
+
+/* --------------------------------------------------------------------------
+ * ray_sym_str
+ * -------------------------------------------------------------------------- */
+
+/* Returned pointer is valid only while no concurrent ray_sym_intern occurs.
+ * Safe during read-only execution phase (after all interning is complete).
+ * Caller must not store the pointer across sym table mutations (ht_grow
+ * or strings realloc).
+ * Returns NULL for ids outside [0, str_count); the check uses the full
+ * 64-bit id so values >= 2^32 cannot alias a valid slot. */
+ray_t* ray_sym_str(int64_t id) {
+    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) return NULL;
+
+    /* Lock required: concurrent ray_sym_intern may realloc g_sym.strings. */
+    sym_lock();
+    if (id < 0 || (uint64_t)id >= g_sym.str_count) { sym_unlock(); return NULL; }
+    ray_t* s = g_sym.strings[id];
+    sym_unlock();
+    return s;
+}
+
+/* --------------------------------------------------------------------------
+ * ray_sym_count
+ * -------------------------------------------------------------------------- */
+
+uint32_t ray_sym_count(void) {
+    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) return 0;
+
+    /* Lock required: concurrent ray_sym_intern may modify str_count. */
+    sym_lock();
+    uint32_t count = g_sym.str_count;
+    sym_unlock();
+    return count;
+}
+
+/* --------------------------------------------------------------------------
+ * ray_sym_ensure_cap -- pre-grow hash table and strings array
+ *
+ * Ensures the symbol table can hold at least `needed` total symbols without
+ * rehashing.  Call before bulk interning (e.g., CSV merge) to prevent
+ * mid-insert OOM that silently drops symbols.
+ *
+ * Returns false when the table is uninitialised, when a growth step fails,
+ * or when the required capacity cannot be represented in 32 bits.
+ * -------------------------------------------------------------------------- */
+
+bool ray_sym_ensure_cap(uint32_t needed) {
+    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) return false;
+
+    sym_lock();
+
+    /* Grow strings array (and sidecars) if needed */
+    while (g_sym.str_cap < needed) {
+        if (g_sym.str_cap >= UINT32_MAX / 2) { sym_unlock(); return false; }
+        uint32_t new_str_cap = g_sym.str_cap * 2;
+        if (new_str_cap < needed) {  /* jump directly to needed */
+            new_str_cap = needed;
+            /* Round up to power of 2 */
+            new_str_cap--;
+            new_str_cap |= new_str_cap >> 1;
+            new_str_cap |= new_str_cap >> 2;
+            new_str_cap |= new_str_cap >> 4;
+            new_str_cap |= new_str_cap >> 8;
+            new_str_cap |= new_str_cap >> 16;
+            new_str_cap++;
+            if (new_str_cap == 0) { sym_unlock(); return false; }
+        }
+        if (!sym_grow_str_cap(new_str_cap)) { sym_unlock(); return false; }
+    }
+
+    /* Grow hash table so load factor stays below threshold after filling */
+    double raw_buckets = (double)needed / SYM_LOAD_FACTOR + 1.0;
+    if (raw_buckets > (double)UINT32_MAX) { sym_unlock(); return false; }
+    uint32_t needed_buckets = (uint32_t)raw_buckets;
+    /* Round up to power of 2 */
+    needed_buckets--;
+    needed_buckets |= needed_buckets >> 1;
+    needed_buckets |= needed_buckets >> 2;
+    needed_buckets |= needed_buckets >> 4;
+    needed_buckets |= needed_buckets >> 8;
+    needed_buckets |= needed_buckets >> 16;
+    needed_buckets++;
+    /* Above 2^31 the round-up wraps to 0; without this check the
+     * comparison below would skip the grow and report success. */
+    if (needed_buckets == 0) { sym_unlock(); return false; }
+
+    if (needed_buckets > g_sym.bucket_cap) {
+        if (!ht_grow_to(needed_buckets)) { sym_unlock(); return false; }
+    }
+
+    sym_unlock();
+    return true;
+}
+
+/* --------------------------------------------------------------------------
+ * ray_sym_save -- serialize symbol table as RAY_LIST of -RAY_STR
+ *
+ * Uses ray_col_save (STRL format), file locking for concurrent writers,
+ * and fsync + atomic rename for crash safety.  Append-only: skips save
+ * when persisted_count == str_count.
+ *
+ * Steps: take `<path>.lk` exclusively → merge entries already on disk
+ * (aborting if their positions disagree with in-memory ids) → snapshot
+ * strings[] → write `<path>.tmp` → fsync → rename over `path` → fsync the
+ * parent directory → record persisted_count.
+ * -------------------------------------------------------------------------- */
+
+ray_err_t ray_sym_save(const char* path) {
+    if (!path) return RAY_ERR_IO;
+    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) return RAY_ERR_IO;
+
+    /* Quick check: nothing new to persist? */
+    sym_lock();
+    if (g_sym.persisted_count == g_sym.str_count) {
+        sym_unlock();
+        return RAY_OK;
+    }
+    sym_unlock();
+
+    /* Build lock and temp paths */
+    char lock_path[1024];
+    char tmp_path[1024];
+    if (snprintf(lock_path, sizeof(lock_path), "%s.lk", path) >= (int)sizeof(lock_path))
+        return RAY_ERR_IO;
+    if (snprintf(tmp_path, sizeof(tmp_path), "%s.tmp", path) >= (int)sizeof(tmp_path))
+        return RAY_ERR_IO;
+
+    /* Acquire cross-process exclusive lock */
+    ray_fd_t lock_fd = ray_file_open(lock_path, RAY_OPEN_READ | RAY_OPEN_WRITE | RAY_OPEN_CREATE);
+    if (lock_fd == RAY_FD_INVALID) return RAY_ERR_IO;
+    ray_err_t err = ray_file_lock_ex(lock_fd);
+    if (err != RAY_OK) { ray_file_close(lock_fd); return err; }
+
+    /* If file exists, load and merge (pick up entries from other writers).
+     * Distinguish "file not found" (proceed with full save) from real I/O
+     * errors (abort to avoid overwriting a file we couldn't read). */
+    {
+        ray_t* existing = ray_col_load(path);
+        if (existing && !RAY_IS_ERR(existing)) {
+            if (existing->type != RAY_LIST) {
+                ray_release(existing);
+                ray_file_unlock(lock_fd);
+                ray_file_close(lock_fd);
+                return RAY_ERR_CORRUPT;
+            }
+            /* Intern any new entries from disk (idempotent).
+             * Verify each entry's in-memory ID matches its disk position:
+             * if a local symbol already occupies a slot that disk expects,
+             * the tables have diverged and merging would silently reorder
+             * symbol IDs, corrupting previously written RAY_SYM columns. */
+            ray_t** slots = (ray_t**)ray_data(existing);
+            for (int64_t i = 0; i < existing->len; i++) {
+                ray_t* s = slots[i];
+                if (!s || RAY_IS_ERR(s) || s->type != -RAY_STR) {
+                    ray_release(existing);
+                    ray_file_unlock(lock_fd);
+                    ray_file_close(lock_fd);
+                    return RAY_ERR_CORRUPT;
+                }
+                /* Use the no-split variant: sub-interning segments mid-loop
+                 * would shift subsequent disk positions and spuriously trip
+                 * the id==i check below. */
+                int64_t id = ray_sym_intern_no_split(ray_str_ptr(s), ray_str_len(s));
+                if (id < 0) {
+                    ray_release(existing);
+                    ray_file_unlock(lock_fd);
+                    ray_file_close(lock_fd);
+                    return RAY_ERR_OOM;
+                }
+                if (id != i) {
+                    /* Divergent symbol tables: disk position i maps to
+                     * in-memory id != i.  A local symbol occupies the
+                     * slot, so merging would reorder IDs and corrupt
+                     * any RAY_SYM columns written by the other writer. */
+                    ray_release(existing);
+                    ray_file_unlock(lock_fd);
+                    ray_file_close(lock_fd);
+                    return RAY_ERR_CORRUPT;
+                }
+            }
+            ray_release(existing);
+            /* Populate dotted cache for names just merged in.  An OOM
+             * here would leave some loaded dotted names without a segment
+             * cache, silently degrading their env-lookup semantics — we
+             * must not proceed to write the file as if the merge fully
+             * succeeded. */
+            ray_err_t rebuild_err = ray_sym_rebuild_segments();
+            if (rebuild_err != RAY_OK) {
+                ray_file_unlock(lock_fd);
+                ray_file_close(lock_fd);
+                return rebuild_err;
+            }
+        } else {
+            /* ray_col_load failed — check if the file actually exists.
+             * If it does, the failure is a real I/O/corruption error;
+             * do not overwrite the file with a potentially incomplete
+             * in-memory snapshot. */
+            ray_fd_t probe_fd = ray_file_open(path, RAY_OPEN_READ);
+            if (probe_fd != RAY_FD_INVALID) {
+                /* File exists and is readable but ray_col_load failed —
+                 * corruption or format error; do not overwrite. */
+                ray_file_close(probe_fd);
+                ray_file_unlock(lock_fd);
+                ray_file_close(lock_fd);
+                return RAY_IS_ERR(existing) ? ray_err_from_obj(existing) : RAY_ERR_IO;
+            }
+            if (errno != ENOENT) {
+                /* File may exist but we can't open it (EACCES, EMFILE,
+                 * EIO, etc.) — do not overwrite, report I/O error. */
+                ray_file_unlock(lock_fd);
+                ray_file_close(lock_fd);
+                return RAY_ERR_IO;
+            }
+            /* File does not exist (ENOENT) — proceed with full save */
+        }
+    }
+
+    /* Snapshot string pointers under sym_lock, then build list without it.
+     * Strings are append-only and never freed, so pointers remain valid. */
+    sym_lock();
+    uint32_t count = g_sym.str_count;
+    size_t snap_sz = count * sizeof(ray_t*);
+    ray_t* snap_block = ray_alloc(snap_sz);
+    if (!snap_block || RAY_IS_ERR(snap_block)) {
+        sym_unlock();
+        ray_file_unlock(lock_fd);
+        ray_file_close(lock_fd);
+        return RAY_ERR_OOM;
+    }
+    ray_t** snap = (ray_t**)ray_data(snap_block);
+    memcpy(snap, g_sym.strings, snap_sz);
+    sym_unlock();
+
+    /* Build RAY_LIST of -RAY_STR from snapshot */
+    ray_t* list = ray_list_new((int64_t)count);
+    if (!list || RAY_IS_ERR(list)) {
+        ray_free(snap_block);
+        ray_file_unlock(lock_fd);
+        ray_file_close(lock_fd);
+        return RAY_ERR_OOM;
+    }
+
+    for (uint32_t i = 0; i < count; i++) {
+        list = ray_list_append(list, snap[i]);
+        if (!list || RAY_IS_ERR(list)) {
+            ray_free(snap_block);
+            ray_file_unlock(lock_fd);
+            ray_file_close(lock_fd);
+            return RAY_ERR_OOM;
+        }
+    }
+    ray_free(snap_block);
+
+    /* Save to temp file via ray_col_save (writes STRL format) */
+    err = ray_col_save(list, tmp_path);
+    ray_release(list);
+    if (err != RAY_OK) {
+        remove(tmp_path);
+        ray_file_unlock(lock_fd);
+        ray_file_close(lock_fd);
+        return err;
+    }
+
+    /* Fsync temp file for durability */
+    ray_fd_t tmp_fd = ray_file_open(tmp_path, RAY_OPEN_READ | RAY_OPEN_WRITE);
+    if (tmp_fd == RAY_FD_INVALID) {
+        remove(tmp_path);
+        ray_file_unlock(lock_fd);
+        ray_file_close(lock_fd);
+        return RAY_ERR_IO;
+    }
+    err = ray_file_sync(tmp_fd);
+    ray_file_close(tmp_fd);
+    if (err != RAY_OK) {
+        remove(tmp_path);
+        ray_file_unlock(lock_fd);
+        ray_file_close(lock_fd);
+        return err;
+    }
+
+    /* Atomic rename: tmp -> final path */
+    err = ray_file_rename(tmp_path, path);
+    if (err != RAY_OK) {
+        remove(tmp_path);
+        ray_file_unlock(lock_fd);
+        ray_file_close(lock_fd);
+        return err;
+    }
+
+    /* Fsync parent directory so the new directory entry is durable.
+     * Without this, a crash after rename can lose the new file. */
+    err = ray_file_sync_dir(path);
+    if (err != RAY_OK) {
+        ray_file_unlock(lock_fd);
+        ray_file_close(lock_fd);
+        return err;
+    }
+
+    /* Update persisted count */
+    sym_lock();
+    g_sym.persisted_count = count;
+    sym_unlock();
+
+    ray_file_unlock(lock_fd);
+    ray_file_close(lock_fd);
+    return RAY_OK;
+}
+
+/* Release the shared lock taken by ray_sym_load.  The descriptor is
+ * RAY_FD_INVALID when the load proceeded unlocked on a read-only
+ * filesystem; in that case there is nothing to unlock or close. */
+static void sym_load_release_lock(ray_fd_t lock_fd) {
+    if (lock_fd == RAY_FD_INVALID) return;
+    ray_file_unlock(lock_fd);
+    ray_file_close(lock_fd);
+}
+
+/* --------------------------------------------------------------------------
+ * ray_sym_load -- load symbol table from RAY_LIST file (STRL format)
+ *
+ * Uses ray_col_load to read the list, then interns entries beyond what's
+ * already in memory.  File locking prevents reading a partial write.
+ *
+ * Returns RAY_OK on success; RAY_ERR_IO for path/lock/read failures;
+ * RAY_ERR_CORRUPT when the file is malformed, shorter than the prefix
+ * already loaded, or disagrees with in-memory ids; RAY_ERR_OOM when an
+ * intern fails; otherwise the error from locking, ray_col_load, or the
+ * segment rebuild.
+ * -------------------------------------------------------------------------- */
+
+ray_err_t ray_sym_load(const char* path) {
+    if (!path) return RAY_ERR_IO;
+    if (!atomic_load_explicit(&g_sym_inited, memory_order_acquire)) return RAY_ERR_IO;
+
+    /* Acquire cross-process shared lock.
+     * Try read-only open first so that read-only users (snapshots, read-only
+     * mounts) can load without write permission on the directory.  Fall back
+     * to read-write+create if the lock file doesn't exist yet.  If both fail,
+     * only proceed without locking on read-only filesystem (EROFS) — other
+     * errors (EMFILE, ENFILE, EACCES on writable fs, etc.) are real failures
+     * that would silently drop the shared-lock guarantee. */
+    char lock_path[1024];
+    if (snprintf(lock_path, sizeof(lock_path), "%s.lk", path) >= (int)sizeof(lock_path))
+        return RAY_ERR_IO;
+    ray_fd_t lock_fd = ray_file_open(lock_path, RAY_OPEN_READ);
+    if (lock_fd == RAY_FD_INVALID) {
+        int saved_errno = errno;
+        lock_fd = ray_file_open(lock_path, RAY_OPEN_READ | RAY_OPEN_WRITE | RAY_OPEN_CREATE);
+        if (lock_fd == RAY_FD_INVALID) {
+            /* Only proceed unlocked on read-only filesystem (EROFS) where
+             * concurrent writes are impossible.  All other failures are
+             * real errors that should not be silently ignored. */
+            if (saved_errno != EROFS && errno != EROFS)
+                return RAY_ERR_IO;
+        }
+    }
+    if (lock_fd != RAY_FD_INVALID) {
+        ray_err_t err = ray_file_lock_sh(lock_fd);
+        if (err != RAY_OK) { ray_file_close(lock_fd); return err; }
+    }
+
+    /* From here on lock_fd may still be RAY_FD_INVALID (EROFS path), so
+     * every exit releases it through sym_load_release_lock. */
+
+    /* Load the sym file as a RAY_LIST of -RAY_STR */
+    ray_t* list = ray_col_load(path);
+    if (!list || RAY_IS_ERR(list)) {
+        ray_err_t code = RAY_IS_ERR(list) ? ray_err_from_obj(list) : RAY_ERR_IO;
+        sym_load_release_lock(lock_fd);
+        return code;
+    }
+
+    if (list->type != RAY_LIST || list->len > UINT32_MAX) {
+        ray_release(list);
+        sym_load_release_lock(lock_fd);
+        return RAY_ERR_CORRUPT;
+    }
+
+    /* Validate existing entries match, then intern remaining.
+     * Use persisted_count (not str_count) as the already-loaded prefix:
+     * runtime code may ray_sym_intern transient names that were never
+     * persisted, and those must not participate in prefix validation
+     * or affect the intern start offset. */
+    sym_lock();
+    uint32_t already = g_sym.persisted_count;
+    sym_unlock();
+    ray_t** slots = (ray_t**)ray_data(list);
+
+    /* Reject stale/truncated sym file: if disk has fewer entries than what
+     * we previously loaded from disk, the file is outdated or truncated. */
+    if (already > 0 && list->len < (int64_t)already) {
+        ray_release(list);
+        sym_load_release_lock(lock_fd);
+        return RAY_ERR_CORRUPT;
+    }
+
+    /* Validate entries [0..already-1] match the persisted prefix */
+    for (int64_t i = 0; i < (int64_t)already && i < list->len; i++) {
+        ray_t* s = slots[i];
+        if (!s || RAY_IS_ERR(s) || s->type != -RAY_STR) {
+            ray_release(list);
+            sym_load_release_lock(lock_fd);
+            return RAY_ERR_CORRUPT;
+        }
+        ray_t* mem_s = ray_sym_str(i);
+        if (!mem_s || ray_str_len(mem_s) != ray_str_len(s) ||
+            memcmp(ray_str_ptr(mem_s), ray_str_ptr(s), ray_str_len(s)) != 0) {
+            ray_release(list);
+            sym_load_release_lock(lock_fd);
+            return RAY_ERR_CORRUPT;
+        }
+    }
+
+    /* Intern entries beyond what's already in memory.
+     * Verify each entry's in-memory ID matches its disk position:
+     * if transient runtime-interned symbols already occupy these
+     * slots, the disk entries would get wrong IDs, causing RAY_SYM
+     * columns to resolve the wrong strings. */
+    for (int64_t i = (int64_t)already; i < list->len; i++) {
+        ray_t* s = slots[i];
+        if (!s || RAY_IS_ERR(s) || s->type != -RAY_STR) {
+            ray_release(list);
+            sym_load_release_lock(lock_fd);
+            return RAY_ERR_CORRUPT;
+        }
+        /* Bulk load MUST use the no-split variant so that loading a disk
+         * entry like "user.name" doesn't recursively intern "user" + "name"
+         * mid-loop and shift subsequent disk positions — that would break
+         * the id==i contract below.  Segment cache is populated in one
+         * pass after the loop finishes. */
+        int64_t id = ray_sym_intern_no_split(ray_str_ptr(s), ray_str_len(s));
+        if (id < 0) {
+            ray_release(list);
+            sym_load_release_lock(lock_fd);
+            return RAY_ERR_OOM;
+        }
+        if (id != i) {
+            /* ID mismatch: disk position i was assigned in-memory
+             * id != i, meaning a transient symbol occupies the slot.
+             * The sym table has diverged from disk; continuing would
+             * cause RAY_SYM columns to resolve wrong strings. */
+            ray_release(list);
+            sym_load_release_lock(lock_fd);
+            return RAY_ERR_CORRUPT;
+        }
+    }
+
+    /* Populate dotted cache for every loaded (and previously-loaded) sym.
+     * Idempotent — already-cached entries are skipped.  Runs once per load.
+     * An OOM here must surface: leaving dotted names un-cached would make
+     * env lookup silently resolve them as flat syms, quietly losing
+     * namespace semantics on anything the user stored with a '.' in it. */
+    ray_err_t rebuild_err = ray_sym_rebuild_segments();
+    if (rebuild_err != RAY_OK) {
+        ray_release(list);
+        sym_load_release_lock(lock_fd);
+        return rebuild_err;
+    }
+
+    /* Update persisted count to reflect what is actually on disk.
+     * Use list->len (not str_count) because transient runtime-interned
+     * symbols may exist beyond the persisted prefix. */
+    sym_lock();
+    g_sym.persisted_count = (uint32_t)list->len;
+    sym_unlock();
+
+    ray_release(list);
+    sym_load_release_lock(lock_fd);
+    return RAY_OK;
+}
diff --git a/crates/rayforce-sys/vendor/rayforce/src/table/sym.h b/crates/rayforce-sys/vendor/rayforce/src/table/sym.h
new file mode 100644
index 0000000..e55734c
--- /dev/null
+++ b/crates/rayforce-sys/vendor/rayforce/src/table/sym.h
@@ -0,0 +1,139 @@
+/*
+ *   Copyright (c) 2025-2026 Anton Kundenko
+ *   All rights reserved.
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_SYM_H +#define RAY_SYM_H + +/* + * sym.h -- Global symbol intern table. + * + * Sequential mode: simple hash map + array. wyhash (truncated to 32-bit), + * open addressing with linear probing. Stores (hash32 << 32) | (id + 1) + * so that 0 means empty bucket. + */ + +#include +#include "core/types.h" +#include + +/* Symbol width encoding (lower 2 bits of attrs when type == RAY_SYM). + * RAY_SYM_W{8,16,32,64} are now declared in for embedders. 
*/ +#define RAY_SYM_W_MASK 0x03 +#ifndef RAY_SYM_W8 +#define RAY_SYM_W8 0x00 +#define RAY_SYM_W16 0x01 +#define RAY_SYM_W32 0x02 +#define RAY_SYM_W64 0x03 +#endif + +/* Helper macros */ +#define RAY_IS_SYM(t) ((t) == RAY_SYM) +#define RAY_SYM_ELEM(attrs) (1u << ((attrs) & RAY_SYM_W_MASK)) /* 1,2,4,8 */ + +/* Determine optimal SYM width for a given dictionary size */ +static inline uint8_t ray_sym_dict_width(int64_t dict_size) { + if (dict_size <= 255) return RAY_SYM_W8; + if (dict_size <= 65535) return RAY_SYM_W16; + if (dict_size <= 4294967295) return RAY_SYM_W32; + return RAY_SYM_W64; +} + +/* SYM-aware element size: returns adaptive width for RAY_SYM columns */ +static inline uint8_t ray_sym_elem_size(int8_t type, uint8_t attrs) { + if (type == RAY_SYM) return (uint8_t)RAY_SYM_ELEM(attrs); + return ray_elem_size(type); +} + +/* Read a dictionary index from a RAY_SYM column (adaptive width) */ +static inline int64_t ray_read_sym(const void* data, int64_t row, int8_t type, uint8_t attrs) { + (void)type; /* only RAY_SYM now */ + switch (attrs & RAY_SYM_W_MASK) { + case RAY_SYM_W8: return ((const uint8_t*)data)[row]; + case RAY_SYM_W16: return ((const uint16_t*)data)[row]; + case RAY_SYM_W32: return ((const uint32_t*)data)[row]; + case RAY_SYM_W64: return ((const int64_t*)data)[row]; + } + return 0; +} + +/* Write a dictionary index into a RAY_SYM column (adaptive width) */ +static inline void ray_write_sym(void* data, int64_t row, uint64_t val, int8_t type, uint8_t attrs) { + (void)type; /* only RAY_SYM now */ + switch (attrs & RAY_SYM_W_MASK) { + case RAY_SYM_W8: ((uint8_t*)data)[row] = (uint8_t)val; break; + case RAY_SYM_W16: ((uint16_t*)data)[row] = (uint16_t)val; break; + case RAY_SYM_W32: ((uint32_t*)data)[row] = (uint32_t)val; break; + case RAY_SYM_W64: ((int64_t*)data)[row] = (int64_t)val; break; + } +} + +/* Intern with pre-computed wyhash, no lock. + * Caller must guarantee single-threaded access. 
*/ +int64_t ray_sym_intern_prehashed(uint32_t hash, const char* str, size_t len); + +/* ---- Dotted name resolution (namespace paths) --------------------------- + * A symbol whose name contains one or more '.' is a *dotted* sym. At intern + * time we memchr once, split the name on '.', intern each segment, and cache + * the resulting segment sym_ids. `ray_sym_is_dotted` is cheap (one bitmap + * load) and gates the slow path in env lookup/set. */ +bool ray_sym_is_dotted(int64_t sym_id); + +/* Returns segment count (>=2 if dotted, 0 otherwise). *out_segs is set to + * an interned sym_id array of length `nsegs` (valid for the lifetime of the + * sym table). */ +int ray_sym_segs(int64_t sym_id, const int64_t** out_segs); + +/* Bulk-intern variant that does NOT sub-intern segments. Used only by + * persistence paths (ray_sym_load, ray_sym_save merge phase) where the + * disk-position==sym_id invariant would be broken by segment sub-interning + * appending entries mid-sequence. Callers MUST follow a batch of these + * with ray_sym_rebuild_segments to populate the dotted cache. */ +int64_t ray_sym_intern_no_split(const char* str, size_t len); + +/* Walk the intern table and cache segment sym_ids for any dotted name + * that hasn't been cached yet. Idempotent — safe to call multiple times. + * Needed after bulk loads that used ray_sym_intern_no_split. Returns + * RAY_ERR_OOM on the first allocation/sub-intern failure so persistence + * paths can abort instead of leaving dotted names silently un-cached. */ +ray_err_t ray_sym_rebuild_segments(void); + +/* Upper bound on the arena bytes that sym_str_arena consumes for a name + * of the given length. Used by the three-phase atomic intern to pre- + * reserve arena capacity, so the commit phase cannot fail partway. + * + * Short path (<7 bytes): ray_arena_alloc(_, 0) charges ARENA_ALIGN_UP(32) + * = 32 bytes. 
Long path: sym_str_arena computes chars_block = + * ALIGN(32 + len + 1) and calls ray_arena_alloc(_, chars_block), which + * charges ARENA_ALIGN_UP(32 + chars_block) = 32 + chars_block because + * chars_block is 32-aligned. The +32 term is the crucial one — omitting + * it under-reserves by exactly 32 bytes per long sym. + * + * Exposed as inline so tests can verify the bound against actual arena + * consumption for every length in a range. */ +static inline size_t ray_sym_bytes_upper(size_t len) { + if (len < 7) return 32; + size_t chars_block = ((size_t)32 + len + 1 + 31) & ~(size_t)31; + return 32 + chars_block; +} + +#endif /* RAY_SYM_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/table/table.c b/crates/rayforce-sys/vendor/rayforce/src/table/table.c new file mode 100644 index 0000000..8d393b6 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/table/table.c @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "table.h" +#include "mem/heap.h" +#include "ops/ops.h" +#include + +/* -------------------------------------------------------------------------- + * Data layout — same shape as RAY_DICT. + * + * Block header (32 B): type = RAY_TABLE, len = 2 + * slot[0] = ray_t* schema — RAY_I64 vector of column name sym IDs + * slot[1] = ray_t* cols — RAY_LIST of column vectors + * + * `tbl->len` is the slot count (always 2). Use ray_table_ncols() to get + * the column count, ray_table_nrows() for the row count. + * + * The schema vector stays RAY_I64 (rather than RAY_SYM) because the rest + * of the codebase reads it as `int64_t* ids = ray_data(schema)` in + * dozens of hot loops; RAY_SYM's adaptive widths (W8/W16/W32/W64) would + * silently truncate those reads. RAY_DICT is free to use any keys type; + * TABLE deliberately pins schema to I64 for that interop. + * -------------------------------------------------------------------------- */ + +#define TBL_DATA_SIZE (2 * sizeof(ray_t*)) + +static inline ray_t** tbl_slots(ray_t* tbl) { + return (ray_t**)ray_data(tbl); +} + +static inline ray_t* tbl_schema(ray_t* tbl) { + return tbl_slots(tbl)[0]; +} + +static inline ray_t* tbl_cols(ray_t* tbl) { + return tbl_slots(tbl)[1]; +} + +/* -------------------------------------------------------------------------- + * ray_table_new — allocates an empty table with capacity for `ncols`. + * + * The schema vector and cols list are pre-sized to avoid early grows. + * Callers append columns via ray_table_add_col. 
+ * -------------------------------------------------------------------------- */ + +ray_t* ray_table_new(int64_t ncols) { + if (ncols < 0) return ray_error("range", NULL); + + ray_t* tbl = ray_alloc(TBL_DATA_SIZE); + if (!tbl) return ray_error("oom", "table_new(ncols=%lld)", (long long)ncols); + if (RAY_IS_ERR(tbl)) return tbl; + tbl->type = RAY_TABLE; + tbl->attrs = 0; + tbl->len = 2; + memset(tbl->nullmap, 0, 16); + memset(ray_data(tbl), 0, TBL_DATA_SIZE); + + ray_t* schema = ray_vec_new(RAY_I64, ncols); + if (!schema || RAY_IS_ERR(schema)) { + ray_free(tbl); + return schema ? schema : ray_error("oom", NULL); + } + ray_t* cols = ray_list_new(ncols); + if (!cols || RAY_IS_ERR(cols)) { + ray_release(schema); + ray_free(tbl); + return cols ? cols : ray_error("oom", NULL); + } + tbl_slots(tbl)[0] = schema; + tbl_slots(tbl)[1] = cols; + return tbl; +} + +/* -------------------------------------------------------------------------- + * ray_table_add_col — append `col_vec` under name `name_id`. + * + * Consumes one ref of the input table; on success returns an owned ref to + * the (possibly COW'd) result. Retains `col_vec` internally so the caller + * keeps its own ref. + * -------------------------------------------------------------------------- */ + +ray_t* ray_table_add_col(ray_t* tbl, int64_t name_id, ray_t* col_vec) { + if (!tbl || RAY_IS_ERR(tbl)) return tbl; + if (!col_vec || RAY_IS_ERR(col_vec)) return ray_error("type", NULL); + + tbl = ray_cow(tbl); + if (!tbl || RAY_IS_ERR(tbl)) return tbl; + + ray_t** slots = tbl_slots(tbl); + ray_t* schema = slots[0]; + ray_t* cols = slots[1]; + + /* schema and cols may themselves be shared after cow — append helpers + * COW them as needed and return the (possibly new) owned ref. */ + ray_t* new_schema = ray_vec_append(schema, &name_id); + if (!new_schema || RAY_IS_ERR(new_schema)) { ray_release(tbl); return new_schema ? 
new_schema : ray_error("oom", NULL); } + slots[0] = new_schema; + + ray_retain(col_vec); + ray_t* new_cols = ray_list_append(cols, col_vec); + ray_release(col_vec); + if (!new_cols || RAY_IS_ERR(new_cols)) { ray_release(tbl); return new_cols ? new_cols : ray_error("oom", NULL); } + slots[1] = new_cols; + + return tbl; +} + +/* -------------------------------------------------------------------------- + * ray_table_get_col — lookup column by sym id; borrowed pointer or NULL. + * -------------------------------------------------------------------------- */ + +ray_t* ray_table_get_col(ray_t* tbl, int64_t name_id) { + if (!tbl || RAY_IS_ERR(tbl)) return NULL; + ray_t* schema = tbl_schema(tbl); + ray_t* cols = tbl_cols(tbl); + if (!schema || !cols) return NULL; + int64_t* ids = (int64_t*)ray_data(schema); + int64_t ncols = schema->len; + ray_t** col_ptrs = (ray_t**)ray_data(cols); + for (int64_t i = 0; i < ncols; i++) + if (ids[i] == name_id) return col_ptrs[i]; + return NULL; +} + +/* -------------------------------------------------------------------------- + * ray_table_get_col_idx — borrowed pointer at slot `idx`, or NULL. + * -------------------------------------------------------------------------- */ + +ray_t* ray_table_get_col_idx(ray_t* tbl, int64_t idx) { + if (!tbl || RAY_IS_ERR(tbl)) return NULL; + ray_t* cols = tbl_cols(tbl); + if (!cols) return NULL; + if (idx < 0 || idx >= cols->len) return NULL; + return ((ray_t**)ray_data(cols))[idx]; +} + +/* -------------------------------------------------------------------------- + * ray_table_col_name — sym id at slot `idx`, -1 on out-of-range. 
+ * -------------------------------------------------------------------------- */ + +int64_t ray_table_col_name(ray_t* tbl, int64_t idx) { + if (!tbl || RAY_IS_ERR(tbl)) return -1; + ray_t* schema = tbl_schema(tbl); + if (!schema) return -1; + if (idx < 0 || idx >= schema->len) return -1; + return ((int64_t*)ray_data(schema))[idx]; +} + +/* -------------------------------------------------------------------------- + * ray_table_set_col_name — overwrite name at `idx`. Caller must ensure + * exclusive ownership (rc==1) before calling. + * -------------------------------------------------------------------------- */ + +void ray_table_set_col_name(ray_t* tbl, int64_t idx, int64_t name_id) { + if (!tbl || RAY_IS_ERR(tbl)) return; + ray_t** slots = tbl_slots(tbl); + ray_t* schema = slots[0]; + if (!schema || RAY_IS_ERR(schema)) return; + if (idx < 0 || idx >= schema->len) return; + schema = ray_cow(schema); + if (!schema || RAY_IS_ERR(schema)) return; + slots[0] = schema; + ((int64_t*)ray_data(schema))[idx] = name_id; +} + +/* -------------------------------------------------------------------------- + * ray_table_ncols / ray_table_nrows / ray_table_schema + * -------------------------------------------------------------------------- */ + +int64_t ray_table_ncols(ray_t* tbl) { + if (!tbl || RAY_IS_ERR(tbl)) return 0; + ray_t* schema = tbl_schema(tbl); + return schema ? 
schema->len : 0; +} + +int64_t ray_table_nrows(ray_t* tbl) { + if (!tbl || RAY_IS_ERR(tbl)) return 0; + ray_t* cols = tbl_cols(tbl); + if (!cols || cols->len <= 0) return 0; + ray_t* first_col = ((ray_t**)ray_data(cols))[0]; + if (!first_col || RAY_IS_ERR(first_col)) return 0; + + if (RAY_IS_PARTED(first_col->type) || first_col->type == RAY_MAPCOMMON) + return ray_parted_nrows(first_col); + + return first_col->len; +} + +int64_t ray_parted_nrows(ray_t* v) { + if (!v || RAY_IS_ERR(v)) return 0; + if (!RAY_IS_PARTED(v->type) && v->type != RAY_MAPCOMMON) return v->len; + + if (v->type == RAY_MAPCOMMON) { + ray_t** ptrs = (ray_t**)ray_data(v); + ray_t* counts = ptrs[1]; + if (!counts || RAY_IS_ERR(counts)) return 0; + int64_t total = 0; + int64_t* cdata = (int64_t*)ray_data(counts); + for (int64_t i = 0; i < counts->len; i++) + total += cdata[i]; + return total; + } + + int64_t n_segs = v->len; + ray_t** segs = (ray_t**)ray_data(v); + int64_t total = 0; + for (int64_t i = 0; i < n_segs; i++) { + if (segs[i] && !RAY_IS_ERR(segs[i])) + total += segs[i]->len; + } + return total; +} + +ray_t* ray_table_schema(ray_t* tbl) { + if (!tbl || RAY_IS_ERR(tbl)) return NULL; + return tbl_schema(tbl); +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/table/table.h b/crates/rayforce-sys/vendor/rayforce/src/table/table.h new file mode 100644 index 0000000..16eff4e --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/table/table.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_TABLE_H +#define RAY_TABLE_H + +/* + * table.h -- Table operations. + * + * A table has type = RAY_TABLE (13), len = 2 (slot count, not column count). + * Data region: two ray_t* slots -- slot[0] = schema (I64 vector of column + * name symbol IDs), slot[1] = RAY_LIST of column vectors. Use + * ray_table_ncols() / ray_table_nrows() for the column and row counts. + */ + +#include + +int64_t ray_parted_nrows(ray_t* parted_col); + +#endif /* RAY_TABLE_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/vec/atom.c b/crates/rayforce-sys/vendor/rayforce/src/vec/atom.c new file mode 100644 index 0000000..2d4b487 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/vec/atom.c @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved.
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "atom.h" +#include + +/* -------------------------------------------------------------------------- + * Simple atom constructors + * + * Pattern: allocate 0-byte data block (just the 32B header), set type to + * negative tag, store value in header union field. + * -------------------------------------------------------------------------- */ + +ray_t* ray_bool(bool val) { + ray_t* v = ray_alloc(0); + if (RAY_IS_ERR(v)) return v; + v->type = -RAY_BOOL; + v->b8 = val ? 
1 : 0; + return v; +} + +ray_t* ray_u8(uint8_t val) { + ray_t* v = ray_alloc(0); + if (RAY_IS_ERR(v)) return v; + v->type = -RAY_U8; + v->u8 = val; + return v; +} + +ray_t* ray_i16(int16_t val) { + ray_t* v = ray_alloc(0); + if (RAY_IS_ERR(v)) return v; + v->type = -RAY_I16; + v->i16 = val; + return v; +} + +ray_t* ray_i32(int32_t val) { + ray_t* v = ray_alloc(0); + if (RAY_IS_ERR(v)) return v; + v->type = -RAY_I32; + v->i32 = val; + return v; +} + +ray_t* ray_i64(int64_t val) { + ray_t* v = ray_alloc(0); + if (RAY_IS_ERR(v)) return v; + v->type = -RAY_I64; + v->i64 = val; + return v; +} + +ray_t* ray_f64(double val) { + ray_t* v = ray_alloc(0); + if (RAY_IS_ERR(v)) return v; + v->type = -RAY_F64; + v->f64 = val; + return v; +} + +/* F32 atoms reuse the f64 union slot — fmt_obj's RAY_F32 branch already + * narrows back to float via `(float)obj->f64`. Constructor mirrors + * ray_f64; only the type tag differs. Provided so RAY_F32 vectors can + * box elements through the same atom-construction path used by I32/F64. */ +ray_t* ray_f32(float val) { + ray_t* v = ray_alloc(0); + if (RAY_IS_ERR(v)) return v; + v->type = -RAY_F32; + v->f64 = (double)val; + return v; +} + +/* -------------------------------------------------------------------------- + * String atom: SSO for <= 7 bytes, long string via U8 vector for > 7 + * -------------------------------------------------------------------------- */ + +ray_t* ray_str(const char* s, size_t len) { + if (len < 7) { + /* SSO path: store inline in header (< 7 leaves room for NUL). + * Exactly 7 bytes would fill all of sdata[7] with no NUL terminator, + * so 7-byte strings fall through to the long-string path. */ + ray_t* v = ray_alloc(0); + if (RAY_IS_ERR(v)) return v; + v->type = -RAY_STR; + v->slen = (uint8_t)len; + if (len > 0) memcpy(v->sdata, s, len); + v->sdata[len] = '\0'; + return v; + } + /* Long string: allocate a U8 vector to hold the data, store pointer. 
+ * Allocate len+1 and null-terminate for C string compatibility — callers + * (including ctypes c_char_p) may read until '\0'. */ + size_t data_size = len + 1; + ray_t* chars = ray_alloc(data_size); + if (!chars || RAY_IS_ERR(chars)) return chars; + chars->type = RAY_U8; + chars->len = (int64_t)len; + memcpy(ray_data(chars), s, len); + ((char*)ray_data(chars))[len] = '\0'; + + ray_t* v = ray_alloc(0); + if (RAY_IS_ERR(v)) { + ray_free(chars); + return v; + } + v->type = -RAY_STR; + v->obj = chars; + return v; +} + +/* -------------------------------------------------------------------------- + * Symbol atom: intern ID stored as i64 + * -------------------------------------------------------------------------- */ + +ray_t* ray_sym(int64_t id) { + ray_t* v = ray_alloc(0); + if (RAY_IS_ERR(v)) return v; + v->type = -RAY_SYM; + v->i64 = id; + return v; +} + +/* -------------------------------------------------------------------------- + * Date/Time/Timestamp atoms + * + * All atom constructors accept int64_t and store in the i64 union field + * (atoms are scalar wrappers — always 8 bytes in the union). The vector + * element sizes differ: DATE=4, TIME=4, TIMESTAMP=8. When broadcasting + * an atom to a vector (materialize_broadcast_input), the value must be + * narrowed to the correct element width. 
+ * -------------------------------------------------------------------------- */ + +ray_t* ray_date(int64_t val) { + ray_t* v = ray_alloc(0); + if (RAY_IS_ERR(v)) return v; + v->type = -RAY_DATE; + v->i64 = val; + return v; +} + +ray_t* ray_time(int64_t val) { + ray_t* v = ray_alloc(0); + if (RAY_IS_ERR(v)) return v; + v->type = -RAY_TIME; + v->i64 = val; + return v; +} + +ray_t* ray_timestamp(int64_t val) { + ray_t* v = ray_alloc(0); + if (RAY_IS_ERR(v)) return v; + v->type = -RAY_TIMESTAMP; + v->i64 = val; + return v; +} + +ray_t* ray_typed_null(int8_t type) { + if (type >= 0) return ray_error("type", NULL); + ray_t* v = ray_alloc(0); + if (RAY_IS_ERR(v)) return v; + v->type = type; + v->i64 = 0; + v->nullmap[0] |= 1; + return v; +} + +/* -------------------------------------------------------------------------- + * GUID atom: 16 bytes stored in a U8 vector, pointer in obj field + * -------------------------------------------------------------------------- */ + +ray_t* ray_guid(const uint8_t* bytes) { + /* Allocate U8 vector of length 16 */ + ray_t* vec = ray_alloc(16); + if (!vec || RAY_IS_ERR(vec)) return vec; + vec->type = RAY_U8; + vec->len = 16; + memcpy(ray_data(vec), bytes, 16); + + ray_t* v = ray_alloc(0); + if (RAY_IS_ERR(v)) { + ray_free(vec); + return v; + } + v->type = -RAY_GUID; + v->obj = vec; + return v; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/vec/atom.h b/crates/rayforce-sys/vendor/rayforce/src/vec/atom.h new file mode 100644 index 0000000..0c495bc --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/vec/atom.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_ATOM_H +#define RAY_ATOM_H + +/* + * atom.h -- Atom constructors. + * + * Each atom is a 32-byte block (header only, no data region) with a + * negative type tag and the value stored in the header union. + */ + +#include + +#endif /* RAY_ATOM_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/vec/embedding.h b/crates/rayforce-sys/vendor/rayforce/src/vec/embedding.h new file mode 100644 index 0000000..487c7d3 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/vec/embedding.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_EMBEDDING_H +#define RAY_EMBEDDING_H + +#include + +/* ===== Embedding Column Helpers ===== */ + +/* An embedding column is a RAY_F32 vector of length N*D where D is the + * embedding dimension. D is stored in a separate I32 atom that the + * caller keeps alongside the column. Access helpers: */ + +/* Create an embedding column for N rows of D-dimensional vectors. */ +ray_t* ray_embedding_new(int64_t nrows, int32_t dim); + +#endif /* RAY_EMBEDDING_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/vec/list.c b/crates/rayforce-sys/vendor/rayforce/src/vec/list.c new file mode 100644 index 0000000..c4b33c3 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/vec/list.c @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "list.h" +#include "mem/heap.h" +#include + +/* -------------------------------------------------------------------------- + * Capacity helpers (same pattern as vec.c) + * -------------------------------------------------------------------------- */ + +static int64_t list_capacity(ray_t* list) { + size_t block_size = (size_t)1 << list->order; + size_t data_space = block_size - 32; /* 32B ray_t header */ + return (int64_t)(data_space / sizeof(ray_t*)); +} + +/* -------------------------------------------------------------------------- + * ray_list_new + * -------------------------------------------------------------------------- */ + +ray_t* ray_list_new(int64_t capacity) { + if (capacity < 0) return ray_error("range", NULL); + if ((uint64_t)capacity > SIZE_MAX / sizeof(ray_t*)) + return ray_error("oom", NULL); + size_t data_size = (size_t)capacity * sizeof(ray_t*); + + ray_t* list = ray_alloc(data_size); + if (!list) return ray_error("oom", "list_new(cap=%lld): %zu bytes", + (long long)capacity, data_size); + if (RAY_IS_ERR(list)) return list; + + list->type = RAY_LIST; + list->len = 0; + list->attrs = 0; + memset(list->nullmap, 0, 16); + + return list; +} + +/* -------------------------------------------------------------------------- + * ray_list_append + * -------------------------------------------------------------------------- */ + +ray_t* ray_list_append(ray_t* list, ray_t* item) { + if (!list || RAY_IS_ERR(list)) return list; + + /* COW if shared */ + ray_t* original = list; + list = ray_cow(list); + if (!list || RAY_IS_ERR(list)) return list; + + int64_t cap = list_capacity(list); + + /* Grow if needed */ + if (list->len >= cap) { + size_t new_data_size = (size_t)(list->len + 1) * sizeof(ray_t*); + if (new_data_size < 32) new_data_size = 32; + else { + size_t s = 32; + while (s < new_data_size) { + if (s > SIZE_MAX / 2) { + if (list != original) ray_release(list); + return ray_error("oom", NULL); + } + s *= 2; + } + new_data_size = s; + } 
+ ray_t* new_list = ray_scratch_realloc(list, new_data_size); + if (!new_list || RAY_IS_ERR(new_list)) { + if (list != original) ray_release(list); + return new_list ? new_list : ray_error("oom", NULL); + } + list = new_list; + } + + /* Store item pointer and retain it */ + ray_t** slots = (ray_t**)ray_data(list); + slots[list->len] = item; + if (item) ray_retain(item); + list->len++; + + return list; +} + +/* -------------------------------------------------------------------------- + * ray_list_get + * -------------------------------------------------------------------------- */ + +ray_t* ray_list_get(ray_t* list, int64_t idx) { + if (!list || RAY_IS_ERR(list)) return NULL; + if (idx < 0 || idx >= list->len) return NULL; + + ray_t** slots = (ray_t**)ray_data(list); + return slots[idx]; +} + +/* -------------------------------------------------------------------------- + * ray_list_set + * -------------------------------------------------------------------------- */ + +ray_t* ray_list_set(ray_t* list, int64_t idx, ray_t* item) { + if (!list || RAY_IS_ERR(list)) return list; + if (idx < 0 || idx >= list->len) + return ray_error("range", NULL); + + /* COW if shared */ + list = ray_cow(list); + if (!list || RAY_IS_ERR(list)) return list; + + ray_t** slots = (ray_t**)ray_data(list); + + /* Release old item */ + ray_t* old = slots[idx]; + if (old) ray_release(old); + + /* Store new item and retain it */ + slots[idx] = item; + if (item) ray_retain(item); + + return list; +} + +/* -------------------------------------------------------------------------- + * ray_list_insert_at — insert one item at pre-insertion position idx. + * + * idx ∈ [0, list->len]; idx == len is equivalent to ray_list_append. 
+ * -------------------------------------------------------------------------- */ + +ray_t* ray_list_insert_at(ray_t* list, int64_t idx, ray_t* item) { + if (!list || RAY_IS_ERR(list)) return list; + if (list->type != RAY_LIST) return ray_error("type", NULL); + if (idx < 0 || idx > list->len) return ray_error("range", NULL); + + ray_t* original = list; + list = ray_cow(list); + if (!list || RAY_IS_ERR(list)) return list; + + int64_t cap = list_capacity(list); + + if (list->len >= cap) { + size_t new_data_size = (size_t)(list->len + 1) * sizeof(ray_t*); + if (new_data_size < 32) new_data_size = 32; + else { + size_t s = 32; + while (s < new_data_size) { + if (s > SIZE_MAX / 2) { + if (list != original) ray_release(list); + return ray_error("oom", NULL); + } + s *= 2; + } + new_data_size = s; + } + ray_t* new_list = ray_scratch_realloc(list, new_data_size); + if (!new_list || RAY_IS_ERR(new_list)) { + if (list != original) ray_release(list); + return new_list ? new_list : ray_error("oom", NULL); + } + list = new_list; + } + + ray_t** slots = (ray_t**)ray_data(list); + + if (idx < list->len) { + memmove(&slots[idx + 1], &slots[idx], + (size_t)(list->len - idx) * sizeof(ray_t*)); + } + + slots[idx] = item; + if (item) ray_retain(item); + list->len++; + + return list; +} + +/* -------------------------------------------------------------------------- + * ray_list_insert_many — insert N items at N pre-insertion positions. + * + * idxs : RAY_I64 vec of length N, each idx in [0, list->len]. + * vals : RAY_LIST. Length 1 broadcasts the single ptr; length N is parallel. + * + * Stable on duplicate indices. Returns a fresh block; broadcast retains the + * same pointer once per insertion site. 
+ * -------------------------------------------------------------------------- */ + +ray_t* ray_list_insert_many(ray_t* list, ray_t* idxs, ray_t* vals) { + if (!list || RAY_IS_ERR(list)) return list; + if (!idxs || RAY_IS_ERR(idxs)) return idxs; + if (!vals || RAY_IS_ERR(vals)) return vals; + if (list->type != RAY_LIST) return ray_error("type", NULL); + if (idxs->type != RAY_I64) return ray_error("type", NULL); + if (vals->type != RAY_LIST) return ray_error("type", NULL); + + int64_t N = idxs->len; + int64_t old_len = list->len; + + if (N == 0) { ray_retain(list); return list; } + + const int64_t* idx_arr = (const int64_t*)ray_data(idxs); + for (int64_t k = 0; k < N; k++) { + if (idx_arr[k] < 0 || idx_arr[k] > old_len) + return ray_error("range", NULL); + } + + int broadcast; + if (vals->len == 1) broadcast = 1; + else if (vals->len == N) broadcast = 0; + else return ray_error("range", NULL); + + /* Sort buffer of (idx, src_pos) pairs */ + ray_t* pair_vec = ray_vec_new(RAY_I64, 2 * N); + if (!pair_vec || RAY_IS_ERR(pair_vec)) return ray_error("oom", NULL); + pair_vec->len = 2 * N; + int64_t* pairs = (int64_t*)ray_data(pair_vec); + for (int64_t k = 0; k < N; k++) { + pairs[2 * k] = idx_arr[k]; + pairs[2 * k + 1] = k; + } + + /* Stable insertion sort by idx */ + for (int64_t i = 1; i < N; i++) { + int64_t ki = pairs[2 * i]; + int64_t ks = pairs[2 * i + 1]; + int64_t j = i - 1; + while (j >= 0 && pairs[2 * j] > ki) { + pairs[2 * (j + 1)] = pairs[2 * j]; + pairs[2 * (j + 1) + 1] = pairs[2 * j + 1]; + j--; + } + pairs[2 * (j + 1)] = ki; + pairs[2 * (j + 1) + 1] = ks; + } + + int64_t new_len = old_len + N; + if (new_len < old_len) { ray_release(pair_vec); return ray_error("oom", NULL); } + if ((uint64_t)new_len > SIZE_MAX / sizeof(ray_t*)) { + ray_release(pair_vec); + return ray_error("oom", NULL); + } + size_t data_size = (size_t)new_len * sizeof(ray_t*); + + ray_t* result = ray_alloc(data_size); + if (!result || RAY_IS_ERR(result)) { + ray_release(pair_vec); + return 
result ? result : ray_error("oom", NULL); + } + result->type = RAY_LIST; + result->len = new_len; + result->attrs = 0; + memset(result->nullmap, 0, 16); + + ray_t** src_slots = (ray_t**)ray_data(list); + ray_t** val_slots = (ray_t**)ray_data(vals); + ray_t** dst_slots = (ray_t**)ray_data(result); + + int64_t w = 0; + int64_t p = 0; + for (int64_t r = 0; r <= old_len; r++) { + while (p < N && pairs[2 * p] == r) { + int64_t src_pos = pairs[2 * p + 1]; + ray_t* item = broadcast ? val_slots[0] : val_slots[src_pos]; + dst_slots[w] = item; + if (item) ray_retain(item); + w++; + p++; + } + if (r < old_len) { + ray_t* item = src_slots[r]; + dst_slots[w] = item; + if (item) ray_retain(item); + w++; + } + } + + ray_release(pair_vec); + return result; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/vec/list.h b/crates/rayforce-sys/vendor/rayforce/src/vec/list.h new file mode 100644 index 0000000..20ad19c --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/vec/list.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_LIST_H +#define RAY_LIST_H + +/* + * list.h -- LIST type operations. + * + * A LIST has type = RAY_LIST (0) and stores an array of ray_t* pointers + * in the data region. Items are reference-counted via ray_retain/ray_release. + */ + +#include + +#endif /* RAY_LIST_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/vec/sel.c b/crates/rayforce-sys/vendor/rayforce/src/vec/sel.c new file mode 100644 index 0000000..e651296 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/vec/sel.c @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include "ops/ops.h" +#include + +/* -------------------------------------------------------------------------- + * Layout size computation + * + * Data payload after 32-byte ray_t header: + * ray_sel_meta_t 16 bytes + * seg_flags[n_segs] align8(n_segs) bytes + * seg_popcnt[n_segs] align8(n_segs * 2) bytes + * bits[n_words] n_words * 8 bytes + * -------------------------------------------------------------------------- */ + +static size_t sel_data_size(int64_t nrows) { + uint32_t n_segs = (uint32_t)((nrows + RAY_MORSEL_ELEMS - 1) / RAY_MORSEL_ELEMS); + uint32_t n_words = (uint32_t)((nrows + 63) / 64); + + size_t sz = sizeof(ray_sel_meta_t); + sz += (n_segs + 7u) & ~(size_t)7; /* seg_flags, 8-aligned */ + sz += ((size_t)n_segs * 2 + 7u) & ~(size_t)7; /* seg_popcnt, 8-aligned */ + sz += (size_t)n_words * 8; /* bits */ + return sz; +} + +/* -------------------------------------------------------------------------- + * ray_sel_new — allocate a selection with all bits zero (no rows pass) + * -------------------------------------------------------------------------- */ + +ray_t* ray_sel_new(int64_t nrows) { + if (nrows < 0) return ray_error("range", NULL); + + size_t dsz = sel_data_size(nrows); + ray_t* s = ray_alloc(dsz); + if (!s || RAY_IS_ERR(s)) return s; + + s->type = RAY_SEL; + s->len = nrows; + memset(ray_data(s), 0, dsz); + + ray_sel_meta_t* m = ray_sel_meta(s); + m->total_pass = 0; + m->n_segs = (uint32_t)((nrows + RAY_MORSEL_ELEMS - 1) / RAY_MORSEL_ELEMS); + /* seg_flags[] already zero = RAY_SEL_NONE, seg_popcnt[] = 0, bits[] = 0 */ + + return s; +} + +/* -------------------------------------------------------------------------- + * ray_sel_recompute — rebuild seg_flags + seg_popcnt from bits[] + * + * Called after direct writes into bits[] (e.g., fused predicate evaluation). 
+ * -------------------------------------------------------------------------- */ + +void ray_sel_recompute(ray_t* sel) { + if (!sel || sel->type != RAY_SEL) return; + + ray_sel_meta_t* m = ray_sel_meta(sel); + uint8_t* flags = ray_sel_flags(sel); + uint16_t* pcnt = ray_sel_popcnt(sel); + uint64_t* bits = ray_sel_bits(sel); + + int64_t total = 0; + int64_t nrows = sel->len; + uint32_t n_segs = m->n_segs; + + for (uint32_t seg = 0; seg < n_segs; seg++) { + int64_t seg_start = (int64_t)seg * RAY_MORSEL_ELEMS; + int64_t seg_rows = nrows - seg_start; + if (seg_rows > RAY_MORSEL_ELEMS) seg_rows = RAY_MORSEL_ELEMS; + + /* Count bits in this segment's words */ + uint32_t word_start = (uint32_t)(seg_start / 64); + uint32_t word_end = (uint32_t)((seg_start + seg_rows + 63) / 64); + int64_t seg_pop = 0; + for (uint32_t w = word_start; w < word_end; w++) + seg_pop += __builtin_popcountll(bits[w]); + + /* Handle partial last word: mask out trailing bits beyond nrows */ + if (seg == n_segs - 1 && (nrows & 63)) { + uint32_t last_w = word_end - 1; + uint32_t valid_bits = (uint32_t)(nrows & 63); + uint64_t trail_mask = (1ULL << valid_bits) - 1; + /* Subtract overcounted trailing bits */ + seg_pop -= __builtin_popcountll(bits[last_w] & ~trail_mask); + } + + pcnt[seg] = (uint16_t)seg_pop; + total += seg_pop; + + if (seg_pop == 0) + flags[seg] = RAY_SEL_NONE; + else if (seg_pop == seg_rows) + flags[seg] = RAY_SEL_ALL; + else + flags[seg] = RAY_SEL_MIX; + } + + m->total_pass = total; +} + +/* -------------------------------------------------------------------------- + * ray_sel_from_pred — convert a RAY_BOOL byte-per-row vector to RAY_SEL + * -------------------------------------------------------------------------- */ + +ray_t* ray_sel_from_pred(ray_t* pred) { + if (!pred || RAY_IS_ERR(pred)) return pred; + if (pred->type != RAY_BOOL) return ray_error("type", NULL); + + int64_t nrows = pred->len; + ray_t* sel = ray_sel_new(nrows); + if (!sel || RAY_IS_ERR(sel)) return sel; + + /* 
Pack byte-per-row into bitpacked uint64_t words */ + uint64_t* bits = ray_sel_bits(sel); + const uint8_t* src = (const uint8_t*)ray_data(pred); + + int64_t full_words = nrows / 64; + for (int64_t w = 0; w < full_words; w++) { + uint64_t word = 0; + const uint8_t* p = src + w * 64; + for (int b = 0; b < 64; b++) + word |= (uint64_t)(p[b] != 0) << b; + bits[w] = word; + } + + /* Remainder bits */ + int64_t rem = nrows & 63; + if (rem) { + uint64_t word = 0; + const uint8_t* p = src + full_words * 64; + for (int64_t b = 0; b < rem; b++) + word |= (uint64_t)(p[b] != 0) << b; + bits[full_words] = word; + } + + ray_sel_recompute(sel); + return sel; +} + +/* -------------------------------------------------------------------------- + * ray_sel_and — AND two selections of equal length, returns new RAY_SEL + * -------------------------------------------------------------------------- */ + +ray_t* ray_sel_and(ray_t* a, ray_t* b) { + if (!a || RAY_IS_ERR(a)) return a; + if (!b || RAY_IS_ERR(b)) return b; + if (a->type != RAY_SEL || b->type != RAY_SEL) + return ray_error("type", NULL); + if (a->len != b->len) + return ray_error("range", NULL); + + int64_t nrows = a->len; + ray_t* out = ray_sel_new(nrows); + if (!out || RAY_IS_ERR(out)) return out; + + uint64_t* dst = ray_sel_bits(out); + const uint64_t* sa = ray_sel_bits(a); + const uint64_t* sb = ray_sel_bits(b); + uint32_t n_words = (uint32_t)((nrows + 63) / 64); + + for (uint32_t w = 0; w < n_words; w++) + dst[w] = sa[w] & sb[w]; + + ray_sel_recompute(out); + return out; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/vec/str.c b/crates/rayforce-sys/vendor/rayforce/src/vec/str.c new file mode 100644 index 0000000..ca76b92 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/vec/str.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "str.h" +#include + +/* -------------------------------------------------------------------------- + * SSO vs long-string detection + * + * The slen/sdata and obj fields share the same 8-byte union in ray_t. + * SSO: slen is 0..7, sdata contains the string bytes. + * Long: obj is a non-NULL pointer to a U8 vector. + * + * Distinction: + * - slen 1..7 → always SSO (a 32B-aligned pointer's low byte is a + * multiple of 32, never 1..7) + * - slen 0 with obj == NULL → empty SSO (all 8 union bytes are zero) + * - slen 0 with obj != NULL → long string (pointer's low byte is 0) + * - slen > 7 → long string (pointer's low byte is 32, 64, ... 
or higher) + * -------------------------------------------------------------------------- */ + +static bool is_sso(ray_t* s) { + if (s->slen >= 1 && s->slen <= 7) return true; + if (s->slen == 0 && s->obj == NULL) return true; + return false; +} + +/* -------------------------------------------------------------------------- + * ray_str_ptr + * -------------------------------------------------------------------------- */ + +const char* ray_str_ptr(ray_t* s) { + if (!s || RAY_IS_ERR(s)) return NULL; + if (is_sso(s)) return (const char*)s->sdata; + return (const char*)ray_data(s->obj); +} + +/* -------------------------------------------------------------------------- + * ray_str_len + * -------------------------------------------------------------------------- */ + +size_t ray_str_len(ray_t* s) { + if (!s || RAY_IS_ERR(s)) return 0; + if (is_sso(s)) return (size_t)s->slen; + return (size_t)s->obj->len; +} + +/* -------------------------------------------------------------------------- + * ray_str_cmp -- Compare two string atoms. + * + * Compare by memcmp of the min length, then by length difference. + * -------------------------------------------------------------------------- */ + +int ray_str_cmp(ray_t* a, ray_t* b) { + if (!a || RAY_IS_ERR(a) || !b || RAY_IS_ERR(b)) return 0; + + const char* ap = ray_str_ptr(a); + const char* bp = ray_str_ptr(b); + size_t alen = ray_str_len(a); + size_t blen = ray_str_len(b); + + size_t minlen = alen < blen ? alen : blen; + int cmp = 0; + if (minlen > 0) cmp = memcmp(ap, bp, minlen); + if (cmp != 0) return cmp; + + if (alen < blen) return -1; + if (alen > blen) return 1; + return 0; +} diff --git a/crates/rayforce-sys/vendor/rayforce/src/vec/str.h b/crates/rayforce-sys/vendor/rayforce/src/vec/str.h new file mode 100644 index 0000000..192f8e2 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/vec/str.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. 
+ + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RAY_STR_H +#define RAY_STR_H + +/* + * str.h -- String helper functions. + * + * String atoms use SSO for <= 7 bytes (stored in sdata[7] with slen). + * Long strings store data in a U8 vector pointed to by obj. + */ + +#include +#include + +/* ===== Inline String Element (16 bytes) ===== */ + +typedef union { + struct { uint32_t len; char data[12]; }; /* inline: len <= 12 */ + struct { uint32_t len_; char prefix[4]; /* pooled: len > 12 */ + uint32_t pool_off; uint32_t _pad; }; +} ray_str_t; + +#define RAY_STR_INLINE_MAX 12 + +static inline bool ray_str_is_inline(const ray_str_t* s) { + return s->len <= RAY_STR_INLINE_MAX; +} + +/* Resolve string data pointer for a ray_str_t element. 
+ * pool_base: base of string pool (NULL if all strings are inline) */ +static inline const char* ray_str_t_ptr(const ray_str_t* s, const char* pool_base) { + if (s->len == 0) return ""; + if (ray_str_is_inline(s)) return s->data; + assert(pool_base != NULL && "ray_str_t_ptr: pooled string requires non-NULL pool_base"); + return pool_base + s->pool_off; +} + +/* Equality: fast reject on len, then prefix, then full compare. + * pool_a/pool_b: pool bases for elements a and b respectively (NULL if inline) */ +static inline bool ray_str_t_eq(const ray_str_t* a, const char* pool_a, + const ray_str_t* b, const char* pool_b) { + if (a->len != b->len) return false; + if (a->len == 0) return true; + if (ray_str_is_inline(a)) { + return memcmp(a->data, b->data, a->len) == 0; + } + /* Both pooled: check prefix first */ + if (memcmp(a->prefix, b->prefix, 4) != 0) return false; + return memcmp(pool_a + a->pool_off, pool_b + b->pool_off, a->len) == 0; +} + +/* Ordering: lexicographic, shorter string is less on prefix tie. + * pool_a/pool_b: pool bases for elements a and b respectively (NULL if inline) */ +static inline int ray_str_t_cmp(const ray_str_t* a, const char* pool_a, + const ray_str_t* b, const char* pool_b) { + const char* pa = ray_str_t_ptr(a, pool_a); + const char* pb = ray_str_t_ptr(b, pool_b); + uint32_t min_len = a->len < b->len ? a->len : b->len; + int r = memcmp(pa, pb, min_len); + if (r != 0) return r; + return (a->len > b->len) - (a->len < b->len); +} + +/* Hash a ray_str_t element. Uses FNV-1a which is self-contained and fast for + * the typical short-to-medium strings stored in ray_str_t. + * pool_base: pool base pointer for pooled strings (NULL when inline-only). 
*/ +static inline uint64_t ray_str_t_hash(const ray_str_t* s, const char* pool_base) { + if (s->len == 0) return 0x9E3779B97F4A7C15ULL; /* golden ratio constant for empty */ + if (!ray_str_is_inline(s)) { + assert(pool_base != NULL && "ray_str_t_hash: pooled string requires non-NULL pool_base"); + } + const char* p = ray_str_is_inline(s) ? s->data : pool_base + s->pool_off; + uint64_t h = 0xcbf29ce484222325ULL; + for (uint32_t i = 0; i < s->len; i++) { + h ^= (uint64_t)(unsigned char)p[i]; + h *= 0x100000001b3ULL; + } + return h; +} + +#endif /* RAY_STR_H */ diff --git a/crates/rayforce-sys/vendor/rayforce/src/vec/vec.c b/crates/rayforce-sys/vendor/rayforce/src/vec/vec.c new file mode 100644 index 0000000..110a6e3 --- /dev/null +++ b/crates/rayforce-sys/vendor/rayforce/src/vec/vec.c @@ -0,0 +1,1361 @@ +/* + * Copyright (c) 2025-2026 Anton Kundenko + * All rights reserved. + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "vec.h" +#include "core/platform.h" +#include "mem/heap.h" +#include "table/sym.h" +#include "vec/embedding.h" +#include "vec/str.h" +#include "ops/idxop.h" +#include +#include + +/* qsort comparator for (idx, original_k) pairs in ray_vec_insert_many. + * Sorts primarily by idx ascending; ties break by original k to preserve + * stable-sort semantics (matches the previous insertion-sort behaviour). */ +static int pair_cmp_idx_then_k(const void* a, const void* b) { + const int64_t* pa = (const int64_t*)a; + const int64_t* pb = (const int64_t*)b; + if (pa[0] != pb[0]) return (pa[0] > pb[0]) - (pa[0] < pb[0]); + return (pa[1] > pb[1]) - (pa[1] < pb[1]); +} + +/* Public bitmap accessor — handles slice / ext / inline / HAS_INDEX + * uniformly. See vec.h for the contract. */ +const uint8_t* ray_vec_nullmap_bytes(const ray_t* v, + int64_t* bit_offset_out, + int64_t* len_bits_out) { + if (bit_offset_out) *bit_offset_out = 0; + if (len_bits_out) *len_bits_out = 0; + if (!v) return NULL; + + /* Slice: HAS_NULLS / HAS_INDEX live on the parent — redirect first, + * THEN test for nulls. Reading v->attrs & HAS_NULLS here would + * incorrectly drop a sliced view of a nullable column. 
*/ + const ray_t* target = v; + int64_t off = 0; + if (v->attrs & RAY_ATTR_SLICE) { + target = v->slice_parent; + off = v->slice_offset; + if (!target) return NULL; + } + if (!(target->attrs & RAY_ATTR_HAS_NULLS)) return NULL; + + if (bit_offset_out) *bit_offset_out = off; + + if (target->attrs & RAY_ATTR_HAS_INDEX) { + const ray_index_t* ix = ray_index_payload(target->index); + if (ix->saved_attrs & RAY_ATTR_NULLMAP_EXT) { + ray_t* ext; + memcpy(&ext, &ix->saved_nullmap[0], sizeof(ext)); + if (len_bits_out) *len_bits_out = ext->len * 8; + return (const uint8_t*)ray_data(ext); + } + if (len_bits_out) *len_bits_out = 128; + return ix->saved_nullmap; + } + if (target->attrs & RAY_ATTR_NULLMAP_EXT) { + if (len_bits_out) *len_bits_out = target->ext_nullmap->len * 8; + return (const uint8_t*)ray_data(target->ext_nullmap); + } + /* Inline path: RAY_STR's bytes 0-15 hold str_pool/str_ext_null, not + * bits — so RAY_STR with HAS_NULLS must always have NULLMAP_EXT. */ + if (target->type == RAY_STR) return NULL; + if (len_bits_out) *len_bits_out = 128; + return target->nullmap; +} + +/* Internal compatibility wrapper for the older two-out-param form used + * inside vec.c. Returns the inline pointer (16-byte buffer) when nulls + * live inline, or NULL when they live in *ext_out. */ +static inline const uint8_t* vec_inline_nullmap(const ray_t* v, ray_t** ext_nullmap_ref) { + *ext_nullmap_ref = NULL; + if (v->attrs & RAY_ATTR_HAS_INDEX) { + const ray_index_t* ix = ray_index_payload(v->index); + if (ix->saved_attrs & RAY_ATTR_NULLMAP_EXT) { + ray_t* ext; + memcpy(&ext, &ix->saved_nullmap[0], sizeof(ext)); + *ext_nullmap_ref = ext; + return NULL; + } + return ix->saved_nullmap; + } + if (v->attrs & RAY_ATTR_NULLMAP_EXT) { + *ext_nullmap_ref = v->ext_nullmap; + return NULL; + } + return v->nullmap; +} + +/* True if v has any nulls. 
HAS_NULLS is preserved on the parent across + * index attach/detach (see attach_finalize), so this is the same one-bit + * test in both indexed and non-indexed cases. */ +static inline bool vec_any_nulls(const ray_t* v) { + return (v->attrs & RAY_ATTR_HAS_NULLS) != 0; +} + +/* In-place drop of attached index — caller must hold a unique ref (rc==1) + * on `v` itself. Used by mutation paths to invalidate the (now stale) + * index before writing. HAS_NULLS was preserved through the attachment + * so it needs no restoration; only NULLMAP_EXT (cleared at attach time) + * is reinstated from saved_attrs. + * + * Shared-index case: `v` may share its index ray_t with another vec + * (e.g. after ray_cow followed by ray_retain_owned_refs, both copies + * point at the same RAY_INDEX with rc==2). We must NOT clobber the + * saved-nullmap bytes inside a shared index — the other holder still + * reads them. Detect rc>1 and copy the saved pointers via + * ray_index_retain_saved instead of moving them out. */ +static inline void vec_drop_index_inplace(ray_t* v) { + if (!(v->attrs & RAY_ATTR_HAS_INDEX)) return; + ray_t* idx = v->index; + ray_index_t* ix = ray_index_payload(idx); + uint8_t saved = ix->saved_attrs; + bool shared = ray_atomic_load(&idx->rc) > 1; + + if (shared) { + /* Take our own retained references to the saved-pointer slots + * (ext_nullmap / str_pool / sym_dict etc.) so the bytes we copy + * into v->nullmap are validly owned by v. Leave the index's + * snapshot intact for the other holder. */ + ray_index_retain_saved(ix); + } + memcpy(v->nullmap, ix->saved_nullmap, 16); + if (!shared) { + /* Sole owner: about to release idx, so neutralize its snapshot + * to prevent ray_index_release_saved from double-releasing the + * pointers we just transferred to v. 
*/ + memset(ix->saved_nullmap, 0, 16); + ix->saved_attrs = 0; + } + v->attrs &= (uint8_t)~RAY_ATTR_HAS_INDEX; + if (saved & RAY_ATTR_NULLMAP_EXT) v->attrs |= RAY_ATTR_NULLMAP_EXT; + ray_release(idx); +} + +/* -------------------------------------------------------------------------- + * Capacity helpers + * + * A vector's capacity is determined by its buddy order: + * capacity = (2^order - 32) / elem_size + * When len reaches capacity, realloc to next power-of-2 data size. + * -------------------------------------------------------------------------- */ + +static int64_t vec_capacity(ray_t* vec) { + size_t block_size = (size_t)1 << vec->order; + size_t data_space = block_size - 32; /* 32B ray_t header */ + uint8_t esz = ray_sym_elem_size(vec->type, vec->attrs); + if (esz == 0) return 0; + return (int64_t)(data_space / esz); +} + +/* -------------------------------------------------------------------------- + * ray_vec_new + * -------------------------------------------------------------------------- */ + +ray_t* ray_vec_new(int8_t type, int64_t capacity) { + if (type <= 0 || type >= RAY_TYPE_COUNT) + return ray_error("type", NULL); + if (type == RAY_SYM) + return ray_sym_vec_new(RAY_SYM_W64, capacity); /* default: global sym IDs */ + if (capacity < 0) return ray_error("range", NULL); + + uint8_t esz = ray_elem_size(type); + size_t data_size = (size_t)capacity * esz; + if (esz > 1 && data_size / esz != (size_t)capacity) + return ray_error("oom", NULL); + + ray_t* v = ray_alloc(data_size); + if (!v) return ray_error("oom", "vec_new(type=%d, cap=%lld): %zu bytes", + (int)type, (long long)capacity, data_size); + if (RAY_IS_ERR(v)) return v; + + v->type = type; + v->len = 0; + v->attrs = 0; + memset(v->nullmap, 0, 16); + if (type == RAY_STR) v->str_pool = NULL; + + return v; +} + +/* -------------------------------------------------------------------------- + * ray_sym_vec_new — create a RAY_SYM vector with adaptive index width + * + * sym_width: RAY_SYM_W8, 
RAY_SYM_W16, RAY_SYM_W32, or RAY_SYM_W64 + * capacity: number of elements (rows) + * -------------------------------------------------------------------------- */ + +ray_t* ray_sym_vec_new(uint8_t sym_width, int64_t capacity) { + if ((sym_width & ~RAY_SYM_W_MASK) != 0) + return ray_error("type", NULL); + if (capacity < 0) return ray_error("range", NULL); + + uint8_t esz = (uint8_t)RAY_SYM_ELEM(sym_width); + size_t data_size = (size_t)capacity * esz; + if (esz > 1 && data_size / esz != (size_t)capacity) + return ray_error("oom", NULL); + + ray_t* v = ray_alloc(data_size); + if (!v) return ray_error("oom", "sym_vec_new(width=%u, cap=%lld): %zu bytes", + (unsigned)sym_width, (long long)capacity, data_size); + if (RAY_IS_ERR(v)) return v; + + v->type = RAY_SYM; + v->len = 0; + v->attrs = sym_width; /* lower 2 bits encode width */ + memset(v->nullmap, 0, 16); + + return v; +} + +/* -------------------------------------------------------------------------- + * ray_vec_append + * -------------------------------------------------------------------------- */ + +ray_t* ray_vec_append(ray_t* vec, const void* elem) { + if (!vec || RAY_IS_ERR(vec)) return vec; + if (vec->type <= 0 || vec->type >= RAY_TYPE_COUNT) + return ray_error("type", NULL); + if (vec->type == RAY_STR) return ray_error("type", NULL); + + /* COW: if shared, copy first */ + ray_t* original = vec; + vec = ray_cow(vec); + if (!vec || RAY_IS_ERR(vec)) return vec; + + /* Append changes len + writes data; any attached index is now stale. 
*/ + vec_drop_index_inplace(vec); + + uint8_t esz = ray_sym_elem_size(vec->type, vec->attrs); + int64_t cap = vec_capacity(vec); + + /* Grow if needed */ + if (vec->len >= cap) { + size_t new_data_size = (size_t)(vec->len + 1) * esz; + /* Round up to next power of 2 block */ + if (new_data_size < 32) new_data_size = 32; + else { + size_t s = 32; + while (s < new_data_size) { + if (s > SIZE_MAX / 2) goto fail; + s *= 2; + } + new_data_size = s; + } + ray_t* new_vec = ray_scratch_realloc(vec, new_data_size); + if (!new_vec || RAY_IS_ERR(new_vec)) { + if (vec != original) ray_release(vec); + return new_vec ? new_vec : ray_error("oom", NULL); + } + vec = new_vec; + } + + /* Append element */ + char* dst = (char*)ray_data(vec) + vec->len * esz; + memcpy(dst, elem, esz); + vec->len++; + + return vec; + +fail: + if (vec != original) ray_release(vec); + return ray_error("oom", NULL); +} + +/* -------------------------------------------------------------------------- + * ray_vec_set + * -------------------------------------------------------------------------- */ + +ray_t* ray_vec_set(ray_t* vec, int64_t idx, const void* elem) { + if (!vec || RAY_IS_ERR(vec)) return vec; + if (vec->type == RAY_STR) return ray_error("type", NULL); + if (idx < 0 || idx >= vec->len) + return ray_error("range", NULL); + + /* COW: if shared, copy first */ + vec = ray_cow(vec); + if (!vec || RAY_IS_ERR(vec)) return vec; + + /* Writing a slot value invalidates any attached accelerator index. 
*/ + vec_drop_index_inplace(vec); + + uint8_t esz = ray_sym_elem_size(vec->type, vec->attrs); + char* dst = (char*)ray_data(vec) + idx * esz; + memcpy(dst, elem, esz); + + return vec; +} + +/* -------------------------------------------------------------------------- + * ray_vec_get + * -------------------------------------------------------------------------- */ + +void* ray_vec_get(ray_t* vec, int64_t idx) { + if (!vec || RAY_IS_ERR(vec)) return NULL; + if (vec->type == RAY_STR) return NULL; + + /* Slice path: redirect to parent */ + if (vec->attrs & RAY_ATTR_SLICE) { + ray_t* parent = vec->slice_parent; + int64_t offset = vec->slice_offset; + if (idx < 0 || idx >= vec->len) return NULL; + uint8_t esz = ray_sym_elem_size(parent->type, parent->attrs); + return (char*)ray_data(parent) + (offset + idx) * esz; + } + + if (idx < 0 || idx >= vec->len) return NULL; + uint8_t esz = ray_sym_elem_size(vec->type, vec->attrs); + return (char*)ray_data(vec) + idx * esz; +} + +/* -------------------------------------------------------------------------- + * ray_vec_slice (zero-copy view) + * -------------------------------------------------------------------------- */ + +ray_t* ray_vec_slice(ray_t* vec, int64_t offset, int64_t len) { + if (!vec || RAY_IS_ERR(vec)) return vec; + if (offset < 0 || len < 0 || offset > vec->len || len > vec->len - offset) + return ray_error("range", NULL); + + /* If input is already a slice, resolve to ultimate parent */ + ray_t* parent = vec; + int64_t parent_offset = offset; + if (vec->attrs & RAY_ATTR_SLICE) { + parent = vec->slice_parent; + parent_offset = vec->slice_offset + offset; + } + + /* Allocate a header-only block for the slice view */ + ray_t* s = ray_alloc(0); + if (!s || RAY_IS_ERR(s)) return s; + + s->type = parent->type; + s->attrs = RAY_ATTR_SLICE | (parent->attrs & RAY_SYM_W_MASK); + s->len = len; + s->slice_parent = parent; + s->slice_offset = parent_offset; + + /* Retain the parent so it stays alive */ + ray_retain(parent); + 
+ return s; +} + +/* -------------------------------------------------------------------------- + * ray_vec_concat + * -------------------------------------------------------------------------- */ + +ray_t* ray_vec_concat(ray_t* a, ray_t* b) { + if (!a || RAY_IS_ERR(a)) return a; + if (!b || RAY_IS_ERR(b)) return b; + if (a->type != b->type) + return ray_error("type", NULL); + + if (a->type == RAY_STR) { + int64_t total_len = a->len + b->len; + if (total_len < a->len) return ray_error("oom", NULL); + + ray_t* result = ray_vec_new(RAY_STR, total_len); + if (!result || RAY_IS_ERR(result)) return result; + result->len = total_len; + + ray_str_t* dst = (ray_str_t*)ray_data(result); + + /* Resolve a's data (may be a slice) */ + const ray_str_t* a_elems = (a->attrs & RAY_ATTR_SLICE) + ? &((const ray_str_t*)ray_data(a->slice_parent))[a->slice_offset] + : (const ray_str_t*)ray_data(a); + ray_t* a_pool_owner = (a->attrs & RAY_ATTR_SLICE) ? a->slice_parent : a; + + /* Resolve b's data (may be a slice) */ + const ray_str_t* b_elems = (b->attrs & RAY_ATTR_SLICE) + ? &((const ray_str_t*)ray_data(b->slice_parent))[b->slice_offset] + : (const ray_str_t*)ray_data(b); + ray_t* b_pool_owner = (b->attrs & RAY_ATTR_SLICE) ? b->slice_parent : b; + + /* Copy a's elements as-is */ + memcpy(dst, a_elems, (size_t)a->len * sizeof(ray_str_t)); + + /* Merge pools: a's pool + b's pool */ + int64_t a_pool_size = (a_pool_owner->str_pool) ? a_pool_owner->str_pool->len : 0; + int64_t b_pool_size = (b_pool_owner->str_pool) ? 
b_pool_owner->str_pool->len : 0; + int64_t total_pool = a_pool_size + b_pool_size; + + /* Guard: total pool must fit in uint32_t for pool_off rebasing */ + if (total_pool > (int64_t)UINT32_MAX) { + ray_release(result); + return ray_error("range", NULL); + } + + if (total_pool > 0) { + result->str_pool = ray_alloc((size_t)total_pool); + if (!result->str_pool || RAY_IS_ERR(result->str_pool)) { + result->str_pool = NULL; + ray_release(result); + return ray_error("oom", NULL); + } + result->str_pool->type = RAY_U8; + result->str_pool->len = total_pool; + char* pool_dst = (char*)ray_data(result->str_pool); + if (a_pool_size > 0) + memcpy(pool_dst, ray_data(a_pool_owner->str_pool), (size_t)a_pool_size); + if (b_pool_size > 0) + memcpy(pool_dst + a_pool_size, ray_data(b_pool_owner->str_pool), (size_t)b_pool_size); + } + + /* Copy b's elements, rebasing pool offsets */ + for (int64_t i = 0; i < b->len; i++) { + dst[a->len + i] = b_elems[i]; + if (!ray_str_is_inline(&b_elems[i]) && b_elems[i].len > 0) { + dst[a->len + i].pool_off += (uint32_t)a_pool_size; + } + } + + /* Propagate null bitmaps from a and b. + * Slices don't carry RAY_ATTR_HAS_NULLS — check RAY_ATTR_SLICE too. 
*/ + if ((a->attrs & (RAY_ATTR_HAS_NULLS | RAY_ATTR_SLICE)) || + (b->attrs & (RAY_ATTR_HAS_NULLS | RAY_ATTR_SLICE))) { + for (int64_t i = 0; i < a->len; i++) { + if (ray_vec_is_null((ray_t*)a, i)) { + ray_err_t err = ray_vec_set_null_checked(result, i, true); + if (err != RAY_OK) { ray_release(result); return ray_error(ray_err_code_str(err), NULL); } + } + } + for (int64_t i = 0; i < b->len; i++) { + if (ray_vec_is_null((ray_t*)b, i)) { + ray_err_t err = ray_vec_set_null_checked(result, a->len + i, true); + if (err != RAY_OK) { ray_release(result); return ray_error(ray_err_code_str(err), NULL); } + } + } + } + + return result; + } + + uint8_t a_esz = ray_sym_elem_size(a->type, a->attrs); + uint8_t b_esz = ray_sym_elem_size(b->type, b->attrs); + /* Use the wider of the two widths for SYM columns — carry only width bits, + * not flags like RAY_ATTR_SLICE or RAY_ATTR_HAS_NULLS from inputs. */ + uint8_t out_attrs = (a_esz >= b_esz) ? (a->attrs & RAY_SYM_W_MASK) : (b->attrs & RAY_SYM_W_MASK); + uint8_t esz = (a_esz >= b_esz) ? 
a_esz : b_esz; + + int64_t total_len = a->len + b->len; + if (total_len < a->len) return ray_error("oom", NULL); /* overflow */ + size_t data_size = (size_t)total_len * esz; + if (esz > 1 && data_size / esz != (size_t)total_len) + return ray_error("oom", NULL); /* multiplication overflow */ + + ray_t* result = ray_alloc(data_size); + if (!result || RAY_IS_ERR(result)) return result; + + result->type = a->type; + result->len = total_len; + result->attrs = out_attrs; + memset(result->nullmap, 0, 16); + + /* For SYM with mismatched widths, widen element-by-element */ + if (a->type == RAY_SYM && a_esz != b_esz) { + void* dst = ray_data(result); + for (int64_t i = 0; i < a->len; i++) { + int64_t val = ray_read_sym(ray_data(a), i, a->type, a->attrs); + ray_write_sym(dst, i, (uint64_t)val, result->type, result->attrs); + } + for (int64_t i = 0; i < b->len; i++) { + int64_t val = ray_read_sym(ray_data(b), i, b->type, b->attrs); + ray_write_sym(dst, a->len + i, (uint64_t)val, result->type, result->attrs); + } + } else { + /* Same width: fast memcpy path */ + void* a_data = (a->attrs & RAY_ATTR_SLICE) ? + ((char*)ray_data(a->slice_parent) + a->slice_offset * esz) : + ray_data(a); + memcpy(ray_data(result), a_data, (size_t)a->len * esz); + + void* b_data = (b->attrs & RAY_ATTR_SLICE) ? + ((char*)ray_data(b->slice_parent) + b->slice_offset * esz) : + ray_data(b); + memcpy((char*)ray_data(result) + (size_t)a->len * esz, b_data, + (size_t)b->len * esz); + } + + /* Propagate null bitmaps from a and b. + * Slices don't carry RAY_ATTR_HAS_NULLS — check RAY_ATTR_SLICE too. 
*/ + if ((a->attrs & (RAY_ATTR_HAS_NULLS | RAY_ATTR_SLICE)) || + (b->attrs & (RAY_ATTR_HAS_NULLS | RAY_ATTR_SLICE))) { + for (int64_t i = 0; i < a->len; i++) { + if (ray_vec_is_null((ray_t*)a, i)) { + ray_err_t err = ray_vec_set_null_checked(result, i, true); + if (err != RAY_OK) { ray_release(result); return ray_error(ray_err_code_str(err), NULL); } + } + } + for (int64_t i = 0; i < b->len; i++) { + if (ray_vec_is_null((ray_t*)b, i)) { + ray_err_t err = ray_vec_set_null_checked(result, a->len + i, true); + if (err != RAY_OK) { ray_release(result); return ray_error(ray_err_code_str(err), NULL); } + } + } + } + + /* LIST/TABLE columns hold child pointers — retain them */ + if (a->type == RAY_LIST || a->type == RAY_TABLE) { + ray_t** ptrs = (ray_t**)ray_data(result); + for (int64_t i = 0; i < total_len; i++) { + if (ptrs[i]) ray_retain(ptrs[i]); + } + } + + return result; +} + +/* -------------------------------------------------------------------------- + * ray_vec_insert_at — insert a single element at position idx. + * + * idx is a pre-insertion position in [0, vec->len]. idx == vec->len is + * equivalent to append. Does not support RAY_STR (use ray_str_vec_insert_at). + * -------------------------------------------------------------------------- */ + +ray_t* ray_vec_insert_at(ray_t* vec, int64_t idx, const void* elem) { + if (!vec || RAY_IS_ERR(vec)) return vec; + if (vec->type <= 0 || vec->type >= RAY_TYPE_COUNT) + return ray_error("type", NULL); + if (vec->type == RAY_STR) return ray_error("type", NULL); + if (idx < 0 || idx > vec->len) return ray_error("range", NULL); + + /* COW: if shared, copy first */ + ray_t* original = vec; + vec = ray_cow(vec); + if (!vec || RAY_IS_ERR(vec)) return vec; + + /* In-place insert mutates len + data + nullmap; any attached + * accelerator index is now stale. 
*/ + vec_drop_index_inplace(vec); + + uint8_t esz = ray_sym_elem_size(vec->type, vec->attrs); + int64_t cap = vec_capacity(vec); + + /* Grow if needed */ + if (vec->len >= cap) { + size_t new_data_size = (size_t)(vec->len + 1) * esz; + if (new_data_size < 32) new_data_size = 32; + else { + size_t s = 32; + while (s < new_data_size) { + if (s > SIZE_MAX / 2) goto fail_oom; + s *= 2; + } + new_data_size = s; + } + ray_t* new_vec = ray_scratch_realloc(vec, new_data_size); + if (!new_vec || RAY_IS_ERR(new_vec)) { + if (vec != original) ray_release(vec); + return new_vec ? new_vec : ray_error("oom", NULL); + } + vec = new_vec; + } + + int64_t old_len = vec->len; + char* base = (char*)ray_data(vec); + + /* Shift elements [idx..old_len) → [idx+1..old_len+1) */ + if (idx < old_len) { + memmove(base + (size_t)(idx + 1) * esz, + base + (size_t)idx * esz, + (size_t)(old_len - idx) * esz); + } + + /* Write the new element */ + memcpy(base + (size_t)idx * esz, elem, esz); + + vec->len = old_len + 1; + + /* Shift null bitmap bits [idx..old_len) up by one; clear bit at idx. + * Walk from tail backward so we don't overwrite unread bits. */ + if (vec->attrs & RAY_ATTR_HAS_NULLS) { + for (int64_t i = old_len - 1; i >= idx; i--) { + bool was_null = ray_vec_is_null(vec, i); + if (was_null) { + ray_err_t err = ray_vec_set_null_checked(vec, i + 1, true); + if (err != RAY_OK) goto fail_oom; + } else { + ray_err_t err = ray_vec_set_null_checked(vec, i + 1, false); + if (err != RAY_OK) goto fail_oom; + } + } + /* New element is not null */ + ray_err_t err = ray_vec_set_null_checked(vec, idx, false); + if (err != RAY_OK) goto fail_oom; + } + + return vec; + +fail_oom: + if (vec != original) ray_release(vec); + return ray_error("oom", NULL); +} + +/* -------------------------------------------------------------------------- + * ray_vec_insert_vec_at — splice src into vec at position idx. 
+ * + * Shares SYM-width widening, RAY_STR pool merge, and null-bit propagation + * with ray_vec_concat via the slice→concat→concat pattern. Always returns + * a fresh block; caller should release the input if no longer needed. + * -------------------------------------------------------------------------- */ + +ray_t* ray_vec_insert_vec_at(ray_t* vec, int64_t idx, ray_t* src) { + if (!vec || RAY_IS_ERR(vec)) return vec; + if (!src || RAY_IS_ERR(src)) return src; + if (vec->type != src->type) return ray_error("type", NULL); + if (idx < 0 || idx > vec->len) return ray_error("range", NULL); + + /* Fast path: idx == len is plain concat */ + if (idx == vec->len) return ray_vec_concat(vec, src); + /* Fast path: idx == 0 is reversed concat */ + if (idx == 0) return ray_vec_concat(src, vec); + + ray_t* head = ray_vec_slice(vec, 0, idx); + if (!head || RAY_IS_ERR(head)) return head; + + ray_t* tail = ray_vec_slice(vec, idx, vec->len - idx); + if (!tail || RAY_IS_ERR(tail)) { ray_release(head); return tail; } + + ray_t* mid = ray_vec_concat(head, src); + ray_release(head); + if (!mid || RAY_IS_ERR(mid)) { ray_release(tail); return mid; } + + ray_t* result = ray_vec_concat(mid, tail); + ray_release(mid); + ray_release(tail); + return result; +} + +/* -------------------------------------------------------------------------- + * ray_vec_insert_many — insert N values at N pre-insertion positions. + * + * idxs: I64 vec of length N, each idx in [0, vec->len]. + * vals: either a matching atom (broadcast) or same-type vec of length N + * (parallel) or length 1 (broadcast). + * + * For ties in idxs, the original input order is preserved (stable sort). + * Returns a fresh block; caller releases vec if no longer needed. + * RAY_STR targets are rejected — use ray_vec_insert_vec_at in a loop instead. + * For RAY_SYM, the source width must match the destination width. 
+ * -------------------------------------------------------------------------- */ + +ray_t* ray_vec_insert_many(ray_t* vec, ray_t* idxs, ray_t* vals) { + if (!vec || RAY_IS_ERR(vec)) return vec; + if (!idxs || RAY_IS_ERR(idxs)) return idxs; + if (!vals || RAY_IS_ERR(vals)) return vals; + if (vec->type <= 0 || vec->type >= RAY_TYPE_COUNT) return ray_error("type", NULL); + if (vec->type == RAY_STR) return ray_error("type", NULL); + if (idxs->type != RAY_I64) return ray_error("type", NULL); + + int64_t N = idxs->len; + int64_t old_len = vec->len; + uint8_t esz = ray_sym_elem_size(vec->type, vec->attrs); + + /* Fast path: N == 0 returns a fresh retain */ + if (N == 0) { ray_retain(vec); return vec; } + + /* Validate indices */ + const int64_t* idx_arr = (const int64_t*)ray_data(idxs); + for (int64_t k = 0; k < N; k++) { + if (idx_arr[k] < 0 || idx_arr[k] > old_len) + return ray_error("range", NULL); + } + + /* Classify vals: atom (broadcast) vs vec (parallel or singleton broadcast) */ + int broadcast; + if (vals->type < 0) { + if (vals->type != -vec->type) return ray_error("type", NULL); + broadcast = 1; + } else if (vals->type == vec->type) { + /* SYM width must match — dispatcher should widen upstream */ + if (vec->type == RAY_SYM && + (vals->attrs & RAY_SYM_W_MASK) != (vec->attrs & RAY_SYM_W_MASK)) + return ray_error("type", NULL); + if (vals->len == 1) broadcast = 1; + else if (vals->len == N) broadcast = 0; + else return ray_error("range", NULL); + } else { + return ray_error("type", NULL); + } + + /* Build sort buffer as I64 vec of 2*N slots: [idx0, src0, idx1, src1, ...] */ + ray_t* pair_vec = ray_vec_new(RAY_I64, 2 * N); + if (!pair_vec || RAY_IS_ERR(pair_vec)) return ray_error("oom", NULL); + pair_vec->len = 2 * N; + int64_t* pairs = (int64_t*)ray_data(pair_vec); + for (int64_t k = 0; k < N; k++) { + pairs[2 * k] = idx_arr[k]; + pairs[2 * k + 1] = k; + } + + /* Stable sort the (idx, original_k) pairs by idx. 
qsort isn't + * inherently stable, but a compound comparator on (idx, k) — where + * k is the original position — gives the same total order as a + * stable sort by idx alone. Replaces an O(N^2) insertion sort + * that hangs for bulk-set updates with thousands+ of indices. */ + qsort(pairs, (size_t)N, 2 * sizeof(int64_t), pair_cmp_idx_then_k); + + /* Allocate result */ + int64_t new_len = old_len + N; + if (new_len < old_len) { ray_release(pair_vec); return ray_error("oom", NULL); } + size_t data_size = (size_t)new_len * esz; + if (esz > 1 && data_size / esz != (size_t)new_len) { + ray_release(pair_vec); + return ray_error("oom", NULL); + } + + ray_t* result = ray_alloc(data_size); + if (!result || RAY_IS_ERR(result)) { ray_release(pair_vec); return result ? result : ray_error("oom", NULL); } + result->type = vec->type; + result->len = new_len; + result->attrs = vec->attrs & RAY_SYM_W_MASK; + memset(result->nullmap, 0, 16); + + /* Source pointers */ + const char* src_base = (vec->attrs & RAY_ATTR_SLICE) + ? ((const char*)ray_data(vec->slice_parent) + (size_t)vec->slice_offset * esz) + : (const char*)ray_data(vec); + + /* Value source: atom bytes or vec row bytes. + * GUID atoms keep their 16-byte payload in vals->obj, not inline; typed + * nulls carry obj==NULL and fall through to a zero buffer (null bit is + * then set below via RAY_ATOM_IS_NULL). */ + static const uint8_t zero_guid[16] = {0}; + const char* val_atom_bytes = NULL; + if (vals->type < 0) { + if (vec->type == RAY_GUID) { + val_atom_bytes = vals->obj + ? (const char*)ray_data(vals->obj) + : (const char*)zero_guid; + } else { + val_atom_bytes = (const char*)&vals->u8; + } + } + const char* val_vec_base = NULL; + if (val_atom_bytes == NULL) { + val_vec_base = (vals->attrs & RAY_ATTR_SLICE) + ? 
((const char*)ray_data(vals->slice_parent) + (size_t)vals->slice_offset * esz) + : (const char*)ray_data(vals); + } + + char* dst_base = (char*)ray_data(result); + + /* Walk: merge sorted inserts with original */ + int64_t w = 0; /* write cursor */ + int64_t p = 0; /* pair cursor */ + for (int64_t r = 0; r <= old_len; r++) { + while (p < N && pairs[2 * p] == r) { + int64_t src_pos = pairs[2 * p + 1]; + if (val_atom_bytes) { + /* Broadcast atom */ + memcpy(dst_base + (size_t)w * esz, val_atom_bytes, esz); + /* Atom-level null propagation */ + if (RAY_ATOM_IS_NULL(vals)) { + ray_err_t e = ray_vec_set_null_checked(result, w, true); + if (e != RAY_OK) { ray_release(result); ray_release(pair_vec); return ray_error("oom", NULL); } + } + } else if (broadcast) { + /* Single-element vec broadcast — always row 0 */ + memcpy(dst_base + (size_t)w * esz, val_vec_base, esz); + if (ray_vec_is_null(vals, 0)) { + ray_err_t e = ray_vec_set_null_checked(result, w, true); + if (e != RAY_OK) { ray_release(result); ray_release(pair_vec); return ray_error("oom", NULL); } + } + } else { + /* Parallel: use src_pos into vals */ + memcpy(dst_base + (size_t)w * esz, + val_vec_base + (size_t)src_pos * esz, esz); + if (ray_vec_is_null(vals, src_pos)) { + ray_err_t e = ray_vec_set_null_checked(result, w, true); + if (e != RAY_OK) { ray_release(result); ray_release(pair_vec); return ray_error("oom", NULL); } + } + } + w++; + p++; + } + if (r < old_len) { + memcpy(dst_base + (size_t)w * esz, src_base + (size_t)r * esz, esz); + if (ray_vec_is_null(vec, r)) { + ray_err_t e = ray_vec_set_null_checked(result, w, true); + if (e != RAY_OK) { ray_release(result); ray_release(pair_vec); return ray_error("oom", NULL); } + } + w++; + } + } + + ray_release(pair_vec); + return result; +} + +/* -------------------------------------------------------------------------- + * ray_vec_from_raw + * -------------------------------------------------------------------------- */ + +ray_t* ray_vec_from_raw(int8_t type, 
const void* data, int64_t count) { + if (type <= 0 || type >= RAY_TYPE_COUNT) + return ray_error("type", NULL); + if (type == RAY_STR) return ray_error("type", NULL); + if (count < 0) return ray_error("range", NULL); + + /* RAY_SYM defaults to W64 (global sym IDs) */ + uint8_t sym_w = (type == RAY_SYM) ? RAY_SYM_W64 : 0; + uint8_t esz = ray_sym_elem_size(type, sym_w); + size_t data_size = (size_t)count * esz; + + ray_t* v = ray_alloc(data_size); + if (!v || RAY_IS_ERR(v)) return v; + + v->type = type; + v->len = count; + v->attrs = sym_w; + memset(v->nullmap, 0, 16); + + memcpy(ray_data(v), data, data_size); + + /* LIST/TABLE elements are child pointers — retain them */ + if (type == RAY_LIST || type == RAY_TABLE) { + ray_t** ptrs = (ray_t**)ray_data(v); + for (int64_t i = 0; i < count; i++) { + if (ptrs[i]) ray_retain(ptrs[i]); + } + } + + return v; +} + +/* -------------------------------------------------------------------------- + * Null bitmap operations + * + * Inline: for vectors with <=128 elements, bits stored in nullmap[16] (128 bits). + * External: for >128 elements, allocate a U8 vector bitmap via ext_nullmap. + * -------------------------------------------------------------------------- */ + +ray_err_t ray_vec_set_null_checked(ray_t* vec, int64_t idx, bool is_null) { + if (!vec || RAY_IS_ERR(vec)) return RAY_ERR_TYPE; + if (vec->attrs & RAY_ATTR_SLICE) return RAY_ERR_TYPE; /* cannot set null on slice — COW first */ + if (idx < 0 || idx >= vec->len) return RAY_ERR_RANGE; + + /* Mutation invalidates any attached accelerator index — drop it inline. + * Caller must already hold a unique ref (set-null on a shared vec is a + * bug regardless of indexing). 
*/ + vec_drop_index_inplace(vec); + + /* Mark HAS_NULLS if setting a null (defer for RAY_STR until ext alloc succeeds) */ + if (is_null && vec->type != RAY_STR) vec->attrs |= RAY_ATTR_HAS_NULLS; + + if (!(vec->attrs & RAY_ATTR_NULLMAP_EXT)) { + /* RAY_STR uses bytes 8-15 for str_pool, HAS_LINK uses bytes 8-15 for + * link_target — both must skip the inline-128 path to avoid + * aliasing corruption. Otherwise <=128 elements go inline. */ + bool can_inline = (vec->type != RAY_STR) && idx < 128 && + !(vec->attrs & RAY_ATTR_HAS_LINK); + if (can_inline) { + /* Inline nullmap path (<=128 elements, non-STR, non-linked) */ + int byte_idx = (int)(idx / 8); + int bit_idx = (int)(idx % 8); + if (is_null) + vec->nullmap[byte_idx] |= (uint8_t)(1u << bit_idx); + else + vec->nullmap[byte_idx] &= (uint8_t)~(1u << bit_idx); + return RAY_OK; + } + /* Need to promote to external nullmap */ + int64_t bitmap_len = (vec->len + 7) / 8; + ray_t* ext = ray_vec_new(RAY_U8, bitmap_len); + if (!ext || RAY_IS_ERR(ext)) return RAY_ERR_OOM; + ext->len = bitmap_len; + if (vec->type == RAY_STR || (vec->attrs & RAY_ATTR_HAS_LINK)) { + /* Bytes 0-15 contain pointers/sym, not bits — start ext zeroed. + * (Linked vecs reach here only when adding their first null, + * since promote_inline_to_ext in linkop.c covers the + * pre-existing-nulls case at attach time.) 
*/ + memset(ray_data(ext), 0, (size_t)bitmap_len); + } else { + /* Copy existing inline bits */ + memcpy(ray_data(ext), vec->nullmap, 16); + /* Zero remaining bytes */ + if (bitmap_len > 16) + memset((char*)ray_data(ext) + 16, 0, (size_t)(bitmap_len - 16)); + } + vec->attrs |= RAY_ATTR_NULLMAP_EXT; + if (is_null) vec->attrs |= RAY_ATTR_HAS_NULLS; + vec->ext_nullmap = ext; + } + + /* External nullmap path */ + ray_t* ext = vec->ext_nullmap; + /* Grow external bitmap if needed */ + int64_t needed_bytes = (idx / 8) + 1; + if (needed_bytes > ext->len) { + int64_t new_len = (vec->len + 7) / 8; + if (new_len < needed_bytes) new_len = needed_bytes; + size_t new_data_size = (size_t)new_len; + int64_t old_len = ext->len; + ray_t* new_ext = ray_scratch_realloc(ext, new_data_size); + if (!new_ext || RAY_IS_ERR(new_ext)) return RAY_ERR_OOM; + /* Zero new bytes */ + if (new_len > old_len) + memset((char*)ray_data(new_ext) + old_len, 0, + (size_t)(new_len - old_len)); + new_ext->len = new_len; + vec->ext_nullmap = new_ext; + ext = new_ext; + } + + uint8_t* bits = (uint8_t*)ray_data(ext); + int byte_idx = (int)(idx / 8); + int bit_idx = (int)(idx % 8); + if (is_null) + bits[byte_idx] |= (uint8_t)(1u << bit_idx); + else + bits[byte_idx] &= (uint8_t)~(1u << bit_idx); + return RAY_OK; +} + +void ray_vec_set_null(ray_t* vec, int64_t idx, bool is_null) { + (void)ray_vec_set_null_checked(vec, idx, is_null); +} + +/* -------------------------------------------------------------------------- + * str_pool_cow — ensure pool is privately owned after ray_cow() + * + * After ray_cow(), the copy shares the same str_pool as the original. + * ray_retain_owned_refs bumps pool rc, so direct mutation would corrupt + * the original's pool data (or ray_scratch_realloc would ray_free a + * shared block). Deep-copy the pool when rc > 1. 
+ * -------------------------------------------------------------------------- */ + +static ray_t* str_pool_cow(ray_t* vec) { + if (!vec->str_pool || RAY_IS_ERR(vec->str_pool)) return vec; + uint32_t pool_rc = ray_atomic_load(&vec->str_pool->rc); + if (pool_rc <= 1) return vec; + + size_t pool_data_size = ((size_t)1 << vec->str_pool->order) - 32; + ray_t* new_pool = ray_alloc(pool_data_size); + if (!new_pool || RAY_IS_ERR(new_pool)) return NULL; + + size_t copy_bytes = (size_t)vec->str_pool->len; + if (copy_bytes > pool_data_size) copy_bytes = pool_data_size; + + uint8_t saved_order = new_pool->order; + uint8_t saved_mmod = new_pool->mmod; + memcpy(new_pool, vec->str_pool, 32 + copy_bytes); + new_pool->order = saved_order; + new_pool->mmod = saved_mmod; + ray_atomic_store(&new_pool->rc, 1); + + ray_release(vec->str_pool); + vec->str_pool = new_pool; + return vec; +} + +/* -------------------------------------------------------------------------- + * String pool dead-byte tracking + * + * Dead bytes are stored as a uint32_t in the pool block's nullmap[0..3], + * which is otherwise unused (the pool is a raw CHAR vector). + * -------------------------------------------------------------------------- */ + +static inline uint32_t str_pool_dead(ray_t* vec) { + if (!vec->str_pool) return 0; + uint32_t d; + memcpy(&d, vec->str_pool->nullmap, 4); + return d; +} + +static inline void str_pool_add_dead(ray_t* vec, uint32_t bytes) { + uint32_t d = str_pool_dead(vec); + d = (d > UINT32_MAX - bytes) ? UINT32_MAX : d + bytes; + memcpy(vec->str_pool->nullmap, &d, 4); +} + +/* -------------------------------------------------------------------------- + * ray_str_vec_append — append a string to a RAY_STR vector + * + * Strings <= 12 bytes are inlined in the ray_str_t element. + * Strings > 12 bytes store a 4-byte prefix + offset into a growable pool. 
+ * -------------------------------------------------------------------------- */
+
+ray_t* ray_str_vec_append(ray_t* vec, const char* s, size_t len) {
+    if (!vec || RAY_IS_ERR(vec)) return vec;
+    if (vec->type != RAY_STR) return ray_error("type", NULL);
+    if (len > UINT32_MAX) return ray_error("range", NULL);  /* elem->len is stored as uint32_t */
+
+    ray_t* original = vec;  /* kept so failure paths release only a COW copy */
+    vec = ray_cow(vec);
+    if (!vec || RAY_IS_ERR(vec)) return vec;
+    if (!str_pool_cow(vec)) goto fail_oom;  /* NULL: private pool copy could not be allocated */
+
+    int64_t pool_off = 0;
+    if (len > RAY_STR_INLINE_MAX) {
+        if (!vec->str_pool) {
+            size_t init_pool = len < 256 ? 256 : len * 2;  /* first pool: at least 256 bytes */
+            vec->str_pool = ray_alloc(init_pool);
+            if (!vec->str_pool || RAY_IS_ERR(vec->str_pool)) {
+                vec->str_pool = NULL;
+                goto fail_oom;
+            }
+            vec->str_pool->type = RAY_U8;
+            vec->str_pool->len = 0;
+        }
+
+        int64_t pool_used = vec->str_pool->len;
+        size_t pool_cap = ((size_t)1 << vec->str_pool->order) - 32;  /* presumably 2^order block minus 32-byte header — confirm */
+        if ((size_t)pool_used + len > pool_cap) {
+            size_t need = (size_t)pool_used + len;
+            size_t new_cap = pool_cap;
+            if (new_cap == 0) new_cap = 256;
+            while (new_cap < need) {  /* double until the new string fits */
+                if (new_cap > SIZE_MAX / 2) goto fail_oom;
+                new_cap *= 2;
+            }
+            ray_t* np = ray_scratch_realloc(vec->str_pool, new_cap);
+            if (!np || RAY_IS_ERR(np)) goto fail_oom;
+            vec->str_pool = np;
+        }
+
+        if ((uint64_t)pool_used > UINT32_MAX) goto fail_range;  /* pool_off is stored as uint32_t */
+        pool_off = pool_used;
+    }
+
+    /* Grow element array if needed — pool is already ready, so a failure
+     * here happens before any element or pool length has been changed */
+    int64_t cap = vec_capacity(vec);
+    if (vec->len >= cap) {
+        size_t new_data_size = (size_t)(vec->len + 1) * sizeof(ray_str_t);
+        if (new_data_size < 32) new_data_size = 32;
+        else {
+            size_t s2 = 32;
+            while (s2 < new_data_size) {
+                if (s2 > SIZE_MAX / 2) goto fail_oom;
+                s2 *= 2;
+            }
+            new_data_size = s2;
+        }
+        ray_t* nv = ray_scratch_realloc(vec, new_data_size);
+        if (!nv || RAY_IS_ERR(nv)) goto fail_oom;
+        vec = nv;
+    }
+
+    ray_str_t* elem = &((ray_str_t*)ray_data(vec))[vec->len];
+    memset(elem, 0, sizeof(ray_str_t));
+    elem->len = (uint32_t)len;
+
+    if (len <= RAY_STR_INLINE_MAX) {
+        if (len > 0) memcpy(elem->data, s, len);  /* short string lives inside the element */
+    } else {
+        /* Copy string into pool (already allocated above) */
+        char* pool_base = (char*)ray_data(vec->str_pool);
+        memcpy(pool_base + pool_off, s, len);
+
+        memcpy(elem->prefix, s, 4);  /* first 4 bytes are also kept in the element */
+        elem->pool_off = (uint32_t)pool_off;
+        vec->str_pool->len = pool_off + (int64_t)len;
+    }
+
+    vec->len++;
+    return vec;
+
+fail_oom:
+    if (vec != original) ray_release(vec);
+    return ray_error("oom", NULL);
+fail_range:
+    if (vec != original) ray_release(vec);
+    return ray_error("range", NULL);
+}
+
+/* --------------------------------------------------------------------------
+ * ray_str_vec_get — read a string from a RAY_STR vector by index
+ *
+ * Returns a pointer to the string data (inline or pool) and sets *out_len.
+ * Returns NULL for invalid input or out-of-bounds index.
+ *
+ * vec:     RAY_STR vector or slice of one (slices read through slice_parent).
+ * idx:     element index in [0, vec->len).
+ * out_len: optional; set to 0 first, then to the element's length.
+ *
+ * A zero-length element yields "" rather than NULL.  NULL is also returned
+ * when a pooled element is found but the owner has no str_pool.  The
+ * returned pointer refers to storage owned by the vector; no terminator is
+ * written by this function, so use *out_len for the length.
+ * -------------------------------------------------------------------------- */
+
+const char* ray_str_vec_get(ray_t* vec, int64_t idx, size_t* out_len) {
+    if (out_len) *out_len = 0;
+    if (!vec || RAY_IS_ERR(vec) || vec->type != RAY_STR) return NULL;
+    if (idx < 0 || idx >= vec->len) return NULL;
+
+    /* Slice: redirect to parent */
+    ray_t* data_owner = vec;
+    int64_t data_idx = idx;
+    if (vec->attrs & RAY_ATTR_SLICE) {
+        data_owner = vec->slice_parent;
+        data_idx = vec->slice_offset + idx;
+    }
+
+    const ray_str_t* elem = &((const ray_str_t*)ray_data(data_owner))[data_idx];
+    if (out_len) *out_len = elem->len;
+
+    if (elem->len == 0) return "";
+    if (ray_str_is_inline(elem)) return elem->data;
+
+    /* Pooled: resolve via pool */
+    if (!data_owner->str_pool) return NULL;
+    return (const char*)ray_data(data_owner->str_pool) + elem->pool_off;
+}
+
+/* --------------------------------------------------------------------------
+ * ray_str_vec_set — update string at index in a RAY_STR vector
+ *
+ * Overwrites element at idx.  Old pooled bytes become dead space (reclaimed
+ * by ray_str_vec_compact).  New pooled strings are appended to the pool.
+ * -------------------------------------------------------------------------- */
+
+ray_t* ray_str_vec_set(ray_t* vec, int64_t idx, const char* s, size_t len) {
+    if (!vec || RAY_IS_ERR(vec)) return vec;
+    if (vec->type != RAY_STR) return ray_error("type", NULL);
+    if (idx < 0 || idx >= vec->len) return ray_error("range", NULL);
+    if (len > UINT32_MAX) return ray_error("range", NULL);  /* elem->len is stored as uint32_t */
+
+    ray_t* original = vec;  /* kept so failure paths release only a COW copy */
+    vec = ray_cow(vec);
+    if (!vec || RAY_IS_ERR(vec)) return vec;
+    if (!str_pool_cow(vec)) goto fail_oom;  /* NULL: private pool copy could not be allocated */
+
+    ray_str_t* elem = &((ray_str_t*)ray_data(vec))[idx];
+
+    if (len <= RAY_STR_INLINE_MAX) {
+        /* Track dead bytes if old string was pooled */
+        if (!ray_str_is_inline(elem) && elem->len > 0 && vec->str_pool) {
+            str_pool_add_dead(vec, elem->len);
+        }
+        memset(elem, 0, sizeof(ray_str_t));
+        elem->len = (uint32_t)len;
+        if (len > 0) memcpy(elem->data, s, len);  /* short string lives inside the element */
+    } else {
+        if (!vec->str_pool) {
+            size_t init_pool = len < 256 ? 256 : len * 2;  /* first pool: at least 256 bytes */
+            vec->str_pool = ray_alloc(init_pool);
+            if (!vec->str_pool || RAY_IS_ERR(vec->str_pool)) {
+                vec->str_pool = NULL;
+                goto fail_oom;
+            }
+            vec->str_pool->type = RAY_U8;
+            vec->str_pool->len = 0;
+        }
+
+        /* Grow pool if needed */
+        int64_t pool_used = vec->str_pool->len;
+        size_t pool_cap = ((size_t)1 << vec->str_pool->order) - 32;  /* presumably 2^order block minus 32-byte header — confirm */
+        if ((size_t)pool_used + len > pool_cap) {
+            size_t need = (size_t)pool_used + len;
+            size_t new_cap = pool_cap;
+            if (new_cap == 0) new_cap = 256;
+            while (new_cap < need) {  /* double until the new string fits */
+                if (new_cap > SIZE_MAX / 2) goto fail_oom;
+                new_cap *= 2;
+            }
+            ray_t* np = ray_scratch_realloc(vec->str_pool, new_cap);
+            if (!np || RAY_IS_ERR(np)) goto fail_oom;
+            vec->str_pool = np;
+        }
+
+        if ((uint64_t)pool_used > UINT32_MAX) goto fail_range;  /* pool_off is stored as uint32_t */
+
+        /* Pool alloc succeeded — now safe to modify the element */
+        if (!ray_str_is_inline(elem) && elem->len > 0 && vec->str_pool) {
+            str_pool_add_dead(vec, elem->len);
+        }
+
+        char* pool_base = (char*)ray_data(vec->str_pool);
+        memcpy(pool_base + pool_used, s, len);  /* new bytes go at the end of the pool */
+        memset(elem, 0, sizeof(ray_str_t));
+        elem->len = (uint32_t)len;
+        memcpy(elem->prefix, s, 4);  /* first 4 bytes are also kept in the element */
+        elem->pool_off = (uint32_t)pool_used;
+        vec->str_pool->len = pool_used + (int64_t)len;
+    }
+
+    return vec;
+
+fail_oom:
+    if (vec != original) ray_release(vec);
+    return ray_error("oom", NULL);
+fail_range:
+    if (vec != original) ray_release(vec);
+    return ray_error("range", NULL);
+}
+
+/* --------------------------------------------------------------------------
+ * ray_str_vec_insert_at — insert a single string at position idx.
+ *
+ * Wraps (s, len) into a 1-element RAY_STR vector and delegates to
+ * ray_vec_insert_vec_at, which handles pool merging via ray_vec_concat.
+ *
+ * idx is a pre-insertion position in [0, vec->len]; anything else yields a
+ * "range" error, and a non-RAY_STR vec yields "type".  The temporary
+ * 1-element vector is released before returning; vec itself is not
+ * released here.
+ * -------------------------------------------------------------------------- */
+
+ray_t* ray_str_vec_insert_at(ray_t* vec, int64_t idx, const char* s, size_t len) {
+    if (!vec || RAY_IS_ERR(vec)) return vec;
+    if (vec->type != RAY_STR) return ray_error("type", NULL);
+    if (idx < 0 || idx > vec->len) return ray_error("range", NULL);
+
+    ray_t* tmp = ray_vec_new(RAY_STR, 1);
+    if (!tmp || RAY_IS_ERR(tmp)) return tmp ? tmp : ray_error("oom", NULL);
+
+    ray_t* tmp2 = ray_str_vec_append(tmp, s, len);  /* may return a different block than tmp */
+    if (!tmp2 || RAY_IS_ERR(tmp2)) { ray_release(tmp); return tmp2 ? tmp2 : ray_error("oom", NULL); }
+
+    ray_t* result = ray_vec_insert_vec_at(vec, idx, tmp2);
+    ray_release(tmp2);
+    return result;
+}
+
+/* --------------------------------------------------------------------------
+ * ray_str_vec_compact — reclaim dead pool space
+ *
+ * Allocates a fresh pool containing only live pooled strings, updates
+ * element offsets, and releases the old pool.
+ * -------------------------------------------------------------------------- */
+
+ray_t* ray_str_vec_compact(ray_t* vec) {
+    if (!vec || RAY_IS_ERR(vec)) return vec;
+    if (vec->type != RAY_STR) return ray_error("type", NULL);
+    if (!vec->str_pool || str_pool_dead(vec) == 0) return vec; /* no pool or no dead bytes: nothing to reclaim */
+
+    ray_t* original = vec;
+    vec = ray_cow(vec);
+    if (!vec || RAY_IS_ERR(vec)) return vec;
+    if (!str_pool_cow(vec)) {
+        if (vec != original) ray_release(vec);
+        return ray_error("oom", NULL);
+    }
+
+    /* Compute true live size by scanning elements — avoids overflow when
+     * the dead-byte counter (uint32_t) has saturated at UINT32_MAX. */
+    ray_str_t* elems = (ray_str_t*)ray_data(vec);
+    size_t live_size = 0;
+    for (int64_t i = 0; i < vec->len; i++) {
+        if (ray_vec_is_null(vec, i) || ray_str_is_inline(&elems[i]) || elems[i].len == 0) continue;
+        live_size += elems[i].len;
+    }
+
+    if (live_size == 0) { /* no pooled strings remain: drop the pool entirely */
+        ray_release(vec->str_pool);
+        vec->str_pool = NULL;
+        return vec;
+    }
+
+    ray_t* new_pool = ray_alloc(live_size);
+    if (!new_pool || RAY_IS_ERR(new_pool)) return vec; /* allocation failed: vec is returned with its old pool, uncompacted */
+    new_pool->type = RAY_U8;
+    new_pool->len = 0;
+    memset(new_pool->nullmap, 0, 16); /* NOTE(review): presumably clears the dead-byte counter kept in these 16 bytes — confirm */
+
+    char* old_base = (char*)ray_data(vec->str_pool);
+    char* new_base = (char*)ray_data(new_pool);
+    uint32_t write_off = 0; /* next free offset in new_pool */
+
+    for (int64_t i = 0; i < vec->len; i++) {
+        if (ray_vec_is_null(vec, i) || ray_str_is_inline(&elems[i]) || elems[i].len == 0) continue;
+
+        uint32_t slen = elems[i].len;
+        memcpy(new_base + write_off, old_base + elems[i].pool_off, slen);
+        elems[i].pool_off = write_off; /* repoint the element at its copy in the new pool */
+        write_off += slen;
+    }
+
+    new_pool->len = (int64_t)write_off;
+    ray_release(vec->str_pool);
+    vec->str_pool = new_pool;
+
+    return vec;
+}
+
+/* --------------------------------------------------------------------------
+ * ray_embedding_new — create a flat F32 vector for N*D embedding storage
+ * -------------------------------------------------------------------------- */
+
+ray_t* ray_embedding_new(int64_t nrows, int32_t dim) {
+    int64_t total = nrows * (int64_t)dim; /* NOTE(review): negative inputs and multiplication overflow are not checked here — confirm callers validate */
+    ray_t* v = ray_vec_new(RAY_F32, total);
+    if (!v || RAY_IS_ERR(v)) return v;
+    v->len = total; /* length is set to the full nrows*dim element count up front */
+    return v;
+}
+
+bool ray_vec_is_null(ray_t* vec, int64_t idx) { /* returns false for a NULL or error vec and for an out-of-range idx */
+    if (!vec || RAY_IS_ERR(vec)) return false;
+    if (idx < 0 || idx >= vec->len) return false;
+
+    /* Slice: delegate to parent with adjusted index */
+    if (vec->attrs & RAY_ATTR_SLICE) {
+        ray_t* parent = vec->slice_parent;
+        int64_t pidx = vec->slice_offset + idx;
+        return ray_vec_is_null(parent, pidx);
+    }
+
+    if (!vec_any_nulls(vec)) return false;
+
+    ray_t* ext = NULL;
+    const uint8_t* inline_bits = vec_inline_nullmap(vec, &ext);
+    if (ext) {
+        int64_t byte_idx = idx / 8;
+        if (byte_idx >= ext->len) return false; /* bit lies beyond the external bitmap */
+        const uint8_t* bits = (const uint8_t*)ray_data(ext);
+        return (bits[byte_idx] >> (idx % 8)) & 1;
+    }
+
+    /* Inline nullmap path. RAY_STR's inline 16 bytes hold str_pool/str_ext_null
+     * (or, when an index is attached, were the same and are now in the index
+     * snapshot). Either way, RAY_STR uses ext nullmap exclusively for its
+     * null bits, which is handled above; if the inline path is taken for
+     * RAY_STR, no nulls are present. */
+    if (vec->type == RAY_STR) return false;
+    if (idx >= 128) return false; /* the inline path only consults bits 0..127 */
+    int byte_idx = (int)(idx / 8);
+    int bit_idx = (int)(idx % 8);
+    return (inline_bits[byte_idx] >> bit_idx) & 1;
+}
+
+/* --------------------------------------------------------------------------
+ * ray_vec_copy_nulls — bulk-copy null bitmap from src to dst
+ *
+ * dst must have the same len as src (or at least as many elements).
+ * Handles inline, external, and slice source bitmaps.
+ * -------------------------------------------------------------------------- */
+
+ray_err_t ray_vec_copy_nulls(ray_t* dst, const ray_t* src) {
+    if (!dst || !src) return RAY_ERR_TYPE;
+
+    /* Use ray_vec_is_null which handles slices, inline, and external bitmaps
+     * transparently. For non-null sources this returns immediately. */
+    bool has_any = false;
+    if (src->attrs & RAY_ATTR_SLICE) {
+        const ray_t* parent = src->slice_parent;
+        if (parent && (parent->attrs & RAY_ATTR_HAS_NULLS)) has_any = true; /* for a slice, the flag is read from the parent */
+    } else {
+        if (src->attrs & RAY_ATTR_HAS_NULLS) has_any = true;
+    }
+    if (!has_any) return RAY_OK; /* source carries no null flag: nothing to copy */
+
+    for (int64_t i = 0; i < dst->len && i < src->len; i++) { /* bounded by the shorter of dst and src */
+        if (ray_vec_is_null((ray_t*)src, i)) {
+            ray_err_t err = ray_vec_set_null_checked(dst, i, true); /* only sets bits; nulls already present in dst are not cleared */
+            if (err != RAY_OK) return err;
+        }
+    }
+    return RAY_OK;
+}
diff --git a/crates/rayforce-sys/vendor/rayforce/src/vec/vec.h b/crates/rayforce-sys/vendor/rayforce/src/vec/vec.h
new file mode 100644
index 0000000..15d670e
--- /dev/null
+++ b/crates/rayforce-sys/vendor/rayforce/src/vec/vec.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2025-2026 Anton Kundenko
+ * All rights reserved.
+
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RAY_VEC_H
+#define RAY_VEC_H
+
+/*
+ * vec.h -- Vector operations.
+ * + * Vectors are ray_t blocks with positive type tags. Data follows the 32-byte + * header. Supports append, get, set, slice (zero-copy), concat, and nullable + * bitmap (inline for <=128 elements, external for >128). + */ + +#include /* NOTE(review): the include target appears to have been stripped during extraction; presumably the angle-bracket form of rayforce.h — confirm */ + +/* Copy null bitmap from src to dst (handles slices, inline, external). + * dst must be at least as long as src; rows beyond the shorter of the two lengths are not visited. Internal helper. */ +ray_err_t ray_vec_copy_nulls(ray_t* dst, const ray_t* src); + +/* Return a pointer to the effective null bitmap bytes for `v`, accounting + * for slice / external / inline / HAS_INDEX storage forms. Returns NULL + * when `v` has no nulls (caller should gate on `v->attrs & RAY_ATTR_HAS_NULLS` + * before calling for the cheap fast-path). + * + * On return: + * *bit_offset_out (if non-NULL): bit-offset within the returned buffer + * that corresponds to v's row 0. Non-zero only for slices. + * *len_bits_out (if non-NULL): total bits addressable in the buffer. + * For inline, this is 128. For external, it's the ext->len * 8. + * + * The returned pointer is valid as long as `v` (and its ext_nullmap / + * attached index ray_t, if any) are not released or mutated.
*/ +const uint8_t* ray_vec_nullmap_bytes(const ray_t* v, + int64_t* bit_offset_out, + int64_t* len_bits_out); + +#endif /* RAY_VEC_H */ diff --git a/crates/raysense-cli/Cargo.toml b/crates/raysense-cli/Cargo.toml index d92e2de..1ed1bc8 100644 --- a/crates/raysense-cli/Cargo.toml +++ b/crates/raysense-cli/Cargo.toml @@ -33,10 +33,14 @@ path = "src/main.rs" [dependencies] anyhow.workspace = true +axum = "0.7" clap.workspace = true rayforce-sys = { path = "../rayforce-sys", version = "0.1.0" } raysense-core = { path = "../raysense-core", version = "0.1.0" } raysense-memory = { path = "../raysense-memory", version = "0.1.0" } serde.workspace = true serde_json.workspace = true +sha2 = "0.10" +tokio = { version = "1", features = ["rt-multi-thread", "macros", "sync", "time", "signal"] } +tokio-stream = { version = "0.1", features = ["sync"] } toml = "1.1.2" diff --git a/crates/raysense-cli/src/lib.rs b/crates/raysense-cli/src/lib.rs index 6522996..0c54602 100644 --- a/crates/raysense-cli/src/lib.rs +++ b/crates/raysense-cli/src/lib.rs @@ -113,17 +113,18 @@ enum Command { #[arg(long)] config: Option, }, + /// Start a live UI server. The page subscribes to server-sent events and + /// reloads when the scan content hash changes — never on a fixed timer. + /// Single source of UI; no static HTML export.
Visualize { #[arg(default_value = ".")] path: PathBuf, - #[arg(long)] - output: Option, - #[arg(long)] - watch: bool, #[arg(long, default_value_t = 2)] interval: u64, #[arg(long)] config: Option, + #[arg(long, default_value_t = 7000)] + port: u16, /* TCP port the local UI server binds to */ }, Plugin { #[command(subcommand)] @@ -374,11 +375,10 @@ pub fn run() -> Result<()> { } => watch_project(&path, config.as_deref(), interval)?, Command::Visualize { path, - output, - watch, interval, config, - } => visualize_project(&path, output, config.as_deref(), watch, interval)?, + port, + } => serve_visualization(&path, config.as_deref(), interval, port)?, Command::Plugin { command } => match command { PluginCommand::List { path, config } => list_plugins(&path, config.as_deref())?, PluginCommand::Add { @@ -731,36 +731,167 @@ fn watch_project(root: &Path, config_path: Option<&Path>, interval: u64) -> Resu } } -fn visualize_project( +/// Run a tokio HTTP server that hosts the live visualization. The server +/// re-scans on a fixed interval, only emits an SSE `data-changed` event when +/// the new snapshot's content hash differs from the previous one, and serves +/// the HTML page without any meta-refresh. Browsers connected to `/events` +/// reload the page on each change; other state (filter selections, scroll, +/// expanded panels) survives whenever data didn't actually change.
+fn serve_visualization( root: &Path, - output: Option, config_path: Option<&Path>, - watch: bool, interval: u64, + port: u16, ) -> Result<()> { - let output = output.unwrap_or_else(|| root.join(".raysense/visualization.html")); - if let Some(parent) = output.parent() { - fs::create_dir_all(parent) - .with_context(|| format!("failed to create {}", parent.display()))?; - } - loop { - let config = config_for_root(root, config_path)?; - let report = scan_path_with_config(root, &config)?; - let health = compute_health_with_config(&report, &config); - fs::write(&output, visualization_html(&report, &health)) - .with_context(|| format!("failed to write {}", output.display()))?; + let root = root.to_path_buf(); + let config_path = config_path.map(Path::to_path_buf); + let interval = interval.max(1); /* clamp so the rescan period is at least one second */ + + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .context("failed to start tokio runtime")?; + + runtime.block_on(async move { + use axum::{ + response::sse::{Event, KeepAlive, Sse}, + response::{Html, IntoResponse}, + routing::get, + Json, Router, + }; + use std::sync::Arc; + use tokio::sync::{broadcast, RwLock}; + use tokio_stream::wrappers::BroadcastStream; + use tokio_stream::StreamExt; + + let initial = scan_now(&root, config_path.as_deref())?; + let state = Arc::new(LiveState { + inner: RwLock::new(initial), + tx: broadcast::channel::<()>(16).0, /* only the sender is kept; each events request subscribes its own receiver */ + }); + + let scanner_state = state.clone(); + let scanner_root = root.clone(); + let scanner_config = config_path.clone(); + tokio::spawn(async move { + let mut ticker = tokio::time::interval(std::time::Duration::from_secs(interval)); + ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); + ticker.tick().await; // first tick fires immediately; we already scanned.
+ loop { + ticker.tick().await; + let scan = match tokio::task::spawn_blocking({ + let root = scanner_root.clone(); + let cfg = scanner_config.clone(); + move || scan_now(&root, cfg.as_deref()) /* the scan runs on the blocking thread pool, off the async workers */ + }) + .await + { + Ok(Ok(snap)) => snap, + Ok(Err(err)) => { + eprintln!("rescan failed: {err}"); + continue; + } + Err(err) => { + eprintln!("rescan task panicked: {err}"); + continue; + } + }; + let mut current = scanner_state.inner.write().await; + if current.hash != scan.hash { /* publish and notify only when the content hash changed */ + *current = scan; + let _ = scanner_state.tx.send(()); /* result ignored: send fails only when no client is subscribed */ + } + } + }); + + let html_state = state.clone(); + let data_state = state.clone(); + let events_state = state.clone(); + + let app = Router::new() + .route( + "/", + get(move || async move { + let snap = html_state.inner.read().await; + Html(snap.html.clone()).into_response() + }), + ) + .route( + "/data", + get(move || async move { + let snap = data_state.inner.read().await; + Json(snap.payload.clone()).into_response() + }), + ) + .route( + "/events", + get(move || async move { + let rx = events_state.tx.subscribe(); + let stream = BroadcastStream::new(rx).map(|item| match item { + Ok(()) => Ok(Event::default().event("data-changed")), + Err(_) => Ok::<_, std::convert::Infallible>( + Event::default().event("data-changed"), + ), /* a receive error (e.g. a lagged subscriber) is also reported as data-changed */ + }); + Sse::new(stream).keep_alive(KeepAlive::default()) + }), + ); + + let addr = std::net::SocketAddr::from(([127, 0, 0, 1], port)); /* binds the loopback address only */ + let listener = tokio::net::TcpListener::bind(addr) + .await + .with_context(|| format!("failed to bind {addr}"))?; println!( - "visualization {} snapshot={} quality_signal={}", - output.display(), - report.snapshot.snapshot_id, - health.quality_signal + "visualization http://{addr} interval={interval}s — Ctrl+C to stop", + addr = addr, + interval = interval, ); - if !watch { - break; - } - thread::sleep(Duration::from_secs(interval.max(1))); - } - Ok(()) + + axum::serve(listener, app) + .with_graceful_shutdown(async { /* NOTE(review): open events streams never end on their own, so shutdown may wait on connected browsers after Ctrl+C — confirm */ + let _ = tokio::signal::ctrl_c().await; + }) + .await +
.context("server error")?; + + Ok::<(), anyhow::Error>(()) + }) +} + +struct LiveState { /* state shared by the rescan task and the HTTP handlers */ + inner: tokio::sync::RwLock, /* NOTE(review): the type argument appears to have been stripped during extraction; presumably RwLock of LiveSnapshot — confirm */ + tx: tokio::sync::broadcast::Sender<()>, +} + +struct LiveSnapshot { + hash: String, /* lowercase hex SHA-256 digest computed in scan_now */ + html: String, + payload: serde_json::Value, +} + +fn scan_now(root: &Path, config_path: Option<&Path>) -> Result { /* NOTE(review): return type argument appears stripped during extraction; presumably Result of LiveSnapshot — confirm */ + use sha2::{Digest, Sha256}; + let config = config_for_root(root, config_path)?; + let report = scan_path_with_config(root, &config)?; + let health = compute_health_with_config(&report, &config); + let html = visualization_html(&report, &health); + let payload = serde_json::json!({ + "snapshot_id": report.snapshot.snapshot_id, + "score": health.score, + "quality_signal": health.quality_signal, + "files": report.files.len(), + "functions": report.functions.len(), + "rules": health.rules.len(), + }); + let mut hasher = Sha256::new(); + hasher.update(report.snapshot.snapshot_id.as_bytes()); + hasher.update(serde_json::to_vec(&payload).unwrap_or_default()); /* hash input is the snapshot id plus the summary payload; the rendered html is not hashed */ + let hash = format!("{:x}", hasher.finalize()); + Ok(LiveSnapshot { + hash, + html, + payload, + }) } fn visualization_html( @@ -1089,7 +1220,7 @@ fn visualization_html( .unwrap_or_else(|_| "{}".to_string()); format!( r#" -Raysense +Raysense