Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/ops/opt.c
Original file line number Diff line number Diff line change
Expand Up @@ -1537,6 +1537,14 @@ static ray_op_t* split_and_filter(ray_graph_t* g, ray_op_t* filter_node) {
ray_op_t* input = filter_node->inputs[0];
if (!pred_a || !pred_b || !input) return filter_node;

/* Don't split a FILTER whose child is a GROUP. Splitting an AND here
* produces FILTER(b, FILTER(a, GROUP)); exec's HAVING fusion swaps the
* table to the GROUP output only for a FILTER whose DIRECT child is GROUP,
* so the outer conjunct would evaluate against the base table (schema error
* on an agg-output ref, row-count mismatch on a key ref). Left intact, the
* single FILTER(AND, GROUP) is handled correctly by the HAVING fusion. */
if (input->opcode == OP_GROUP) return filter_node;

/* Save IDs before potential realloc */
uint32_t filter_id = filter_node->id;
uint32_t pred_a_id = pred_a->id;
Expand Down
193 changes: 129 additions & 64 deletions test/test_group_pushdown.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,15 @@ static bool plan_pushed(ray_graph_t* g, ray_op_t* root) {
root->inputs[0] && root->inputs[0]->opcode == OP_FILTER;
}

/* Un-split HAVING shape: root is FILTER whose direct child is GROUP and whose
* predicate is still an intact AND (the split would have made inputs[0] a FILTER). */
static bool plan_having_unsplit(ray_graph_t* g, ray_op_t* root) {
(void)g;
return root && root->opcode == OP_FILTER &&
root->inputs[0] && root->inputs[0]->opcode == OP_GROUP &&
root->inputs[1] && root->inputs[1]->opcode == OP_AND;
}

/* Pred on the AGG OUTPUT column — HAVING proper; must never push.
* After GROUP BY k, SUM(v), the agg output column is named "v_sum". */
static ray_op_t* pred_on_agg(ray_graph_t* g) {
Expand All @@ -91,6 +100,65 @@ static test_result_t test_having_on_agg_not_pushed(void) {
PASS();
}

/* Shape (a): HAVING AND where BOTH conjuncts reference the agg output v_sum.
* pred = AND(v_sum > 10, v_sum < 30). Neither conjunct is a group key, so
* pushdown is skipped; the AND must NOT be split over GROUP (split would make
* the outer conjunct evaluate against the base table → schema error).
* Per-group v_sum: 6,15,24,33 → 15 and 24 satisfy (>10 AND <30) → 2 rows. */
static test_result_t test_having_and_agg_executes(void) {
ray_heap_init();
ray_t* tbl = make_gp_table();
ray_graph_t* g = ray_graph_new(tbl);

ray_op_t* k = ray_scan(g, "k");
ray_op_t* v = ray_scan(g, "v");
ray_op_t* keys[] = {k};
uint16_t aops[] = {OP_SUM};
ray_op_t* ains[] = {v};
ray_op_t* grp = ray_group(g, keys, 1, aops, ains, 1);
ray_op_t* pred = ray_and(g,
ray_gt(g, ray_scan(g, "v_sum"), ray_const_i64(g, 10)),
ray_lt(g, ray_scan(g, "v_sum"), ray_const_i64(g, 30)));
ray_op_t* filt = ray_filter(g, grp, pred);
ray_op_t* root = ray_optimize(g, filt);

/* Guard keeps FILTER(AND, GROUP) intact — not a split chain. */
TEST_ASSERT(plan_having_unsplit(g, root),
"AND(agg,agg) over GROUP must stay un-split FILTER(AND, GROUP)");

ray_t* r = ray_execute(g, root);
TEST_ASSERT_FALSE(RAY_IS_ERR(r));
TEST_ASSERT_EQ_I(ray_table_nrows(r), 2); /* v_sum 15 and 24 */

ray_release(r); ray_graph_free(g); ray_release(tbl);
ray_sym_destroy(); ray_heap_destroy();
PASS();
}

/* Guard narrowness: an AND-filter whose child is NOT a GROUP must still split
* into a chain (filter reorder is unaffected away from GROUP). */
static test_result_t test_and_split_non_group_still_splits(void) {
ray_heap_init();
ray_t* tbl = make_gp_table();
ray_graph_t* g = ray_graph_new(tbl);

ray_op_t* base = ray_scan(g, "k"); /* non-GROUP data input */
ray_op_t* pred = ray_and(g,
ray_ge(g, ray_scan(g, "k"), ray_const_i64(g, 2)),
ray_le(g, ray_scan(g, "k"), ray_const_i64(g, 3)));
ray_op_t* filt = ray_filter(g, base, pred);
ray_op_t* root = ray_optimize(g, filt);

/* Split still happens: root is the outer FILTER, its input another FILTER. */
TEST_ASSERT(root && root->opcode == OP_FILTER, "root is FILTER");
TEST_ASSERT(root->inputs[0] && root->inputs[0]->opcode == OP_FILTER,
"non-GROUP AND-filter must still split into a chain");

ray_graph_free(g); ray_release(tbl);
ray_sym_destroy(); ray_heap_destroy();
PASS();
}

/* Hand-build the post-rewrite DAG (Task-3 optimizer shape) and execute:
* GROUP.inputs[0] = FILTER(k >= 3, const_table). Without the executor
* hook the filter never runs and all 4 groups appear. */
Expand Down Expand Up @@ -453,71 +521,43 @@ static test_result_t test_no_push_multi_consumer(void) {

/* c. AND predicate mixing a key-scan conjunct and an agg-output conjunct.
*
* pred = AND(k >= 3, v_sum > 10). collect_pred_scans traverses the full AND
* subtree and finds both `k` and `v_sum` scans. `v_sum` is not a group key
* (it is an agg output), so keys_only=false → pushdown skipped at pass 6.
*
* At pass 7 (filter_reorder) split_and_filter decomposes the AND-filter into
* two chained filters: FILTER(v_sum>10, FILTER(k>=3, GROUP(...))). Passes
* run once (ray_optimize makes a single linear sweep, pass 6 before pass 7),
* so the k>=3 filter is NOT re-attempted for pushdown after the split.
* The optimised shape is therefore FILTER(FILTER(GROUP)), not GROUP(FILTER).
* pred = AND(k >= 3, v_sum > 10). v_sum is an agg output, not a group key, so
* keys_only=false → pushdown skipped at pass 6 (root stays FILTER, not GROUP).
*
* Execution note: the outer FILTER evaluates its predicate (v_sum > 10) via
* exec_node, which reads SCAN nodes from g->table (the original table). The
* original table has no `v_sum` column, so executing the OPTIMIZED plan
* returns a schema error. The plan-shape assert is the meaningful check for
* the optimised path.
*
* Value correctness is verified on the UN-OPTIMIZED plan (no ray_optimize
* call): FILTER(GROUP) preserves the single-filter exec path (line ~1230 in
* exec.c) that swaps g->table to the GROUP output before evaluating the pred.
* Values: k=3 sum=24, k=4 sum=33 both satisfy AND(k>=3, v_sum>10) → 2 rows. */
* At pass 7 the AND-filter sits directly over GROUP, so split_and_filter's
* GROUP guard leaves it intact: the optimised shape is FILTER(AND, GROUP).
* The executor's HAVING fusion swaps g->table to the GROUP output and
* evaluates the whole AND there: k=3 sum=24 and k=4 sum=33 satisfy both
* conjuncts → 2 rows. We assert this on the OPTIMISED plan. */
static test_result_t test_no_push_mixed_pred(void) {
ray_heap_init();
ray_t* tbl = make_gp_table();
ray_graph_t* g = ray_graph_new(tbl);

/* Plan-shape check: optimise and assert no push */
{
ray_graph_t* g = ray_graph_new(tbl);
ray_op_t* k = ray_scan(g, "k");
ray_op_t* v = ray_scan(g, "v");
ray_op_t* keys[] = {k};
uint16_t aops[] = {OP_SUM};
ray_op_t* ains[] = {v};
ray_op_t* grp = ray_group(g, keys, 1, aops, ains, 1);
ray_op_t* pred = ray_and(g,
ray_ge(g, ray_scan(g, "k"), ray_const_i64(g, 3)),
ray_gt(g, ray_scan(g, "v_sum"), ray_const_i64(g, 10)));
ray_op_t* filt = ray_filter(g, grp, pred);
ray_op_t* root = ray_optimize(g, filt);
TEST_ASSERT(!plan_pushed(g, root), "mixed-pred AND must not push");
ray_graph_free(g);
}
ray_op_t* k = ray_scan(g, "k");
ray_op_t* v = ray_scan(g, "v");
ray_op_t* keys[] = {k};
uint16_t aops[] = {OP_SUM};
ray_op_t* ains[] = {v};
ray_op_t* grp = ray_group(g, keys, 1, aops, ains, 1);
ray_op_t* pred = ray_and(g,
ray_ge(g, ray_scan(g, "k"), ray_const_i64(g, 3)),
ray_gt(g, ray_scan(g, "v_sum"), ray_const_i64(g, 10)));
ray_op_t* filt = ray_filter(g, grp, pred);
ray_op_t* root = ray_optimize(g, filt);

/* Value check: un-optimized FILTER(GROUP) preserves the executor's
* special HAVING path (filter_child->opcode == OP_GROUP) so scans in
* the pred correctly resolve against the GROUP output, not g->table. */
{
ray_graph_t* g = ray_graph_new(tbl);
ray_op_t* k = ray_scan(g, "k");
ray_op_t* v = ray_scan(g, "v");
ray_op_t* keys[] = {k};
uint16_t aops[] = {OP_SUM};
ray_op_t* ains[] = {v};
ray_op_t* grp = ray_group(g, keys, 1, aops, ains, 1);
ray_op_t* pred = ray_and(g,
ray_ge(g, ray_scan(g, "k"), ray_const_i64(g, 3)),
ray_gt(g, ray_scan(g, "v_sum"), ray_const_i64(g, 10)));
ray_op_t* filt = ray_filter(g, grp, pred);
ray_t* r = ray_execute(g, filt); /* no ray_optimize — keeps FILTER(GROUP) */
TEST_ASSERT_FALSE(RAY_IS_ERR(r));
/* k=3 sum=24, k=4 sum=33 both pass both conjuncts → 2 rows */
TEST_ASSERT_EQ_I(ray_table_nrows(r), 2);
ray_release(r); ray_graph_free(g);
}
/* Mixed AND is not pushable, and the GROUP guard prevents the split: the
* optimised shape is the intact FILTER(AND, GROUP). */
TEST_ASSERT(!plan_pushed(g, root), "mixed-pred AND must not push");
TEST_ASSERT(plan_having_unsplit(g, root),
"mixed-pred AND over GROUP must stay un-split FILTER(AND, GROUP)");

ray_release(tbl);
ray_t* r = ray_execute(g, root);
TEST_ASSERT_FALSE(RAY_IS_ERR(r));
/* k=3 sum=24, k=4 sum=33 satisfy AND(k>=3, v_sum>10) → 2 rows. */
TEST_ASSERT_EQ_I(ray_table_nrows(r), 2);

ray_release(r); ray_graph_free(g); ray_release(tbl);
ray_sym_destroy(); ray_heap_destroy();
PASS();
}
Expand Down Expand Up @@ -592,12 +632,10 @@ static test_result_t test_no_push_const_pred(void) {
* still the un-split AND and whose data input is the const-table (chain
* depth 1).
*
* The knob-based diff helper (diff_having) is NOT usable here: with
* pushdown disabled, pass-7 still splits the AND above GROUP into
* FILTER(k<=3, FILTER(k>=2, GROUP)); the outer filter's pred evaluates
* against g->table (12 rows) but its input is the 2-row group output —
* a row-count mismatch error (empirically verified, pre-existing and
* independent of the pushdown bug).
* With pushdown DISABLED the AND stays above GROUP as FILTER(AND, GROUP);
* split_and_filter's GROUP guard leaves it un-split, so the HAVING fusion
* evaluates the whole AND against the GROUP output and the disabled-pushdown
* path returns the same 2 rows as the pushed path (asserted below).
* =================================================================== */

/* (and (>= k 2) (<= k 3)) — keys-only AND, nominally groups k=2 and k=3 */
Expand Down Expand Up @@ -694,13 +732,40 @@ static test_result_t test_diff_and_of_keys(void) {
TEST_ASSERT_EQ_I(ray_vec_get_i64(sv, 0), 15); /* k=2 */
TEST_ASSERT_EQ_I(ray_vec_get_i64(sv, 1), 24); /* k=3 */

/* --- Disabled-pushdown path: FILTER(AND, GROUP) optimised; the GROUP
* guard suppresses the split, so this now executes correctly (it was the
* documented row-count-mismatch failure before the fix). --- */
ray_t* nopush_res = NULL;
bool nopush_unsplit = false;
{
ray_opt_no_group_pushdown = true;
ray_graph_t* g = ray_graph_new(tbl);
ray_op_t* root = build_having_plan(g, pred_key_and_range, NULL);
nopush_unsplit = plan_having_unsplit(g, root); /* capture before free */
ray_op_t* skeys[1] = {ray_scan(g, "k")};
uint8_t descs[1] = {0};
root = ray_sort_op(g, root, skeys, descs, NULL, 1);
nopush_res = ray_execute(g, root);
ray_graph_free(g);
ray_opt_no_group_pushdown = false; /* reset before any assert */
}
/* Asserts run only after the knob is restored — a failure here can't leak
* the disabled-pushdown state into later tests. */
TEST_ASSERT(nopush_unsplit,
"disabled-pushdown AND over GROUP must stay un-split FILTER(AND, GROUP)");
TEST_ASSERT_FALSE(RAY_IS_ERR(nopush_res));
TEST_ASSERT_EQ_I(ray_table_nrows(nopush_res), 2);
ray_release(nopush_res);

ray_release(pushed_res); ray_release(base_res);
ray_release(tbl); ray_sym_destroy(); ray_heap_destroy();
PASS();
}

const test_entry_t group_pushdown_entries[] = {
{ "group_pushdown/agg_pred_not_pushed", test_having_on_agg_not_pushed, NULL, NULL },
{ "group_pushdown/having_and_agg_exec", test_having_and_agg_executes, NULL, NULL },
{ "group_pushdown/and_split_non_group", test_and_split_non_group_still_splits, NULL, NULL },
{ "group_pushdown/exec_pushed_filter", test_exec_group_with_pushed_filter, NULL, NULL },
{ "group_pushdown/having_on_key_pushed", test_having_on_key_pushed, NULL, NULL },
{ "group_pushdown/diff_key_ge", test_diff_key_ge, NULL, NULL },
Expand Down
Loading