diff --git a/src/ops/opt.c b/src/ops/opt.c index 4567e936..11418c27 100644 --- a/src/ops/opt.c +++ b/src/ops/opt.c @@ -1537,6 +1537,14 @@ static ray_op_t* split_and_filter(ray_graph_t* g, ray_op_t* filter_node) { ray_op_t* input = filter_node->inputs[0]; if (!pred_a || !pred_b || !input) return filter_node; + /* Don't split a FILTER whose child is a GROUP. Splitting an AND here + * produces FILTER(b, FILTER(a, GROUP)); exec's HAVING fusion swaps the + * table to the GROUP output only for a FILTER whose DIRECT child is GROUP, + * so the outer conjunct would evaluate against the base table (schema error + * on an agg-output ref, row-count mismatch on a key ref). Left intact, the + * single FILTER(AND, GROUP) is handled correctly by the HAVING fusion. */ + if (input->opcode == OP_GROUP) return filter_node; + /* Save IDs before potential realloc */ uint32_t filter_id = filter_node->id; uint32_t pred_a_id = pred_a->id; diff --git a/test/test_group_pushdown.c b/test/test_group_pushdown.c index cf708201..e9a8d67e 100644 --- a/test/test_group_pushdown.c +++ b/test/test_group_pushdown.c @@ -71,6 +71,15 @@ static bool plan_pushed(ray_graph_t* g, ray_op_t* root) { root->inputs[0] && root->inputs[0]->opcode == OP_FILTER; } +/* Un-split HAVING shape: root is FILTER whose direct child is GROUP and whose + * predicate is still an intact AND (the split would have made inputs[0] a FILTER). */ +static bool plan_having_unsplit(ray_graph_t* g, ray_op_t* root) { + (void)g; + return root && root->opcode == OP_FILTER && + root->inputs[0] && root->inputs[0]->opcode == OP_GROUP && + root->inputs[1] && root->inputs[1]->opcode == OP_AND; +} + /* Pred on the AGG OUTPUT column — HAVING proper; must never push. * After GROUP BY k, SUM(v), the agg output column is named "v_sum". */ static ray_op_t* pred_on_agg(ray_graph_t* g) { @@ -91,6 +100,65 @@ static test_result_t test_having_on_agg_not_pushed(void) { PASS(); } +/* Shape (a): HAVING AND where BOTH conjuncts reference the agg output v_sum. + * pred = AND(v_sum > 10, v_sum < 30). Neither conjunct is a group key, so + * pushdown is skipped; the AND must NOT be split over GROUP (split would make + * the outer conjunct evaluate against the base table → schema error). + * Per-group v_sum: 6,15,24,33 → 15 and 24 satisfy (>10 AND <30) → 2 rows. */ +static test_result_t test_having_and_agg_executes(void) { + ray_heap_init(); + ray_t* tbl = make_gp_table(); + ray_graph_t* g = ray_graph_new(tbl); + + ray_op_t* k = ray_scan(g, "k"); + ray_op_t* v = ray_scan(g, "v"); + ray_op_t* keys[] = {k}; + uint16_t aops[] = {OP_SUM}; + ray_op_t* ains[] = {v}; + ray_op_t* grp = ray_group(g, keys, 1, aops, ains, 1); + ray_op_t* pred = ray_and(g, + ray_gt(g, ray_scan(g, "v_sum"), ray_const_i64(g, 10)), + ray_lt(g, ray_scan(g, "v_sum"), ray_const_i64(g, 30))); + ray_op_t* filt = ray_filter(g, grp, pred); + ray_op_t* root = ray_optimize(g, filt); + + /* Guard keeps FILTER(AND, GROUP) intact — not a split chain. */ + TEST_ASSERT(plan_having_unsplit(g, root), + "AND(agg,agg) over GROUP must stay un-split FILTER(AND, GROUP)"); + + ray_t* r = ray_execute(g, root); + TEST_ASSERT_FALSE(RAY_IS_ERR(r)); + TEST_ASSERT_EQ_I(ray_table_nrows(r), 2); /* v_sum 15 and 24 */ + + ray_release(r); ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + +/* Guard narrowness: an AND-filter whose child is NOT a GROUP must still split + * into a chain (filter reorder is unaffected away from GROUP). */ +static test_result_t test_and_split_non_group_still_splits(void) { + ray_heap_init(); + ray_t* tbl = make_gp_table(); + ray_graph_t* g = ray_graph_new(tbl); + + ray_op_t* base = ray_scan(g, "k"); /* non-GROUP data input */ + ray_op_t* pred = ray_and(g, + ray_ge(g, ray_scan(g, "k"), ray_const_i64(g, 2)), + ray_le(g, ray_scan(g, "k"), ray_const_i64(g, 3))); + ray_op_t* filt = ray_filter(g, base, pred); + ray_op_t* root = ray_optimize(g, filt); + + /* Split still happens: root is the outer FILTER, its input another FILTER. */ + TEST_ASSERT(root && root->opcode == OP_FILTER, "root is FILTER"); + TEST_ASSERT(root->inputs[0] && root->inputs[0]->opcode == OP_FILTER, + "non-GROUP AND-filter must still split into a chain"); + + ray_graph_free(g); ray_release(tbl); + ray_sym_destroy(); ray_heap_destroy(); + PASS(); +} + /* Hand-build the post-rewrite DAG (Task-3 optimizer shape) and execute: * GROUP.inputs[0] = FILTER(k >= 3, const_table). Without the executor * hook the filter never runs and all 4 groups appear. */ @@ -453,71 +521,43 @@ static test_result_t test_no_push_multi_consumer(void) { /* c. AND predicate mixing a key-scan conjunct and an agg-output conjunct. * - * pred = AND(k >= 3, v_sum > 10). collect_pred_scans traverses the full AND - * subtree and finds both `k` and `v_sum` scans. `v_sum` is not a group key - * (it is an agg output), so keys_only=false → pushdown skipped at pass 6. - * - * At pass 7 (filter_reorder) split_and_filter decomposes the AND-filter into - * two chained filters: FILTER(v_sum>10, FILTER(k>=3, GROUP(...))). Passes - * run once (ray_optimize makes a single linear sweep, pass 6 before pass 7), - * so the k>=3 filter is NOT re-attempted for pushdown after the split. - * The optimised shape is therefore FILTER(FILTER(GROUP)), not GROUP(FILTER). + * pred = AND(k >= 3, v_sum > 10). v_sum is an agg output, not a group key, so + * keys_only=false → pushdown skipped at pass 6 (root stays FILTER, not GROUP). * - * Execution note: the outer FILTER evaluates its predicate (v_sum > 10) via - * exec_node, which reads SCAN nodes from g->table (the original table). The - * original table has no `v_sum` column, so executing the OPTIMIZED plan - * returns a schema error. The plan-shape assert is the meaningful check for - * the optimised path. - * - * Value correctness is verified on the UN-OPTIMIZED plan (no ray_optimize - * call): FILTER(GROUP) preserves the single-filter exec path (line ~1230 in - * exec.c) that swaps g->table to the GROUP output before evaluating the pred. - * Values: k=3 sum=24, k=4 sum=33 both satisfy AND(k>=3, v_sum>10) → 2 rows. */ + * At pass 7 the AND-filter sits directly over GROUP, so split_and_filter's + * GROUP guard leaves it intact: the optimised shape is FILTER(AND, GROUP). + * The executor's HAVING fusion swaps g->table to the GROUP output and + * evaluates the whole AND there: k=3 sum=24 and k=4 sum=33 satisfy both + * conjuncts → 2 rows. We assert this on the OPTIMISED plan. */ static test_result_t test_no_push_mixed_pred(void) { ray_heap_init(); ray_t* tbl = make_gp_table(); + ray_graph_t* g = ray_graph_new(tbl); - /* Plan-shape check: optimise and assert no push */ - { - ray_graph_t* g = ray_graph_new(tbl); - ray_op_t* k = ray_scan(g, "k"); - ray_op_t* v = ray_scan(g, "v"); - ray_op_t* keys[] = {k}; - uint16_t aops[] = {OP_SUM}; - ray_op_t* ains[] = {v}; - ray_op_t* grp = ray_group(g, keys, 1, aops, ains, 1); - ray_op_t* pred = ray_and(g, - ray_ge(g, ray_scan(g, "k"), ray_const_i64(g, 3)), - ray_gt(g, ray_scan(g, "v_sum"), ray_const_i64(g, 10))); - ray_op_t* filt = ray_filter(g, grp, pred); - ray_op_t* root = ray_optimize(g, filt); - TEST_ASSERT(!plan_pushed(g, root), "mixed-pred AND must not push"); - ray_graph_free(g); - } + ray_op_t* k = ray_scan(g, "k"); + ray_op_t* v = ray_scan(g, "v"); + ray_op_t* keys[] = {k}; + uint16_t aops[] = {OP_SUM}; + ray_op_t* ains[] = {v}; + ray_op_t* grp = ray_group(g, keys, 1, aops, ains, 1); + ray_op_t* pred = ray_and(g, + ray_ge(g, ray_scan(g, "k"), ray_const_i64(g, 3)), + ray_gt(g, ray_scan(g, "v_sum"), ray_const_i64(g, 10))); + ray_op_t* filt = ray_filter(g, grp, pred); + ray_op_t* root = ray_optimize(g, filt); - /* Value check: un-optimized FILTER(GROUP) preserves the executor's - * special HAVING path (filter_child->opcode == OP_GROUP) so scans in - * the pred correctly resolve against the GROUP output, not g->table. */ - { - ray_graph_t* g = ray_graph_new(tbl); - ray_op_t* k = ray_scan(g, "k"); - ray_op_t* v = ray_scan(g, "v"); - ray_op_t* keys[] = {k}; - uint16_t aops[] = {OP_SUM}; - ray_op_t* ains[] = {v}; - ray_op_t* grp = ray_group(g, keys, 1, aops, ains, 1); - ray_op_t* pred = ray_and(g, - ray_ge(g, ray_scan(g, "k"), ray_const_i64(g, 3)), - ray_gt(g, ray_scan(g, "v_sum"), ray_const_i64(g, 10))); - ray_op_t* filt = ray_filter(g, grp, pred); - ray_t* r = ray_execute(g, filt); /* no ray_optimize — keeps FILTER(GROUP) */ - TEST_ASSERT_FALSE(RAY_IS_ERR(r)); - /* k=3 sum=24, k=4 sum=33 both pass both conjuncts → 2 rows */ - TEST_ASSERT_EQ_I(ray_table_nrows(r), 2); - ray_release(r); ray_graph_free(g); - } + /* Mixed AND is not pushable, and the GROUP guard prevents the split: the + * optimised shape is the intact FILTER(AND, GROUP). */ + TEST_ASSERT(!plan_pushed(g, root), "mixed-pred AND must not push"); + TEST_ASSERT(plan_having_unsplit(g, root), + "mixed-pred AND over GROUP must stay un-split FILTER(AND, GROUP)"); - ray_release(tbl); + ray_t* r = ray_execute(g, root); + TEST_ASSERT_FALSE(RAY_IS_ERR(r)); + /* k=3 sum=24, k=4 sum=33 satisfy AND(k>=3, v_sum>10) → 2 rows. */ + TEST_ASSERT_EQ_I(ray_table_nrows(r), 2); + + ray_release(r); ray_graph_free(g); ray_release(tbl); ray_sym_destroy(); ray_heap_destroy(); PASS(); } @@ -592,12 +632,10 @@ static test_result_t test_no_push_const_pred(void) { * still the un-split AND and whose data input is the const-table (chain * depth 1). * - * The knob-based diff helper (diff_having) is NOT usable here: with - * pushdown disabled, pass-7 still splits the AND above GROUP into - * FILTER(k<=3, FILTER(k>=2, GROUP)); the outer filter's pred evaluates - * against g->table (12 rows) but its input is the 2-row group output — - * a row-count mismatch error (empirically verified, pre-existing and - * independent of the pushdown bug). + * With pushdown DISABLED the AND stays above GROUP as FILTER(AND, GROUP); + * split_and_filter's GROUP guard leaves it un-split, so the HAVING fusion + * evaluates the whole AND against the GROUP output and the disabled-pushdown + * path returns the same 2 rows as the pushed path (asserted below). * =================================================================== */ /* (and (>= k 2) (<= k 3)) — keys-only AND, nominally groups k=2 and k=3 */ @@ -694,6 +732,31 @@ static test_result_t test_diff_and_of_keys(void) { TEST_ASSERT_EQ_I(ray_vec_get_i64(sv, 0), 15); /* k=2 */ TEST_ASSERT_EQ_I(ray_vec_get_i64(sv, 1), 24); /* k=3 */ + /* --- Disabled-pushdown path: FILTER(AND, GROUP) optimised; the GROUP + * guard suppresses the split, so this now executes correctly (it was the + * documented row-count-mismatch failure before the fix). --- */ + ray_t* nopush_res = NULL; + bool nopush_unsplit = false; + { + ray_opt_no_group_pushdown = true; + ray_graph_t* g = ray_graph_new(tbl); + ray_op_t* root = build_having_plan(g, pred_key_and_range, NULL); + nopush_unsplit = plan_having_unsplit(g, root); /* capture before free */ + ray_op_t* skeys[1] = {ray_scan(g, "k")}; + uint8_t descs[1] = {0}; + root = ray_sort_op(g, root, skeys, descs, NULL, 1); + nopush_res = ray_execute(g, root); + ray_graph_free(g); + ray_opt_no_group_pushdown = false; /* reset before any assert */ + } + /* Asserts run only after the knob is restored — a failure here can't leak + * the disabled-pushdown state into later tests. */ + TEST_ASSERT(nopush_unsplit, + "disabled-pushdown AND over GROUP must stay un-split FILTER(AND, GROUP)"); + TEST_ASSERT_FALSE(RAY_IS_ERR(nopush_res)); + TEST_ASSERT_EQ_I(ray_table_nrows(nopush_res), 2); + ray_release(nopush_res); + ray_release(pushed_res); ray_release(base_res); ray_release(tbl); ray_sym_destroy(); ray_heap_destroy(); PASS(); @@ -701,6 +764,8 @@ static test_result_t test_diff_and_of_keys(void) { const test_entry_t group_pushdown_entries[] = { { "group_pushdown/agg_pred_not_pushed", test_having_on_agg_not_pushed, NULL, NULL }, + { "group_pushdown/having_and_agg_exec", test_having_and_agg_executes, NULL, NULL }, + { "group_pushdown/and_split_non_group", test_and_split_non_group_still_splits, NULL, NULL }, { "group_pushdown/exec_pushed_filter", test_exec_group_with_pushed_filter, NULL, NULL }, { "group_pushdown/having_on_key_pushed", test_having_on_key_pushed, NULL, NULL }, { "group_pushdown/diff_key_ge", test_diff_key_ge, NULL, NULL },