From fe928f5c79b91402ff6fba42892f86609599bd62 Mon Sep 17 00:00:00 2001 From: STWang Date: Thu, 2 Apr 2026 15:42:51 -0600 Subject: [PATCH 01/10] initial --- .../execution-utilities/benchmark/README.md | 344 ++++++++++++++ .../configs/clickbench_hits_init.sql | 115 +++++ .../configs/clickbench_inference.yaml | 21 + .../configs/clickbench_streaming.yaml | 26 ++ .../benchmark/configs/h2o_inference.yaml | 20 + .../benchmark/configs/h2o_init.sql | 20 + .../benchmark/configs/h2o_streaming.yaml | 26 ++ .../benchmark/download_dataset.py | 164 +++++++ .../benchmark/export_to_arroyo.py | 254 ++++++++++ .../benchmark/export_to_database.py | 353 ++++++++++++++ .../benchmark/generate_queries.py | 390 ++++++++++++++++ .../benchmark/prepare_data.py | 187 ++++++++ .../benchmark/requirements.txt | 5 + .../benchmark/run_benchmark.py | 434 ++++++++++++++++++ 14 files changed, 2359 insertions(+) create mode 100644 asap-tools/execution-utilities/benchmark/README.md create mode 100644 asap-tools/execution-utilities/benchmark/configs/clickbench_hits_init.sql create mode 100644 asap-tools/execution-utilities/benchmark/configs/clickbench_inference.yaml create mode 100644 asap-tools/execution-utilities/benchmark/configs/clickbench_streaming.yaml create mode 100644 asap-tools/execution-utilities/benchmark/configs/h2o_inference.yaml create mode 100644 asap-tools/execution-utilities/benchmark/configs/h2o_init.sql create mode 100644 asap-tools/execution-utilities/benchmark/configs/h2o_streaming.yaml create mode 100644 asap-tools/execution-utilities/benchmark/download_dataset.py create mode 100644 asap-tools/execution-utilities/benchmark/export_to_arroyo.py create mode 100644 asap-tools/execution-utilities/benchmark/export_to_database.py create mode 100644 asap-tools/execution-utilities/benchmark/generate_queries.py create mode 100644 asap-tools/execution-utilities/benchmark/prepare_data.py create mode 100644 asap-tools/execution-utilities/benchmark/requirements.txt create mode 100644 
asap-tools/execution-utilities/benchmark/run_benchmark.py diff --git a/asap-tools/execution-utilities/benchmark/README.md b/asap-tools/execution-utilities/benchmark/README.md new file mode 100644 index 00000000..9a0608cc --- /dev/null +++ b/asap-tools/execution-utilities/benchmark/README.md @@ -0,0 +1,344 @@ +# ASAP Generalized Benchmark Pipeline + +Measures ASAP query latency (KLL sketch) against ClickHouse baseline for +arbitrary datasets. Supports ClickBench and H2O groupby out of the box. + +## Architecture + +``` +data_file → prepare_data.py → arroyo_file.json + ↓ + export_to_arroyo.py (file source) + ↓ + sketch_topic (Kafka) + ↓ + QueryEngineRust :8088 + ↓ +data_file → export_to_database.py run_benchmark.py → results/ + ↓ + ClickHouse :8123 (baseline) +``` + +**Key difference from the old pipeline:** Arroyo reads directly from a local +file (`single_file_custom` connector) rather than from a Kafka input topic. +Kafka is still required for the **sketch output** topic (`sketch_topic`). + +--- + +## Prerequisites + +```bash +export INSTALL_DIR=/scratch/sketch_db_for_prometheus +pip3 install --user -r requirements.txt + +# Build binaries (one-time) +cd ~/ASAPQuery/asap-query-engine && cargo build --release +``` + +--- + +## ClickBench + ClickHouse End-to-End Example + +### Step 1 — Download dataset + +```bash +cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark +python download_dataset.py --dataset clickbench --output-dir ./data +``` + +Optionally limit to 1M rows: + +```bash +cd ./data +mv hits.json.gz hits_full.json.gz +zcat hits_full.json.gz | head -n 1000000 | gzip > hits.json.gz +``` + +### Step 2 — Prepare data for Arroyo file source + +The Arroyo file source requires RFC3339 timestamps and string metadata columns. 
+This step converts the raw ClickBench JSON: + +```bash +python prepare_data.py \ + --dataset clickbench \ + --input ./data/hits.json.gz \ + --output ./data/hits_arroyo.json \ + --max-rows 1000000 +``` + +This produces `hits_arroyo.json` with: +- `EventTime` converted from `"2013-07-14 20:38:47"` → `"2013-07-14T20:38:47Z"` +- `RegionID`, `OS`, `UserAgent`, `TraficSourceID` as strings +- Records sorted by `EventTime` + +### Step 3 — Start infrastructure + +```bash +# Kafka +~/ASAPQuery/asap-tools/installation/kafka/run.sh $INSTALL_DIR/kafka + +# Create sketch output topic +KAFKA=$INSTALL_DIR/kafka/bin +$KAFKA/kafka-topics.sh --bootstrap-server localhost:9092 --create \ + --topic sketch_topic --partitions 1 --replication-factor 1 \ + --config max.message.bytes=20971520 + +# ClickHouse +~/ASAPQuery/asap-tools/installation/clickhouse/run.sh $INSTALL_DIR +``` + +### Step 4 — Start Arroyo cluster + +```bash +~/ASAPQuery/asap-summary-ingest/target/release/arroyo \ + --config ~/ASAPQuery/asap-summary-ingest/config.yaml cluster \ + > /tmp/arroyo.log 2>&1 & +``` + +### Step 5 — Launch Arroyo sketch pipeline (file source) + +```bash +python export_to_arroyo.py \ + --streaming-config ./configs/clickbench_streaming.yaml \ + --source-type file \ + --input-file ./data/hits_arroyo.json \ + --file-format json \ + --ts-format rfc3339 \ + --pipeline-name clickbench_pipeline \ + --arroyosketch-dir ~/ASAPQuery/asap-summary-ingest \ + --output-dir ./arroyo_outputs +``` + +### Step 6 — Start QueryEngineRust + +```bash +cd ~/ASAPQuery/asap-query-engine +nohup ./target/release/query_engine_rust \ + --kafka-topic sketch_topic --input-format json \ + --config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/configs/clickbench_inference.yaml \ + --streaming-config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/configs/clickbench_streaming.yaml \ + --http-port 8088 --delete-existing-db --log-level DEBUG \ + --output-dir ./output --streaming-engine arroyo \ + --query-language SQL 
--lock-strategy per-key \ + --prometheus-scrape-interval 1 > /tmp/query_engine.log 2>&1 & +``` + +### Step 7 — Load data into ClickHouse (baseline) + +```bash +cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark +python export_to_database.py \ + --dataset clickbench \ + --file-path ./data/hits.json.gz \ + --clickhouse-url "http://localhost:8123/" \ + --init-sql-file ./configs/clickbench_hits_init.sql +``` + +Verify: `$INSTALL_DIR/clickhouse client --query "SELECT count(*) FROM hits"` + +### Step 8 — Generate SQL query files + +```bash +python generate_queries.py \ + --table-name hits \ + --ts-column EventTime \ + --value-column ResolutionWidth \ + --group-by-columns RegionID,OS,UserAgent,TraficSourceID \ + --window-size 10 \ + --num-queries 50 \ + --ts-format datetime \ + --window-form dateadd \ + --auto-detect-timestamps \ + --data-file ./data/hits_arroyo.json \ + --data-file-format json \ + --output-prefix ./queries/clickbench +``` + +This writes `queries/clickbench_asap.sql` and `queries/clickbench_clickhouse.sql`. + +### Step 9 — Run benchmark + +```bash +python run_benchmark.py \ + --mode both \ + --asap-sql-file ./queries/clickbench_asap.sql \ + --baseline-sql-file ./queries/clickbench_clickhouse.sql \ + --output-dir ./results \ + --output-prefix clickbench +``` + +Results: `results/clickbench_asap.csv`, `results/clickbench_baseline.csv`, +`results/clickbench_comparison.png`. 
+ +--- + +## H2O GroupBy End-to-End Example + +### Step 1 — Download dataset + +```bash +python download_dataset.py --dataset h2o --output-dir ./data +``` + +### Step 2 — Prepare data for Arroyo file source + +```bash +python prepare_data.py \ + --dataset h2o \ + --input ./data/G1_1e7_1e2_0_0.csv \ + --output ./data/h2o_arroyo.json \ + --max-rows 1000000 +``` + +### Steps 3–4 — Start infrastructure and Arroyo (same as ClickBench) + +### Step 5 — Launch Arroyo sketch pipeline + +```bash +python export_to_arroyo.py \ + --streaming-config ./configs/h2o_streaming.yaml \ + --source-type file \ + --input-file ./data/h2o_arroyo.json \ + --file-format json \ + --ts-format rfc3339 \ + --pipeline-name h2o_pipeline \ + --arroyosketch-dir ~/ASAPQuery/asap-summary-ingest \ + --output-dir ./arroyo_outputs +``` + +### Step 6 — Start QueryEngineRust + +```bash +cd ~/ASAPQuery/asap-query-engine +nohup ./target/release/query_engine_rust \ + --kafka-topic sketch_topic --input-format json \ + --config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/configs/h2o_inference.yaml \ + --streaming-config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/configs/h2o_streaming.yaml \ + --http-port 8088 --delete-existing-db --log-level DEBUG \ + --output-dir ./output --streaming-engine arroyo \ + --query-language SQL --lock-strategy per-key \ + --prometheus-scrape-interval 1 > /tmp/query_engine.log 2>&1 & +``` + +### Step 7 — Load data into ClickHouse (baseline) + +```bash +python export_to_database.py \ + --dataset h2o \ + --file-path ./data/G1_1e7_1e2_0_0.csv \ + --init-sql-file ./configs/h2o_init.sql \ + --max-rows 1000000 +``` + +### Step 8 — Generate SQL query files + +```bash +python generate_queries.py \ + --table-name h2o_groupby \ + --ts-column timestamp \ + --value-column v1 \ + --group-by-columns id1,id2 \ + --window-size 10 \ + --num-queries 50 \ + --ts-format iso \ + --auto-detect-timestamps \ + --data-file ./data/h2o_arroyo.json \ + --data-file-format json \ + 
--output-prefix ./queries/h2o +``` + +### Step 9 — Run benchmark + +```bash +python run_benchmark.py \ + --mode both \ + --asap-sql-file ./queries/h2o_asap.sql \ + --baseline-sql-file ./queries/h2o_clickhouse.sql \ + --output-dir ./results \ + --output-prefix h2o +``` + +--- + +## Custom Dataset + +```bash +# 1. Download (any HTTP URL) +python download_dataset.py --dataset custom \ + --custom-url https://example.com/mydata.json.gz \ + --output-dir ./data + +# 2. Prepare (edit prepare_data.py for your schema, or skip if already RFC3339) + +# 3. Export to Arroyo +python export_to_arroyo.py \ + --streaming-config ./configs/my_streaming.yaml \ + --source-type file \ + --input-file ./data/mydata.json \ + --file-format json \ + --ts-format rfc3339 \ + --pipeline-name my_pipeline \ + --arroyosketch-dir ~/ASAPQuery/asap-summary-ingest + +# 4. Export to ClickHouse +python export_to_database.py \ + --dataset custom \ + --file-path ./data/mydata.json \ + --init-sql-file ./configs/my_init.sql \ + --table-name my_table + +# 5. Generate queries +python generate_queries.py \ + --table-name my_table \ + --ts-column event_time \ + --value-column metric_value \ + --group-by-columns region,host \ + --window-size 10 \ + --num-queries 50 \ + --auto-detect-timestamps \ + --data-file ./data/mydata.json \ + --output-prefix ./queries/my_dataset + +# 6. 
Run benchmark +python run_benchmark.py \ + --mode both \ + --asap-sql-file ./queries/my_dataset_asap.sql \ + --baseline-sql-file ./queries/my_dataset_clickhouse.sql \ + --output-dir ./results +``` + +--- + +## Reset + +```bash +pkill -f "arroyo"; pkill -f "query_engine_rust" +sleep 2 +pkill -f "kafka-server-start.sh"; pkill -f "clickhouse server" +sleep 2 +rm -rf /tmp/arroyo/ + +KAFKA=$INSTALL_DIR/kafka/bin +$KAFKA/kafka-topics.sh --bootstrap-server localhost:9092 --delete --topic sketch_topic + +cd ~/ASAPQuery/asap-summary-ingest +python3 delete_pipeline.py --all_pipelines + +$INSTALL_DIR/clickhouse client --query "TRUNCATE TABLE hits" +# or for H2O: $INSTALL_DIR/clickhouse client --query "TRUNCATE TABLE h2o_groupby" +``` + +--- + +## Files + +| File | Purpose | +|------|---------| +| `download_dataset.py` | Download ClickBench, H2O, or custom datasets | +| `prepare_data.py` | Convert raw data to Arroyo file source format (RFC3339, string columns) | +| `export_to_arroyo.py` | Launch Arroyo sketch pipeline (file or kafka source) | +| `export_to_database.py` | Load data into ClickHouse for baseline | +| `generate_queries.py` | Generate paired ASAP + ClickHouse SQL query files | +| `run_benchmark.py` | Run queries and produce CSV results + plots | +| `configs/` | Dataset-specific streaming/inference YAML and ClickHouse init SQL | diff --git a/asap-tools/execution-utilities/benchmark/configs/clickbench_hits_init.sql b/asap-tools/execution-utilities/benchmark/configs/clickbench_hits_init.sql new file mode 100644 index 00000000..b462faec --- /dev/null +++ b/asap-tools/execution-utilities/benchmark/configs/clickbench_hits_init.sql @@ -0,0 +1,115 @@ +-- ClickHouse init for ClickBench baseline (MergeTree only, no Kafka engine) +-- Use this with export_to_database.py --dataset clickbench --init-sql-file + +CREATE TABLE IF NOT EXISTS hits +( + WatchID Int64, + JavaEnable UInt8, + Title String, + GoodEvent Int16, + EventTime DateTime, + EventDate Date, + CounterID UInt32, + 
ClientIP Int32, + RegionID UInt32, + UserID Int64, + CounterClass Int8, + OS UInt8, + UserAgent UInt8, + URL String, + Referer String, + IsRefresh UInt8, + RefererCategoryID UInt16, + RefererRegionID UInt32, + URLCategoryID UInt16, + URLRegionID UInt32, + ResolutionWidth UInt16, + ResolutionHeight UInt16, + ResolutionDepth UInt8, + FlashMajor UInt8, + FlashMinor UInt8, + FlashMinor2 String, + NetMajor UInt8, + NetMinor UInt8, + UserAgentMajor UInt16, + UserAgentMinor String, + CookieEnable UInt8, + JavascriptEnable UInt8, + IsMobile UInt8, + MobilePhone UInt8, + MobilePhoneModel String, + Params String, + IPNetworkID UInt32, + TraficSourceID Int8, + SearchEngineID UInt16, + SearchPhrase String, + AdvEngineID UInt8, + IsArtifical UInt8, + WindowClientWidth UInt16, + WindowClientHeight UInt16, + ClientTimeZone Int16, + ClientEventTime DateTime, + SilverlightVersion1 UInt8, + SilverlightVersion2 UInt8, + SilverlightVersion3 UInt32, + SilverlightVersion4 UInt16, + PageCharset String, + CodeVersion UInt32, + IsLink UInt8, + IsDownload UInt8, + IsNotBounce UInt8, + FUniqID Int64, + OriginalURL String, + HID UInt32, + IsOldCounter UInt8, + IsEvent UInt8, + IsParameter UInt8, + DontCountHits UInt8, + WithHash UInt8, + HitColor String, + LocalEventTime DateTime, + Age UInt8, + Sex UInt8, + Income UInt8, + Interests UInt16, + Robotness UInt8, + RemoteIP Int32, + WindowName Int32, + OpenerName Int32, + HistoryLength Int16, + BrowserLanguage String, + BrowserCountry String, + SocialNetwork String, + SocialAction String, + HTTPError UInt16, + SendTiming UInt32, + DNSTiming UInt32, + ConnectTiming UInt32, + ResponseStartTiming UInt32, + ResponseEndTiming UInt32, + FetchTiming UInt32, + SocialSourceNetworkID UInt8, + SocialSourcePage String, + ParamPrice Int64, + ParamOrderID String, + ParamCurrency String, + ParamCurrencyID UInt16, + OpenstatServiceName String, + OpenstatCampaignID String, + OpenstatAdID String, + OpenstatSourceID String, + UTMSource String, + UTMMedium String, 
+ UTMCampaign String, + UTMContent String, + UTMTerm String, + FromTag String, + HasGCLID UInt8, + RefererHash Int64, + URLHash Int64, + CLID UInt32 +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(EventDate) +ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime, WatchID) +SETTINGS index_granularity = 8192; diff --git a/asap-tools/execution-utilities/benchmark/configs/clickbench_inference.yaml b/asap-tools/execution-utilities/benchmark/configs/clickbench_inference.yaml new file mode 100644 index 00000000..7c4af097 --- /dev/null +++ b/asap-tools/execution-utilities/benchmark/configs/clickbench_inference.yaml @@ -0,0 +1,21 @@ +# ASAP Inference Config for ClickBench Hits Dataset +# Source: asap_query_latency/inference_config.yaml + +tables: + - name: hits + time_column: EventTime + metadata_columns: [RegionID, OS, UserAgent, TraficSourceID] + value_columns: [ResolutionWidth] + +cleanup_policy: + name: read_based + +queries: + # Temporal queries (10s window, all labels) - QUANTILE + - aggregations: + - aggregation_id: 12 + read_count_threshold: 999999 + query: | + SELECT QUANTILE(0.95, ResolutionWidth) FROM hits + WHERE EventTime BETWEEN DATEADD(s, -10, NOW()) AND NOW() + GROUP BY RegionID, OS, UserAgent, TraficSourceID diff --git a/asap-tools/execution-utilities/benchmark/configs/clickbench_streaming.yaml b/asap-tools/execution-utilities/benchmark/configs/clickbench_streaming.yaml new file mode 100644 index 00000000..3d18e1ed --- /dev/null +++ b/asap-tools/execution-utilities/benchmark/configs/clickbench_streaming.yaml @@ -0,0 +1,26 @@ +# ASAP Streaming Config for ClickBench Hits Dataset +# Defines sketch aggregations for Arroyo to compute +# Source: asap_query_latency/streaming_config.yaml + +tables: + - name: hits + time_column: EventTime + metadata_columns: [RegionID, OS, UserAgent, TraficSourceID] + value_columns: [ResolutionWidth] + +aggregations: + # Temporal queries (10s window, all labels) - QUANTILE (DatasketchesKLL) + - aggregationId: 12 + 
aggregationType: DatasketchesKLL + aggregationSubType: '' + labels: + grouping: [RegionID, OS, UserAgent, TraficSourceID] + rollup: [] + aggregated: [] + table_name: hits + value_column: ResolutionWidth + parameters: + K: 200 + windowSize: 10 + windowType: tumbling + spatialFilter: '' diff --git a/asap-tools/execution-utilities/benchmark/configs/h2o_inference.yaml b/asap-tools/execution-utilities/benchmark/configs/h2o_inference.yaml new file mode 100644 index 00000000..0d1e45b0 --- /dev/null +++ b/asap-tools/execution-utilities/benchmark/configs/h2o_inference.yaml @@ -0,0 +1,20 @@ +# ASAP Inference Config for H2O GroupBy Dataset +# Source: asap_benchmark_pipeline/inference_config.yaml + +tables: + - name: h2o_groupby + time_column: timestamp + metadata_columns: [id1, id2] + value_columns: [v1] + +cleanup_policy: + name: read_based + +queries: + - aggregations: + - aggregation_id: 12 + read_count_threshold: 999999 + query: |- + SELECT QUANTILE(0.95, v1) FROM h2o_groupby + WHERE timestamp BETWEEN DATEADD(s, -10, NOW()) AND NOW() + GROUP BY id1, id2; diff --git a/asap-tools/execution-utilities/benchmark/configs/h2o_init.sql b/asap-tools/execution-utilities/benchmark/configs/h2o_init.sql new file mode 100644 index 00000000..dbaf81c0 --- /dev/null +++ b/asap-tools/execution-utilities/benchmark/configs/h2o_init.sql @@ -0,0 +1,20 @@ +-- ClickHouse init for H2O GroupBy baseline (MergeTree, direct load) +-- Use this with export_to_database.py --dataset h2o --init-sql-file +-- Source: asap_benchmark_pipeline/h2o_init.sql + +DROP TABLE IF EXISTS h2o_groupby; + +CREATE TABLE IF NOT EXISTS h2o_groupby +( + timestamp DateTime, + id1 String, + id2 String, + id3 String, + id4 Int32, + id5 Int32, + id6 Int32, + v1 Int32, + v2 Int32, + v3 Float64 +) ENGINE = MergeTree() +ORDER BY (id1, id2); diff --git a/asap-tools/execution-utilities/benchmark/configs/h2o_streaming.yaml b/asap-tools/execution-utilities/benchmark/configs/h2o_streaming.yaml new file mode 100644 index 
00000000..c500d696 --- /dev/null +++ b/asap-tools/execution-utilities/benchmark/configs/h2o_streaming.yaml @@ -0,0 +1,26 @@ +# ASAP Streaming Config for H2O GroupBy Dataset +# Source: asap_benchmark_pipeline/streaming_config.yaml + +tables: + - name: h2o_groupby + time_column: timestamp + metadata_columns: [id1, id2] + value_columns: [v1] + +aggregations: + # Temporal queries (10s window, all labels) - QUANTILE (DatasketchesKLL) + - aggregationId: 12 + aggregationType: DatasketchesKLL + aggregationSubType: '' + labels: + grouping: [id1, id2] + rollup: [] + aggregated: [] + table_name: h2o_groupby + value_column: v1 + parameters: + K: 200 + tumblingWindowSize: 10 + windowSize: 10 + windowType: tumbling + spatialFilter: '' diff --git a/asap-tools/execution-utilities/benchmark/download_dataset.py b/asap-tools/execution-utilities/benchmark/download_dataset.py new file mode 100644 index 00000000..5226ae53 --- /dev/null +++ b/asap-tools/execution-utilities/benchmark/download_dataset.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +""" +Unified dataset downloader for the ASAP benchmark pipeline. + +Supports ClickBench (hits.json.gz), H2O groupby (G1_1e7_1e2_0_0.csv), +or any custom HTTP URL. + +Usage: + python download_dataset.py --dataset clickbench --output-dir ./data + python download_dataset.py --dataset h2o --output-dir ./data + python download_dataset.py --dataset custom --custom-url https://... 
--output-dir ./data +""" + +import argparse +import os +import sys +import urllib.request + + +CLICKBENCH_URL = "https://datasets.clickhouse.com/hits_compatible/hits.json.gz" +CLICKBENCH_FILENAME = "hits.json.gz" + +H2O_FILE_ID = "15SVQjQ2QehzYDLoDonio4aP7xqdMiNyi" +H2O_FILENAME = "G1_1e7_1e2_0_0.csv" + + +def _http_download(url: str, output_path: str) -> str: + """Download a file via HTTP with progress reporting.""" + print(f"Downloading from {url}") + request = urllib.request.Request( + url, headers={"User-Agent": "Mozilla/5.0 (compatible; ASAP-Benchmark/1.0)"} + ) + try: + with urllib.request.urlopen(request) as response: + total_size = int(response.headers.get("Content-Length", 0)) + downloaded = 0 + last_percent = -1 + block_size = 8192 * 128 # ~1 MB blocks + + with open(output_path, "wb") as f: + while True: + block = response.read(block_size) + if not block: + break + f.write(block) + downloaded += len(block) + if total_size > 0: + percent = downloaded * 100 // total_size + if percent != last_percent: + last_percent = percent + mb = downloaded / (1024 * 1024) + total_mb = total_size / (1024 * 1024) + sys.stdout.write( + f"\rProgress: {percent}% ({mb:.1f}/{total_mb:.1f} MB)" + ) + sys.stdout.flush() + + print("\nDownload complete!") + return output_path + + except urllib.error.HTTPError as e: + print(f"\nDownload failed: HTTP {e.code} - {e.reason}") + raise + + +def download_clickbench(output_path: str, force: bool = False) -> str: + """Download hits.json.gz from ClickHouse datasets CDN.""" + if not force and os.path.exists(output_path): + print(f"Using existing file: {output_path}") + return output_path + print("Downloading ClickBench dataset (~14 GB compressed). 
Please wait...") + return _http_download(CLICKBENCH_URL, output_path) + + +def download_h2o(output_path: str, force: bool = False) -> str: + """Download H2O groupby CSV (~300 MB) from Google Drive via gdown.""" + if not force and os.path.exists(output_path) and os.path.getsize(output_path) > 100 * 1024 * 1024: + print(f"Using existing file: {output_path}") + return output_path + + try: + import gdown + except ImportError: + print("Installing gdown...") + import subprocess + subprocess.check_call([sys.executable, "-m", "pip", "install", "gdown"]) + import gdown + + print(f"Downloading H2O dataset via gdown (ID: {H2O_FILE_ID})...") + url = f"https://drive.google.com/uc?id={H2O_FILE_ID}" + gdown.download(url, output_path, quiet=False) + return output_path + + +def download_custom(url: str, output_path: str, force: bool = False) -> str: + """Download a dataset from an arbitrary HTTP URL.""" + if not force and os.path.exists(output_path): + print(f"Using existing file: {output_path}") + return output_path + return _http_download(url, output_path) + + +def main(): + parser = argparse.ArgumentParser( + description="Download benchmark datasets", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + parser.add_argument( + "--dataset", + choices=["clickbench", "h2o", "custom"], + required=True, + help="Dataset to download", + ) + parser.add_argument( + "--output-dir", + required=True, + help="Directory to save the downloaded file", + ) + parser.add_argument( + "--output-file", + default=None, + help="Exact output file path (overrides --output-dir)", + ) + parser.add_argument( + "--custom-url", + default=None, + help="URL to download (required when --dataset custom)", + ) + parser.add_argument( + "--force-redownload", + action="store_true", + help="Re-download even if the file already exists", + ) + args = parser.parse_args() + + if args.dataset == "custom" and not args.custom_url: + parser.error("--custom-url is required when --dataset custom") + + 
os.makedirs(args.output_dir, exist_ok=True) + + if args.output_file: + output_path = args.output_file + os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True) + elif args.dataset == "clickbench": + output_path = os.path.join(args.output_dir, CLICKBENCH_FILENAME) + elif args.dataset == "h2o": + output_path = os.path.join(args.output_dir, H2O_FILENAME) + else: + filename = args.custom_url.rstrip("/").split("/")[-1] or "data" + output_path = os.path.join(args.output_dir, filename) + + if args.dataset == "clickbench": + download_clickbench(output_path, force=args.force_redownload) + elif args.dataset == "h2o": + download_h2o(output_path, force=args.force_redownload) + else: + download_custom(args.custom_url, output_path, force=args.force_redownload) + + print(f"Dataset saved to: {output_path}") + + +if __name__ == "__main__": + main() diff --git a/asap-tools/execution-utilities/benchmark/export_to_arroyo.py b/asap-tools/execution-utilities/benchmark/export_to_arroyo.py new file mode 100644 index 00000000..6e72af72 --- /dev/null +++ b/asap-tools/execution-utilities/benchmark/export_to_arroyo.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python3 +""" +Launch an Arroyo sketch pipeline against a dataset. + +Supports two source modes: + file (default): Arroyo reads directly from a local JSON/Parquet file. + No Kafka input topic is required. + kafka: Arroyo reads from a Kafka topic (legacy path). + +In both cases the sketch output is written to a Kafka topic (default: +sketch_topic) for consumption by QueryEngineRust. 
+ +Usage: + # File source (recommended) + python export_to_arroyo.py \\ + --streaming-config configs/clickbench_streaming.yaml \\ + --source-type file \\ + --input-file ./data/hits.json.gz \\ + --file-format json \\ + --ts-format rfc3339 \\ + --pipeline-name clickbench_pipeline \\ + --arroyosketch-dir ~/ASAPQuery/asap-summary-ingest + + # Kafka source (legacy) + python export_to_arroyo.py \\ + --streaming-config configs/h2o_streaming.yaml \\ + --source-type kafka \\ + --input-kafka-topic h2o_groupby \\ + --pipeline-name h2o_pipeline \\ + --arroyosketch-dir ~/ASAPQuery/asap-summary-ingest +""" + +import argparse +import os +import subprocess +import sys +import time + +import requests + +DEFAULT_ARROYO_URL = "http://localhost:5115/api/v1" +DEFAULT_OUTPUT_KAFKA_TOPIC = "sketch_topic" +DEFAULT_PARALLELISM = 1 +DEFAULT_WAIT_TIMEOUT = 300 + + +def wait_for_pipeline_running( + pipeline_name: str, + arroyo_url: str = DEFAULT_ARROYO_URL, + timeout: int = DEFAULT_WAIT_TIMEOUT, +) -> bool: + """Poll the Arroyo API until the named pipeline reaches RUNNING state. + + Translated from asap_benchmark_pipeline/run_pipeline.sh lines 107-141. + A pipeline is considered running when its 'state' field is None and + 'stop' is 'none' (Arroyo's representation of a healthy running pipeline). 
+ """ + print(f"Waiting for pipeline '{pipeline_name}' to reach RUNNING state...") + elapsed = 0 + while True: + state = "error" + try: + r = requests.get(f"{arroyo_url}/pipelines", timeout=5) + if r.ok: + data = r.json() + for p in data.get("data", []): + if p.get("name") == pipeline_name: + s = p.get("state") + stop = p.get("stop", "") + if s is None and stop == "none": + state = "running" + else: + state = str(s).lower() if s else "unknown" + break + else: + state = "not_found" + except Exception: + state = "error" + + if state == "running": + print(f"Pipeline '{pipeline_name}' is RUNNING") + return True + + print(f" Pipeline state: {state} (elapsed: {elapsed}s)") + time.sleep(5) + elapsed += 5 + if elapsed >= timeout: + print( + f"ERROR: Pipeline did not reach RUNNING state within {timeout}s" + ) + return False + + +def build_arroyosketch_cmd(args, arroyosketch_script: str) -> list: + """Build the run_arroyosketch.py command from our CLI arguments.""" + cmd = [ + sys.executable, + arroyosketch_script, + "--source_type", args.source_type, + "--output_format", "json", + "--pipeline_name", args.pipeline_name, + "--config_file_path", os.path.abspath(args.streaming_config), + "--output_kafka_topic", args.output_kafka_topic, + "--output_dir", os.path.abspath(args.output_dir), + "--parallelism", str(args.parallelism), + "--query_language", "sql", + ] + + if args.source_type == "file": + cmd += [ + "--input_file_path", os.path.abspath(args.input_file), + "--file_format", args.file_format, + "--ts_format", args.ts_format, + ] + elif args.source_type == "kafka": + cmd += [ + "--kafka_input_format", "json", + "--input_kafka_topic", args.input_kafka_topic, + ] + + return cmd + + +def main(): + parser = argparse.ArgumentParser( + description="Launch Arroyo sketch pipeline (file or kafka source)", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + parser.add_argument( + "--streaming-config", + required=True, + help="Path to 
streaming_config.yaml", + ) + parser.add_argument( + "--source-type", + choices=["file", "kafka"], + default="file", + help="Data source type (default: file)", + ) + # File source args + parser.add_argument( + "--input-file", + default=None, + help="Path to input data file (required for --source-type file)", + ) + parser.add_argument( + "--file-format", + choices=["json", "parquet"], + default="json", + help="File format (default: json)", + ) + parser.add_argument( + "--ts-format", + choices=["unix_millis", "unix_seconds", "rfc3339"], + default="rfc3339", + help="Timestamp format in the data file (default: rfc3339)", + ) + # Kafka source args + parser.add_argument( + "--input-kafka-topic", + default=None, + help="Kafka topic to read from (required for --source-type kafka)", + ) + # Common args + parser.add_argument( + "--output-kafka-topic", + default=DEFAULT_OUTPUT_KAFKA_TOPIC, + help=f"Kafka topic for sketch output (default: {DEFAULT_OUTPUT_KAFKA_TOPIC})", + ) + parser.add_argument( + "--pipeline-name", + required=True, + help="Arroyo pipeline name", + ) + parser.add_argument( + "--parallelism", + type=int, + default=DEFAULT_PARALLELISM, + help=f"Arroyo pipeline parallelism (default: {DEFAULT_PARALLELISM})", + ) + parser.add_argument( + "--arroyosketch-dir", + required=True, + help="Path to asap-summary-ingest/ directory (contains run_arroyosketch.py)", + ) + parser.add_argument( + "--arroyo-url", + default=DEFAULT_ARROYO_URL, + help=f"Arroyo API base URL (default: {DEFAULT_ARROYO_URL})", + ) + parser.add_argument( + "--output-dir", + default="./arroyo_outputs", + help="Directory for Arroyo pipeline output artifacts (default: ./arroyo_outputs)", + ) + parser.add_argument( + "--wait-for-pipeline", + action="store_true", + default=True, + help="Poll until pipeline reaches RUNNING state (default: True)", + ) + parser.add_argument( + "--no-wait", + action="store_true", + help="Do not wait for pipeline to reach RUNNING state", + ) + parser.add_argument( + 
"--wait-timeout", + type=int, + default=DEFAULT_WAIT_TIMEOUT, + help=f"Seconds to wait for RUNNING state (default: {DEFAULT_WAIT_TIMEOUT})", + ) + + args = parser.parse_args() + + # Validate source-specific required args + if args.source_type == "file" and not args.input_file: + parser.error("--input-file is required when --source-type file") + if args.source_type == "kafka" and not args.input_kafka_topic: + parser.error("--input-kafka-topic is required when --source-type kafka") + + arroyosketch_script = os.path.join( + os.path.abspath(args.arroyosketch_dir), "run_arroyosketch.py" + ) + if not os.path.exists(arroyosketch_script): + print(f"ERROR: run_arroyosketch.py not found at {arroyosketch_script}") + sys.exit(1) + + os.makedirs(args.output_dir, exist_ok=True) + + cmd = build_arroyosketch_cmd(args, arroyosketch_script) + print(f"Launching Arroyo pipeline '{args.pipeline_name}' ({args.source_type} source)...") + print(f"Command: {' '.join(cmd)}") + + result = subprocess.run(cmd) + if result.returncode != 0: + print(f"ERROR: run_arroyosketch.py exited with code {result.returncode}") + sys.exit(result.returncode) + + if not args.no_wait: + success = wait_for_pipeline_running( + args.pipeline_name, + arroyo_url=args.arroyo_url, + timeout=args.wait_timeout, + ) + if not success: + sys.exit(1) + + print("Done.") + + +if __name__ == "__main__": + main() diff --git a/asap-tools/execution-utilities/benchmark/export_to_database.py b/asap-tools/execution-utilities/benchmark/export_to_database.py new file mode 100644 index 00000000..d9583641 --- /dev/null +++ b/asap-tools/execution-utilities/benchmark/export_to_database.py @@ -0,0 +1,353 @@ +#!/usr/bin/env python3 +""" +Load a dataset into ClickHouse for baseline comparison. + +Supports ClickBench (hits.json.gz), H2O groupby CSV, or a custom table. 
import argparse
import gzip
import os
import shlex
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path

import requests

DEFAULT_CLICKHOUSE_URL = "http://localhost:8123/"
H2O_BATCH_SIZE = 50_000
H2O_ROWS_PER_SECOND = 1000
H2O_BASE_EPOCH = 1704067200  # 2024-01-01T00:00:00Z


def _exec_clickhouse_sql(clickhouse_url: str, sql: str, label: str = ""):
    """Execute one SQL statement via the ClickHouse HTTP API.

    Failures are reported as warnings (not raised) so that a partially
    applied init script — e.g. "table already exists" — does not abort
    the data load.
    """
    r = requests.post(clickhouse_url, data=sql.encode())
    if not r.ok:
        print(f"  WARN [{label}]: {r.text.strip()[:200]}")
    else:
        short = sql.strip()[:80].replace("\n", " ")
        print(f"  OK: {short}")


def run_init_sql(clickhouse_url: str, init_sql_file: str):
    """Execute DDL statements from a SQL file.

    The file is split on ';' because the ClickHouse HTTP endpoint accepts
    only one statement per request; empty fragments are skipped.
    """
    print(f"Running init SQL from {init_sql_file}...")
    with open(init_sql_file) as f:
        content = f.read()
    stmts = [s.strip() for s in content.split(";") if s.strip()]
    for stmt in stmts:
        _exec_clickhouse_sql(clickhouse_url, stmt, label=stmt[:40])


def check_row_count(clickhouse_url: str, table_name: str) -> int:
    """Return the row count of *table_name*, or 0 if the query fails
    (e.g. the table does not exist yet)."""
    r = requests.post(clickhouse_url, data=f"SELECT count(*) FROM {table_name}")
    if r.ok:
        return int(r.text.strip())
    return 0


def load_clickbench(
    clickhouse_url: str,
    file_path: str,
    init_sql_file: str = None,
    skip_table_init: bool = False,
    skip_if_loaded: bool = False,
    max_rows: int = 0,
):
    """Load hits.json.gz into ClickHouse.

    Uses `zcat | clickhouse-client INSERT` for gzip-compressed JSON.
    Adapted from asap_query_latency/run_benchmark.py:load_clickbench_data().

    Returns True on success, False on a missing file or a failed insert.
    """
    if not skip_table_init and init_sql_file:
        run_init_sql(clickhouse_url, init_sql_file)

    if skip_if_loaded:
        count = check_row_count(clickhouse_url, "hits")
        if count > 0:
            print(f"Data already loaded ({count:,} rows). Skipping.")
            return True

    if not os.path.exists(file_path):
        print(f"ERROR: Data file not found: {file_path}")
        return False

    print(f"Loading ClickBench data from {file_path}...")
    # The insert has to go through a shell pipeline, so quote the path to
    # survive spaces and shell metacharacters.
    src = shlex.quote(file_path)
    insert = "clickhouse-client --query='INSERT INTO hits FORMAT JSONEachRow'"
    if max_rows > 0:
        # Pipe through head to limit rows.
        cmd = f"zcat {src} | head -n {max_rows} | {insert}"
    else:
        cmd = f"zcat {src} | {insert}"

    result = subprocess.run(cmd, shell=True)
    if result.returncode != 0:
        print("ERROR: ClickHouse insert failed")
        return False

    count = check_row_count(clickhouse_url, "hits")
    print(f"Loaded {count:,} rows into ClickHouse (hits)")
    return True
def _flush_h2o_batch(clickhouse_url: str, rows: list):
    """Flush a batch of pre-rendered H2O VALUES tuples via HTTP INSERT.

    Raises RuntimeError if ClickHouse rejects the insert.
    """
    sql = "INSERT INTO h2o_groupby VALUES " + ",".join(rows)
    r = requests.post(clickhouse_url, data=sql.encode())
    if not r.ok:
        raise RuntimeError(f"ClickHouse insert failed: {r.text[:200]}")


def load_h2o(
    clickhouse_url: str,
    file_path: str,
    init_sql_file: str = None,
    skip_table_init: bool = False,
    skip_if_loaded: bool = False,
    max_rows: int = 0,
):
    """Load H2O groupby CSV into ClickHouse with synthetic timestamps.

    Timestamps are assigned at H2O_ROWS_PER_SECOND rows/sec starting from
    H2O_BASE_EPOCH (2024-01-01T00:00:00Z).
    Adapted from asap_benchmark_pipeline/run_benchmark.py:load_h2o_data_clickhouse().

    Returns True on success, False on a missing file.
    NOTE(review): CSV fields are interpolated into the VALUES list without
    escaping — fine for the synthetic H2O ids (id001, ...), but not for
    arbitrary CSV input.
    """
    if not skip_table_init and init_sql_file:
        run_init_sql(clickhouse_url, init_sql_file)

    if skip_if_loaded:
        count = check_row_count(clickhouse_url, "h2o_groupby")
        if count > 0:
            print(f"Data already loaded ({count:,} rows). Skipping.")
            return True

    if not os.path.exists(file_path):
        print(f"ERROR: Data file not found: {file_path}")
        return False

    print(f"Inserting H2O data from {file_path} into ClickHouse...")
    batch: list = []
    total = 0

    with open(file_path, "r", encoding="utf-8") as f:
        f.readline()  # skip header
        for i, line in enumerate(f):
            if max_rows > 0 and i >= max_rows:
                break
            parts = line.rstrip("\n").split(",")
            # Synthetic event time: H2O_ROWS_PER_SECOND rows share each second.
            abs_sec = H2O_BASE_EPOCH + i // H2O_ROWS_PER_SECOND
            ts = datetime.fromtimestamp(abs_sec, tz=timezone.utc)
            ts_str = ts.strftime("%Y-%m-%d %H:%M:%S")

            batch.append(
                f"('{ts_str}','{parts[0]}','{parts[1]}','{parts[2]}',"
                f"{parts[3]},{parts[4]},{parts[5]},"
                f"{parts[6]},{parts[7]},{parts[8]})"
            )

            if len(batch) >= H2O_BATCH_SIZE:
                _flush_h2o_batch(clickhouse_url, batch)
                total += len(batch)
                batch = []
                if total % 500_000 == 0:
                    print(f" Inserted {total:,} rows...")

    if batch:
        _flush_h2o_batch(clickhouse_url, batch)
        total += len(batch)

    print(f"Loaded {total:,} rows into ClickHouse (h2o_groupby)")
    return True


def load_custom(
    clickhouse_url: str,
    file_path: str,
    table_name: str,
    ts_column: str,
    ts_assignment: str = "passthrough",
    init_sql_file: str = None,
    skip_table_init: bool = False,
    skip_if_loaded: bool = False,
    max_rows: int = 0,
):
    """Load a custom JSON/JSONL (optionally gzipped) file into ClickHouse.

    Uses INSERT FORMAT JSONEachRow via clickhouse-client. CSV input is not
    handled here (use --dataset h2o).

    NOTE(review): ts_column and ts_assignment are accepted but not used by
    this loader — timestamps must already be present in the JSON rows.

    Returns True on success, False otherwise.

    Bug fixed: the previous uncompressed-JSON branch combined
    `head -n N file | clickhouse-client ... < file`; the stdin redirection
    overrode the pipe, so --max-rows was silently ignored and the whole
    file was loaded. Both branches now feed clickhouse-client from a single
    pipeline, with the path shell-quoted.
    """
    import shlex  # local: file-level import block is outside this section

    if not skip_table_init and init_sql_file:
        run_init_sql(clickhouse_url, init_sql_file)

    if skip_if_loaded:
        count = check_row_count(clickhouse_url, table_name)
        if count > 0:
            print(f"Data already loaded ({count:,} rows). Skipping.")
            return True

    if not os.path.exists(file_path):
        print(f"ERROR: Data file not found: {file_path}")
        return False

    path_lower = file_path.lower()
    src = shlex.quote(file_path)
    if path_lower.endswith((".json.gz", ".jsonl.gz")):
        reader = f"zcat {src}"
    elif path_lower.endswith((".json", ".jsonl")):
        reader = f"cat {src}"
    else:
        print(f"ERROR: Unsupported file format for {file_path}. Use --dataset h2o for CSV.")
        return False

    insert = f"clickhouse-client --query='INSERT INTO {table_name} FORMAT JSONEachRow'"
    if max_rows > 0:
        cmd = f"{reader} | head -n {max_rows} | {insert}"
    else:
        cmd = f"{reader} | {insert}"

    print(f"Loading {file_path} into ClickHouse ({table_name})...")
    result = subprocess.run(cmd, shell=True)
    if result.returncode != 0:
        print("ERROR: ClickHouse insert failed")
        return False

    count = check_row_count(clickhouse_url, table_name)
    print(f"Loaded {count:,} rows into ClickHouse ({table_name})")
    return True
def main():
    """CLI entry point: parse arguments and dispatch to the right loader."""
    parser = argparse.ArgumentParser(
        description="Load a dataset into ClickHouse for baseline comparison",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "--dataset",
        choices=["clickbench", "h2o", "custom"],
        required=True,
        help="Dataset type",
    )
    parser.add_argument(
        "--file-path",
        required=True,
        help="Path to the source data file",
    )
    parser.add_argument(
        "--clickhouse-url",
        default=DEFAULT_CLICKHOUSE_URL,
        help=f"ClickHouse HTTP URL (default: {DEFAULT_CLICKHOUSE_URL})",
    )
    parser.add_argument(
        "--init-sql-file",
        default=None,
        help="DDL SQL file to run before loading (CREATE TABLE ...)",
    )
    parser.add_argument(
        "--table-name",
        default=None,
        help="Target table name (required for --dataset custom)",
    )
    parser.add_argument(
        "--ts-column",
        default=None,
        help="Timestamp column name (for --dataset custom)",
    )
    parser.add_argument(
        "--ts-assignment",
        choices=["synthetic", "passthrough"],
        default="passthrough",
        help="How to assign timestamps for custom CSV data (default: passthrough)",
    )
    parser.add_argument(
        "--skip-table-init",
        action="store_true",
        help="Skip CREATE TABLE (assume tables already exist)",
    )
    parser.add_argument(
        "--skip-if-loaded",
        action="store_true",
        help="Skip insert if the table already has rows",
    )
    parser.add_argument(
        "--max-rows",
        type=int,
        default=0,
        help="Maximum rows to load (0 = all)",
    )

    opts = parser.parse_args()

    if opts.dataset == "custom" and not opts.table_name:
        parser.error("--table-name is required when --dataset custom")

    # Keyword arguments shared by all three loaders.
    shared = dict(
        init_sql_file=opts.init_sql_file,
        skip_table_init=opts.skip_table_init,
        skip_if_loaded=opts.skip_if_loaded,
        max_rows=opts.max_rows,
    )

    if opts.dataset == "clickbench":
        ok = load_clickbench(opts.clickhouse_url, opts.file_path, **shared)
    elif opts.dataset == "h2o":
        ok = load_h2o(opts.clickhouse_url, opts.file_path, **shared)
    else:
        ok = load_custom(
            opts.clickhouse_url,
            opts.file_path,
            table_name=opts.table_name,
            ts_column=opts.ts_column,
            ts_assignment=opts.ts_assignment,
            **shared,
        )

    sys.exit(0 if ok else 1)


if __name__ == "__main__":
    main()
+ +Output: + {prefix}_asap.sql QUANTILE(q, col) syntax for QueryEngineRust + {prefix}_clickhouse.sql quantile(q)(col) syntax for ClickHouse baseline + +Usage: + # Auto-detect timestamps from data file + python generate_queries.py \\ + --table-name hits \\ + --ts-column EventTime \\ + --value-column ResolutionWidth \\ + --group-by-columns RegionID,OS,UserAgent,TraficSourceID \\ + --window-size 10 \\ + --num-queries 50 \\ + --auto-detect-timestamps \\ + --data-file ./data/hits.json.gz \\ + --data-file-format json.gz \\ + --output-prefix ./queries/clickbench + + # Explicit timestamp file (one ISO timestamp per line) + python generate_queries.py \\ + --table-name h2o_groupby \\ + --ts-column timestamp \\ + --value-column v1 \\ + --group-by-columns id1,id2 \\ + --window-size 10 \\ + --num-queries 50 \\ + --timestamps-file ./my_timestamps.txt \\ + --output-prefix ./queries/h2o +""" + +import argparse +import gzip +import json +import sys +from datetime import datetime, timedelta, timezone +from pathlib import Path +from typing import List, Optional + + +SAMPLE_SIZE = 10_000 # rows to read for timestamp auto-detection + + +def _parse_timestamp(value: str) -> Optional[datetime]: + """Try to parse a timestamp string in common formats.""" + value = str(value).strip() + for fmt in ( + "%Y-%m-%dT%H:%M:%SZ", + "%Y-%m-%dT%H:%M:%S.%fZ", + "%Y-%m-%dT%H:%M:%S", + "%Y-%m-%d %H:%M:%S", + "%Y-%m-%d", + ): + try: + return datetime.strptime(value, fmt).replace(tzinfo=timezone.utc) + except ValueError: + pass + # Try unix seconds/millis (numeric string) + try: + v = float(value) + if v > 1e12: # millis + return datetime.fromtimestamp(v / 1000, tz=timezone.utc) + return datetime.fromtimestamp(v, tz=timezone.utc) + except ValueError: + pass + return None + + +def _read_timestamps_from_json( + file_path: str, ts_column: str, compressed: bool +) -> List[datetime]: + """Read up to SAMPLE_SIZE timestamps from a JSON-lines file.""" + timestamps = [] + opener = gzip.open if compressed else open 
+ mode = "rt" if compressed else "r" + with opener(file_path, mode) as f: + for i, line in enumerate(f): + if i >= SAMPLE_SIZE: + break + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + val = obj.get(ts_column) + if val is not None: + ts = _parse_timestamp(val) + if ts: + timestamps.append(ts) + except (json.JSONDecodeError, KeyError): + continue + return timestamps + + +def _read_timestamps_from_csv( + file_path: str, ts_column: str +) -> List[datetime]: + """Read up to SAMPLE_SIZE timestamps from a CSV file.""" + import csv + timestamps = [] + with open(file_path, "r", newline="") as f: + reader = csv.DictReader(f) + if ts_column not in (reader.fieldnames or []): + print( + f"WARNING: Column '{ts_column}' not found in CSV. " + f"Available: {reader.fieldnames}" + ) + return [] + for i, row in enumerate(reader): + if i >= SAMPLE_SIZE: + break + ts = _parse_timestamp(row[ts_column]) + if ts: + timestamps.append(ts) + return timestamps + + +def detect_timestamps( + data_file: str, data_file_format: str, ts_column: str +) -> tuple: + """Return (min_ts, max_ts) from a sample of the data file.""" + fmt = data_file_format.lower() + if fmt in ("json.gz", "jsonl.gz"): + timestamps = _read_timestamps_from_json(data_file, ts_column, compressed=True) + elif fmt in ("json", "jsonl"): + timestamps = _read_timestamps_from_json(data_file, ts_column, compressed=False) + elif fmt == "csv": + timestamps = _read_timestamps_from_csv(data_file, ts_column) + else: + print(f"ERROR: Unsupported data file format: {data_file_format}") + sys.exit(1) + + if not timestamps: + print( + f"ERROR: No '{ts_column}' timestamps found in the first {SAMPLE_SIZE} " + f"rows of {data_file}" + ) + sys.exit(1) + + return min(timestamps), max(timestamps) + + +def _snap_to_window_boundary(ts: datetime, window_size: int) -> datetime: + """Round a timestamp up to the next window boundary (epoch-aligned). + + Arroyo tumbling windows are aligned to epoch multiples of window_size. 
+ Querying at a non-boundary timestamp will miss the sketch. + """ + epoch_sec = int(ts.timestamp()) + remainder = epoch_sec % window_size + if remainder == 0: + return ts + snapped = epoch_sec + (window_size - remainder) + return datetime.fromtimestamp(snapped, tz=timezone.utc) + + +def generate_window_ends( + min_ts: datetime, + max_ts: datetime, + window_size: int, + stride: int, + num_queries: int, +) -> List[datetime]: + """Generate evenly-spaced window-end timestamps within [min_ts, max_ts]. + + Timestamps are snapped to epoch-aligned window boundaries so that + Arroyo's tumbling window sketches can be found by QueryEngineRust. + """ + # First valid window-end: snap to next boundary after min_ts + window_size + earliest = min_ts + timedelta(seconds=window_size) + start = _snap_to_window_boundary(earliest, window_size) + if start >= max_ts: + print( + f"WARNING: window_size ({window_size}s) exceeds the data time range " + f"({(max_ts - min_ts).total_seconds():.0f}s). Using max_ts as only endpoint." 
def format_ts(ts: datetime, ts_format: str) -> str:
    """Render *ts* for embedding in generated SQL text."""
    fmt = "%Y-%m-%dT%H:%M:%SZ" if ts_format == "iso" else "%Y-%m-%d %H:%M:%S"
    return ts.strftime(fmt)


def generate_sql_files(
    table_name: str,
    ts_column: str,
    value_column: str,
    group_by_columns: List[str],
    quantile: float,
    window_size: int,
    window_ends: List[datetime],
    ts_format: str,
    window_form: str,
    output_prefix: str,
):
    """Write the paired ASAP and ClickHouse SQL files.

    Every query carries a `-- T{NNN}: ...` annotation so run_benchmark.py
    can pair the two files by query id. The ASAP file uses
    QUANTILE(q, col) syntax; the ClickHouse file uses quantile(q)(col).
    """
    group_by_clause = ", ".join(group_by_columns)
    asap_stmts = []
    ch_stmts = []

    for idx, window_end in enumerate(window_ends):
        end_str = format_ts(window_end, ts_format)
        start_str = format_ts(window_end - timedelta(seconds=window_size), ts_format)
        header = f"-- T{idx:03d}: quantile window ending at {end_str}"

        if window_form == "dateadd":
            where_clause = (
                f"{ts_column} BETWEEN DATEADD(s, -{window_size}, '{end_str}') AND '{end_str}'"
            )
        else:
            where_clause = f"{ts_column} BETWEEN '{start_str}' AND '{end_str}'"

        tail = f"FROM {table_name} WHERE {where_clause} GROUP BY {group_by_clause};"
        asap_stmts.append(f"{header}\nSELECT QUANTILE({quantile}, {value_column}) {tail}")
        ch_stmts.append(f"{header}\nSELECT quantile({quantile})({value_column}) {tail}")

    asap_file = f"{output_prefix}_asap.sql"
    ch_file = f"{output_prefix}_clickhouse.sql"

    Path(asap_file).parent.mkdir(parents=True, exist_ok=True)
    Path(asap_file).write_text("\n".join(asap_stmts) + "\n")
    Path(ch_file).write_text("\n".join(ch_stmts) + "\n")

    print(f"Generated {len(window_ends)} queries:")
    print(f" ASAP: {asap_file}")
    print(f" ClickHouse: {ch_file}")
def _window_ends_from_file(timestamps_file: str, num_queries: int) -> List[datetime]:
    """Load explicit window-end timestamps (one per line); exit if none parse."""
    parsed: List[datetime] = []
    with open(timestamps_file) as f:
        for raw in f:
            raw = raw.strip()
            if not raw:
                continue
            ts = _parse_timestamp(raw)
            if ts:
                parsed.append(ts)
            else:
                print(f"WARNING: Could not parse timestamp: {raw!r}")
    if not parsed:
        print("ERROR: No valid timestamps found in --timestamps-file")
        sys.exit(1)
    parsed = parsed[: num_queries]
    print(
        f"Using {len(parsed)} timestamps from {timestamps_file} "
        f"({parsed[0]} – {parsed[-1]})"
    )
    return parsed


def main():
    """CLI entry point: derive window endpoints, then emit both SQL files."""
    parser = argparse.ArgumentParser(
        description="Generate paired ASAP + ClickHouse SQL query files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    # Table/column config
    parser.add_argument("--table-name", required=True)
    parser.add_argument("--ts-column", required=True, help="Timestamp column name")
    parser.add_argument("--value-column", required=True, help="Column to compute quantile on")
    parser.add_argument(
        "--group-by-columns",
        required=True,
        help="Comma-separated GROUP BY columns",
    )
    # Query parameters
    parser.add_argument("--quantile", type=float, default=0.95)
    parser.add_argument("--window-size", type=int, default=10, help="Window size in seconds")
    parser.add_argument("--num-queries", type=int, default=50)
    parser.add_argument(
        "--ts-format",
        choices=["iso", "datetime"],
        default="iso",
        help="Timestamp format in SQL: iso='YYYY-MM-DDTHH:MM:SSZ', datetime='YYYY-MM-DD HH:MM:SS' (default: iso)",
    )
    parser.add_argument(
        "--window-form",
        choices=["explicit", "dateadd"],
        default="explicit",
        help="SQL window form: explicit='BETWEEN start AND end', dateadd='BETWEEN DATEADD(s,-N,end) AND end' (default: explicit)",
    )
    parser.add_argument(
        "--output-prefix",
        required=True,
        help="Output file prefix (e.g. ./queries/clickbench → clickbench_asap.sql + clickbench_clickhouse.sql)",
    )
    # Timestamp sources (mutually exclusive)
    ts_group = parser.add_mutually_exclusive_group(required=True)
    ts_group.add_argument(
        "--auto-detect-timestamps",
        action="store_true",
        help="Scan data file to determine time range",
    )
    ts_group.add_argument(
        "--timestamps-file",
        default=None,
        help="File with explicit window-end timestamps (one ISO timestamp per line)",
    )
    # Auto-detect options
    parser.add_argument(
        "--data-file",
        default=None,
        help="Path to data file (required with --auto-detect-timestamps)",
    )
    parser.add_argument(
        "--data-file-format",
        choices=["json", "jsonl", "json.gz", "jsonl.gz", "csv"],
        default="json",
        help="Data file format (default: json)",
    )
    parser.add_argument(
        "--stride-seconds",
        type=int,
        default=None,
        help="Spacing between window-end timestamps (default: window-size * 3)",
    )

    args = parser.parse_args()

    if args.auto_detect_timestamps and not args.data_file:
        parser.error("--data-file is required when --auto-detect-timestamps is set")

    group_by_columns = [c.strip() for c in args.group_by_columns.split(",")]
    stride = args.stride_seconds if args.stride_seconds else args.window_size * 3

    if args.timestamps_file:
        window_ends = _window_ends_from_file(args.timestamps_file, args.num_queries)
    else:
        print(f"Scanning {args.data_file} for timestamp range...")
        min_ts, max_ts = detect_timestamps(
            args.data_file, args.data_file_format, args.ts_column
        )
        print(f" Detected range: {min_ts} – {max_ts}")
        window_ends = generate_window_ends(
            min_ts, max_ts, args.window_size, stride, args.num_queries
        )
        print(
            f" Generated {len(window_ends)} window endpoints "
            f"(stride={stride}s, window={args.window_size}s)"
        )

    generate_sql_files(
        table_name=args.table_name,
        ts_column=args.ts_column,
        value_column=args.value_column,
        group_by_columns=group_by_columns,
        quantile=args.quantile,
        window_size=args.window_size,
        window_ends=window_ends,
        ts_format=args.ts_format,
        window_form=args.window_form,
        output_prefix=args.output_prefix,
    )


if __name__ == "__main__":
    main()
+ +Usage: + # ClickBench: convert hits.json.gz → hits_arroyo.json + python prepare_data.py --dataset clickbench \\ + --input ./data/hits.json.gz \\ + --output ./data/hits_arroyo.json \\ + [--max-rows 1000000] + + # H2O: convert G1_1e7_1e2_0_0.csv → h2o_arroyo.json (adds synthetic timestamps) + python prepare_data.py --dataset h2o \\ + --input ./data/G1_1e7_1e2_0_0.csv \\ + --output ./data/h2o_arroyo.json \\ + [--max-rows 1000000] +""" + +import argparse +import gzip +import json +import sys +from datetime import datetime, timedelta, timezone +from pathlib import Path + +# Synthetic timestamp base for H2O (2024-01-01T00:00:00Z) +H2O_BASE_EPOCH = 1704067200 +H2O_ROWS_PER_SECOND = 1000 + +# ClickBench columns needed by Arroyo (must match streaming_config.yaml) +CB_TIMESTAMP_FIELD = "EventTime" +CB_VALUE_FIELDS = ["ResolutionWidth"] +CB_METADATA_FIELDS = ["RegionID", "OS", "UserAgent", "TraficSourceID"] +CB_KEEP_FIELDS = [CB_TIMESTAMP_FIELD] + CB_VALUE_FIELDS + CB_METADATA_FIELDS + +# H2O columns +H2O_TIMESTAMP_FIELD = "timestamp" +H2O_METADATA_FIELDS = ["id1", "id2"] +H2O_VALUE_FIELDS = ["v1"] + + +def _parse_clickbench_ts(ts_str: str) -> str: + """Convert 'YYYY-MM-DD HH:MM:SS' → 'YYYY-MM-DDTHH:MM:SSZ' (RFC3339).""" + try: + dt = datetime.strptime(ts_str, "%Y-%m-%d %H:%M:%S") + return dt.strftime("%Y-%m-%dT%H:%M:%SZ") + except ValueError: + return ts_str # already RFC3339 or unknown format + + +def prepare_clickbench(input_path: str, output_path: str, max_rows: int = 0): + """Convert hits.json.gz to Arroyo-compatible JSON. 
+ + - Converts EventTime to RFC3339 + - Stringifies integer metadata columns (RegionID, OS, UserAgent, TraficSourceID) + - Sorts by EventTime (required for Arroyo event-time watermarks) + - Writes only the fields needed by the streaming config + """ + print(f"Reading {input_path}...") + records = [] + + opener = gzip.open if input_path.endswith(".gz") else open + with opener(input_path, "rt") as f: + for i, line in enumerate(f): + if max_rows > 0 and i >= max_rows: + break + if i % 100_000 == 0 and i > 0: + print(f" Read {i:,} rows...", end="\r") + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + except json.JSONDecodeError: + continue + + ts = _parse_clickbench_ts(str(obj.get(CB_TIMESTAMP_FIELD, ""))) + record = {CB_TIMESTAMP_FIELD: ts} + for col in CB_VALUE_FIELDS: + record[col] = float(obj.get(col, 0)) + for col in CB_METADATA_FIELDS: + record[col] = str(obj.get(col, "")) + records.append(record) + + print(f"\nSorting {len(records):,} records by {CB_TIMESTAMP_FIELD}...") + records.sort(key=lambda r: r[CB_TIMESTAMP_FIELD]) + + print(f"Writing to {output_path}...") + with open(output_path, "w") as f: + for record in records: + f.write(json.dumps(record) + "\n") + + print(f"Done. {len(records):,} records written.") + if records: + print(f" Time range: {records[0][CB_TIMESTAMP_FIELD]} – {records[-1][CB_TIMESTAMP_FIELD]}") + + +def prepare_h2o(input_path: str, output_path: str, max_rows: int = 0): + """Convert H2O CSV to Arroyo-compatible JSON with synthetic timestamps. 
+ + - Adds synthetic RFC3339 timestamps at H2O_ROWS_PER_SECOND rows/sec + starting from 2024-01-01T00:00:00Z + - Converts id4, id5, id6 to strings (metadata columns are expected as strings) + """ + print(f"Reading {input_path}...") + count = 0 + + with open(input_path, "r", encoding="utf-8") as fin, \ + open(output_path, "w") as fout: + + header = fin.readline().strip() + cols = header.split(",") + id_idx = {c: i for i, c in enumerate(cols)} + + for i, line in enumerate(fin): + if max_rows > 0 and i >= max_rows: + break + if i % 100_000 == 0 and i > 0: + print(f" Written {i:,} rows...", end="\r") + + parts = line.rstrip("\n").split(",") + abs_sec = H2O_BASE_EPOCH + i // H2O_ROWS_PER_SECOND + ms = i % H2O_ROWS_PER_SECOND + ts = datetime.fromtimestamp(abs_sec, tz=timezone.utc) + ts_str = ts.strftime("%Y-%m-%dT%H:%M:%S") + f".{ms:03d}Z" + + record = { + H2O_TIMESTAMP_FIELD: ts_str, + "id1": parts[id_idx["id1"]], + "id2": parts[id_idx["id2"]], + "id3": parts[id_idx["id3"]], + "id4": int(parts[id_idx["id4"]]), + "id5": int(parts[id_idx["id5"]]), + "id6": int(parts[id_idx["id6"]]), + "v1": float(parts[id_idx["v1"]]), + "v2": float(parts[id_idx["v2"]]), + "v3": float(parts[id_idx["v3"]]), + } + fout.write(json.dumps(record) + "\n") + count += 1 + + print(f"\nDone. 
def main():
    """CLI entry point: pick the converter for the requested dataset."""
    parser = argparse.ArgumentParser(
        description="Prepare dataset files for Arroyo file source",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "--dataset",
        choices=["clickbench", "h2o"],
        required=True,
        help="Dataset type to prepare",
    )
    parser.add_argument("--input", required=True, help="Path to raw input file")
    parser.add_argument("--output", required=True, help="Path to write prepared JSON file")
    parser.add_argument(
        "--max-rows",
        type=int,
        default=0,
        help="Max rows to process (0 = all, default: 0)",
    )
    opts = parser.parse_args()

    Path(opts.output).parent.mkdir(parents=True, exist_ok=True)

    # --dataset choices guarantee the key is present.
    converters = {"clickbench": prepare_clickbench, "h2o": prepare_h2o}
    converters[opts.dataset](opts.input, opts.output, opts.max_rows)


if __name__ == "__main__":
    main()
import argparse
import csv
import re
import time
import urllib.parse
from pathlib import Path
from typing import List, Optional, Tuple

import matplotlib.pyplot as plt
import numpy as np
import requests

DEFAULT_ASAP_URL = "http://localhost:8088/clickhouse/query"
DEFAULT_CLICKHOUSE_URL = "http://localhost:8123/?session_timezone=UTC"
DEFAULT_OUTPUT_DIR = "./results"
DEFAULT_OUTPUT_PREFIX = "benchmark"


# ---------------------------------------------------------------------------
# Query extraction
# Reused from asap_query_latency/run_benchmark.py:extract_queries_from_sql()
# ---------------------------------------------------------------------------


def extract_queries_from_sql(sql_file: Path) -> List[Tuple[str, str]]:
    """Extract (query_id, sql) pairs from an annotated SQL file.

    Expects lines of the form:
        -- T001: description
        SELECT ... ;

    Each statement runs from the SELECT up to its first ';' (DOTALL, so
    multi-line statements are fine).
    """
    text = Path(sql_file).read_text()
    matches = re.findall(
        r"-- ([A-Za-z0-9_]+):[^\n]*\n(SELECT[^;]+;)",
        text,
        re.DOTALL | re.IGNORECASE,
    )
    return [(query_id, statement.strip()) for query_id, statement in matches]
# ---------------------------------------------------------------------------
# Query runner
# Adapted from asap_benchmark_pipeline/run_benchmark.py:run_query()
# Uses requests.Session for connection reuse across queries.
# ---------------------------------------------------------------------------


def run_query(
    query: str,
    endpoint_url: str,
    session: requests.Session,
    timeout: int = 30,
    debug: bool = False,
) -> Tuple[float, Optional[str], Optional[str]]:
    """Send a single SQL query and return (latency_ms, result_text, error).

    Exactly one of result_text/error is non-None. On timeout the latency is
    reported as the full timeout budget; on any other failure it is 0.0.
    """
    encoded_query = urllib.parse.quote(query)
    separator = "&" if "?" in endpoint_url else "?"
    url = f"{endpoint_url}{separator}query={encoded_query}"

    try:
        # perf_counter is monotonic — unlike time.time(), it cannot go
        # backwards or jump under NTP adjustments, which matters for
        # sub-millisecond latency measurements.
        start = time.perf_counter()
        response = session.get(url, timeout=timeout)
        latency_ms = (time.perf_counter() - start) * 1000

        if debug:
            source = "OK" if response.status_code == 200 else f"HTTP {response.status_code}"
            print(f" [{source}] {latency_ms:.2f}ms")

        if response.status_code == 200:
            return latency_ms, response.text.strip(), None
        else:
            return latency_ms, None, f"HTTP {response.status_code}: {response.text[:200]}"
    except requests.Timeout:
        return timeout * 1000.0, None, "Timeout"
    except Exception as e:
        # Deliberately broad: any transport failure becomes a per-query
        # error row instead of aborting the whole benchmark.
        return 0.0, None, str(e)
+# --------------------------------------------------------------------------- + + +def _infer_pattern(query_id: str) -> str: + if query_id.startswith("ST"): + return "SpatioTemporal" + if query_id.startswith("S"): + return "Spatial" + if query_id.startswith("T"): + return "Temporal" + if query_id.startswith("N"): + return "Nested" + if query_id.startswith("D"): + return "Dated" + if query_id.startswith("L"): + return "LongRange" + return "Unknown" + + +def _latency_summary(latencies: List[float], label: str): + if not latencies: + return + s = sorted(latencies) + n = len(s) + print(f"\n{label} ({n} successful queries):") + print( + f" min={s[0]:.2f}ms avg={sum(s)/n:.2f}ms " + f"p50={s[int(n*0.50)]:.2f}ms p95={s[int(n*0.95)]:.2f}ms max={s[-1]:.2f}ms" + ) + + +def run_benchmark( + sql_file: Path, + endpoint_url: str, + output_csv: Path, + mode: str, + query_filter: Optional[List[str]] = None, + timeout: int = 30, + repeat: int = 1, + debug: bool = False, + no_plot: bool = False, +): + """Run all queries and write results to CSV. 

    CSV columns: query_id, query_pattern, latency_ms, result_rows,
    result_full, error, mode
    """
    print(f"\nRunning benchmark in {mode.upper()} mode...")
    print(f"Endpoint: {endpoint_url}")
    print(f"SQL file: {sql_file}")
    print(f"Output: {output_csv}")
    if debug:
        print("Debug: per-request HTTP status shown.")

    queries = extract_queries_from_sql(sql_file)
    if query_filter:
        # Keep only the explicitly requested query IDs.
        queries = [(qid, sql) for qid, sql in queries if qid in query_filter]
    print(f"Found {len(queries)} queries (repeat={repeat})")

    output_csv.parent.mkdir(parents=True, exist_ok=True)
    # One Session for the whole run: reuses the HTTP connection across queries.
    session = requests.Session()
    latencies_ok: List[float] = []
    plot_latencies: List[float] = []

    with open(output_csv, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(
            ["query_id", "query_pattern", "latency_ms", "result_rows", "result_full", "error", "mode"]
        )

        for query_id, sql in queries:
            pattern = _infer_pattern(query_id)
            print(f"Running {query_id}...", end=" " if not debug else "\n", flush=True)

            # Repeat and take median
            trial_latencies = []
            last_result, last_error = None, None
            for _ in range(repeat):
                lat, result, error = run_query(sql, endpoint_url, session, timeout, debug)
                trial_latencies.append(lat)
                last_result, last_error = result, error
                if error:
                    break # don't retry on error

            # Median of the trials (upper median for an even count).
            latency_ms = sorted(trial_latencies)[len(trial_latencies) // 2]

            if last_error:
                print(f"ERROR {last_error}")
                writer.writerow([query_id, pattern, f"{latency_ms:.2f}", 0, "", last_error, mode])
                # Failed queries contribute a 0 bar so the plot keeps one
                # slot per query in execution order.
                plot_latencies.append(0.0)
            else:
                result_lines = last_result.strip().split("\n") if last_result else []
                num_rows = len(result_lines)
                # Flatten newlines and cap at 200 chars for the CSV preview.
                preview = last_result.replace("\n", " | ")[:200] if last_result else ""
                latencies_ok.append(latency_ms)
                plot_latencies.append(latency_ms)
                print(f"{latency_ms:.2f}ms ({num_rows} rows)")
                writer.writerow(
                    [query_id, pattern, f"{latency_ms:.2f}", num_rows, preview, "", mode]
                )

            # Small pause between queries to avoid hammering the endpoint.
            time.sleep(0.1)

print(f"\nResults saved to {output_csv}") + _latency_summary(latencies_ok, f"Latency summary") + + if not no_plot and plot_latencies: + _plot_single(plot_latencies, mode, output_csv.with_suffix(".png")) + + +def _plot_single(latencies: List[float], mode: str, out_path: Path): + """Bar chart of per-query latency for a single mode.""" + color = "#4682b4" if mode == "asap" else "#f4a460" + x = list(range(1, len(latencies) + 1)) + plt.figure(figsize=(12, 5)) + plt.bar(x, latencies, color=color, edgecolor="black") + plt.xlabel("Query Execution Order") + plt.ylabel("Latency (ms)") + plt.title(f"Query Latency — {mode.upper()} mode") + plt.grid(axis="y", linestyle="--", alpha=0.7) + plt.tight_layout() + plt.savefig(out_path, dpi=150) + plt.close() + print(f"Plot saved to {out_path}") + + +def _plot_comparison(asap_csv: Path, baseline_csv: Path, out_path: Path): + """Two-panel comparison plot: per-query bars + speedup bars. + + Adapted from asap_query_latency/plot_latency.py. + """ + def _load(path): + rows = {} + with open(path) as f: + for row in csv.DictReader(f): + if not row["error"]: + rows[row["query_id"]] = float(row["latency_ms"]) + return rows + + asap = _load(asap_csv) + base = _load(baseline_csv) + qids = sorted(set(asap) & set(base)) + if not qids: + print("WARNING: No common query IDs for comparison plot.") + return + + x = np.arange(len(qids)) + a_vals = [asap[q] for q in qids] + b_vals = [base[q] for q in qids] + speedup = [b / a if a > 0 else 0 for a, b in zip(a_vals, b_vals)] + + fig, (ax1, ax2) = plt.subplots( + 2, 1, figsize=(14, 7), gridspec_kw={"height_ratios": [3, 1]} + ) + + w = 0.4 + ax1.bar(x - w / 2, b_vals, w, label="ClickHouse baseline", color="#f4a460") + ax1.bar(x + w / 2, a_vals, w, label="ASAP (KLL sketch)", color="#4682b4") + ax1.set_xticks(x) + ax1.set_xticklabels(qids, rotation=90, fontsize=7) + ax1.set_ylabel("Latency (ms)") + ax1.set_title( + f"Query latency: ASAP vs ClickHouse baseline " + f"(p50: {np.median(a_vals):.1f}ms vs 
{np.median(b_vals):.1f}ms)"
    )
    ax1.legend()
    # Tight x-limits so the first/last bars are not clipped.
    ax1.set_xlim(-0.6, len(qids) - 0.4)

    ax2.bar(x, speedup, color="#2e8b57", width=0.7)
    # Horizontal reference line at the mean speedup.
    ax2.axhline(
        np.mean(speedup),
        color="red",
        linewidth=1,
        linestyle="--",
        label=f"mean {np.mean(speedup):.1f}×",
    )
    ax2.set_xticks(x)
    ax2.set_xticklabels(qids, rotation=90, fontsize=7)
    ax2.set_ylabel("Speedup (×)")
    ax2.legend(fontsize=8)
    ax2.set_xlim(-0.6, len(qids) - 0.4)

    plt.tight_layout()
    plt.savefig(out_path, dpi=150)
    plt.close()
    print(f"Comparison plot saved to {out_path}")


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------


def main():
    # RawDescriptionHelpFormatter keeps the module docstring's formatting
    # intact when it is shown as the --help epilog.
    parser = argparse.ArgumentParser(
        description="Benchmark ASAP vs ClickHouse baseline",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "--mode",
        choices=["asap", "baseline", "both"],
        default="both",
        help="Which mode(s) to run (default: both)",
    )
    # SQL-file requirements are validated after parsing, per selected mode.
    parser.add_argument(
        "--asap-sql-file",
        default=None,
        help="SQL file for ASAP mode (required if mode is asap or both)",
    )
    parser.add_argument(
        "--baseline-sql-file",
        default=None,
        help="SQL file for baseline mode (required if mode is baseline or both)",
    )
    parser.add_argument(
        "--asap-url",
        default=DEFAULT_ASAP_URL,
        help=f"QueryEngineRust endpoint (default: {DEFAULT_ASAP_URL})",
    )
    parser.add_argument(
        "--clickhouse-url",
        default=DEFAULT_CLICKHOUSE_URL,
        help=f"ClickHouse HTTP URL (default: {DEFAULT_CLICKHOUSE_URL})",
    )
    parser.add_argument(
        "--output-dir",
        default=DEFAULT_OUTPUT_DIR,
        help=f"Directory for results (default: {DEFAULT_OUTPUT_DIR})",
    )
    parser.add_argument(
        "--output-prefix",
        default=DEFAULT_OUTPUT_PREFIX,
        help=f"Prefix for output files (default: {DEFAULT_OUTPUT_PREFIX})",
    )
    parser.add_argument(
        "--query-filter",
        default=None,
        help="Comma-separated query IDs 
to run (e.g. T000,T001)",
    )
    parser.add_argument(
        "--repeat",
        type=int,
        default=1,
        help="Run each query N times and report the median (default: 1)",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=30,
        help="Per-query timeout in seconds (default: 30)",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Show per-query HTTP status",
    )
    parser.add_argument(
        "--no-plot",
        action="store_true",
        help="Do not generate any plots",
    )
    # Ignored flag for backward compatibility
    parser.add_argument(
        "--measure-pipeline-overhead",
        action="store_true",
        help="(No-op) Pipeline overhead measurement is not applicable with file source",
    )

    args = parser.parse_args()

    if args.measure_pipeline_overhead:
        print(
            "WARNING: --measure-pipeline-overhead is not applicable when using "
            "file source (no Kafka ingest). Ignoring."
        )

    # Validate required SQL files
    if args.mode in ("asap", "both") and not args.asap_sql_file:
        parser.error("--asap-sql-file is required when --mode is asap or both")
    if args.mode in ("baseline", "both") and not args.baseline_sql_file:
        parser.error("--baseline-sql-file is required when --mode is baseline or both")

    output_dir = Path(args.output_dir)
    prefix = args.output_prefix
    # Split the comma-separated filter into trimmed IDs; None means "run all".
    query_filter = [q.strip() for q in args.query_filter.split(",")] if args.query_filter else None

    asap_csv = output_dir / f"{prefix}_asap.csv"
    baseline_csv = output_dir / f"{prefix}_baseline.csv"

    # Baseline first so the comparison plot has both CSVs when mode=both.
    if args.mode in ("baseline", "both"):
        run_benchmark(
            sql_file=Path(args.baseline_sql_file),
            endpoint_url=args.clickhouse_url,
            output_csv=baseline_csv,
            mode="baseline",
            query_filter=query_filter,
            timeout=args.timeout,
            repeat=args.repeat,
            debug=args.debug,
            no_plot=args.no_plot,
        )

    if args.mode in ("asap", "both"):
        run_benchmark(
            sql_file=Path(args.asap_sql_file),
            endpoint_url=args.asap_url,
            output_csv=asap_csv,
            mode="asap",
            query_filter=query_filter,
            
timeout=args.timeout, + repeat=args.repeat, + debug=args.debug, + no_plot=args.no_plot, + ) + + if args.mode == "both" and not args.no_plot: + comparison_png = output_dir / f"{prefix}_comparison.png" + _plot_comparison(asap_csv, baseline_csv, comparison_png) + + +if __name__ == "__main__": + main() From c0446360e181ccb898e4715f02a48f1fcfa117ac Mon Sep 17 00:00:00 2001 From: STWang Date: Wed, 8 Apr 2026 12:59:00 -0600 Subject: [PATCH 02/10] format; clickhouse can run --- asap-common/.gitignore | 1 + asap-query-engine/.gitignore | 1 + asap-query-engine/src/main.rs | 2 +- asap-summary-ingest/.gitignore | 1 + .../templates/udfs/countminsketch_count.rs.j2 | 2 +- asap-tools/execution-utilities/.gitignore | 3 + .../execution-utilities/benchmark/README.md | 57 +++++---- .../benchmark/download_dataset.py | 7 +- .../benchmark/export_to_arroyo.py | 116 +++++------------- .../benchmark/export_to_database.py | 78 ++++++------ .../benchmark/generate_queries.py | 76 +++++------- .../benchmark/prepare_data.py | 22 ++-- .../benchmark/run_benchmark.py | 58 +++++---- 13 files changed, 200 insertions(+), 224 deletions(-) diff --git a/asap-common/.gitignore b/asap-common/.gitignore index 102b6eac..a3b80cf2 100644 --- a/asap-common/.gitignore +++ b/asap-common/.gitignore @@ -5,6 +5,7 @@ .vscode/ dependencies/py/promql_utilities/promql_utilities.egg-info/ +dependencies/py/promql_utilities/build/ dependencies/rs/**/target/ tests/**/*.json diff --git a/asap-query-engine/.gitignore b/asap-query-engine/.gitignore index eb5a316c..5c63ba3f 100644 --- a/asap-query-engine/.gitignore +++ b/asap-query-engine/.gitignore @@ -1 +1,2 @@ target +output/ diff --git a/asap-query-engine/src/main.rs b/asap-query-engine/src/main.rs index fa589aa0..842fe284 100644 --- a/asap-query-engine/src/main.rs +++ b/asap-query-engine/src/main.rs @@ -541,4 +541,4 @@ fn setup_logging( info!("Logging initialized (respects RUST_LOG environment variable)"); info!("Logs will be written to: {}/query_engine.log", output_dir); 
Ok(guard) -} +} \ No newline at end of file diff --git a/asap-summary-ingest/.gitignore b/asap-summary-ingest/.gitignore index f7ee054e..49407f65 100644 --- a/asap-summary-ingest/.gitignore +++ b/asap-summary-ingest/.gitignore @@ -1,3 +1,4 @@ __pycache__ **/*.pyc **/*.swp +outputs/ diff --git a/asap-summary-ingest/templates/udfs/countminsketch_count.rs.j2 b/asap-summary-ingest/templates/udfs/countminsketch_count.rs.j2 index b8c3d54d..b720e603 100644 --- a/asap-summary-ingest/templates/udfs/countminsketch_count.rs.j2 +++ b/asap-summary-ingest/templates/udfs/countminsketch_count.rs.j2 @@ -110,4 +110,4 @@ fn countminsketch_count(keys: Vec<&str>, values: Vec) -> Option> { .ok()?; Some(buf) } -} +} \ No newline at end of file diff --git a/asap-tools/execution-utilities/.gitignore b/asap-tools/execution-utilities/.gitignore index c8760c7f..7704e7d1 100644 --- a/asap-tools/execution-utilities/.gitignore +++ b/asap-tools/execution-utilities/.gitignore @@ -7,6 +7,9 @@ clickhouse-benchmark-pipeline/benchmark_results/ **/data/ +benchmark/arroyo_outputs/ +benchmark/queries/ +benchmark/results/ **/*.csv **/*.png diff --git a/asap-tools/execution-utilities/benchmark/README.md b/asap-tools/execution-utilities/benchmark/README.md index 9a0608cc..c45b171c 100644 --- a/asap-tools/execution-utilities/benchmark/README.md +++ b/asap-tools/execution-utilities/benchmark/README.md @@ -19,10 +19,6 @@ data_file → export_to_database.py run_benchmark.py → results/ ClickHouse :8123 (baseline) ``` -**Key difference from the old pipeline:** Arroyo reads directly from a local -file (`single_file_custom` connector) rather than from a Kafka input topic. -Kafka is still required for the **sketch output** topic (`sketch_topic`). - --- ## Prerequisites @@ -31,8 +27,8 @@ Kafka is still required for the **sketch output** topic (`sketch_topic`). 
export INSTALL_DIR=/scratch/sketch_db_for_prometheus pip3 install --user -r requirements.txt -# Build binaries (one-time) -cd ~/ASAPQuery/asap-query-engine && cargo build --release +# Build binaries (one-time) — workspace target is at ~/ASAPQuery/target/release/ +cd ~/ASAPQuery && cargo build --release ``` --- @@ -60,6 +56,7 @@ The Arroyo file source requires RFC3339 timestamps and string metadata columns. This step converts the raw ClickBench JSON: ```bash +cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark python prepare_data.py \ --dataset clickbench \ --input ./data/hits.json.gz \ @@ -74,17 +71,19 @@ This produces `hits_arroyo.json` with: ### Step 3 — Start infrastructure +Skip any service that is already running. + ```bash -# Kafka +# Kafka — skip if `kafka-topics.sh --list` succeeds ~/ASAPQuery/asap-tools/installation/kafka/run.sh $INSTALL_DIR/kafka -# Create sketch output topic +# Create sketch output topic — skip if sketch_topic already exists KAFKA=$INSTALL_DIR/kafka/bin $KAFKA/kafka-topics.sh --bootstrap-server localhost:9092 --create \ --topic sketch_topic --partitions 1 --replication-factor 1 \ --config max.message.bytes=20971520 -# ClickHouse +# ClickHouse — skip if port 8123 is already listening ~/ASAPQuery/asap-tools/installation/clickhouse/run.sh $INSTALL_DIR ``` @@ -99,9 +98,9 @@ $KAFKA/kafka-topics.sh --bootstrap-server localhost:9092 --create \ ### Step 5 — Launch Arroyo sketch pipeline (file source) ```bash +cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark python export_to_arroyo.py \ --streaming-config ./configs/clickbench_streaming.yaml \ - --source-type file \ --input-file ./data/hits_arroyo.json \ --file-format json \ --ts-format rfc3339 \ @@ -113,13 +112,13 @@ python export_to_arroyo.py \ ### Step 6 — Start QueryEngineRust ```bash -cd ~/ASAPQuery/asap-query-engine +cd ~/ASAPQuery nohup ./target/release/query_engine_rust \ --kafka-topic sketch_topic --input-format json \ --config 
~/ASAPQuery/asap-tools/execution-utilities/benchmark/configs/clickbench_inference.yaml \ --streaming-config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/configs/clickbench_streaming.yaml \ --http-port 8088 --delete-existing-db --log-level DEBUG \ - --output-dir ./output --streaming-engine arroyo \ + --output-dir ./asap-query-engine/output --streaming-engine arroyo \ --query-language SQL --lock-strategy per-key \ --prometheus-scrape-interval 1 > /tmp/query_engine.log 2>&1 & ``` @@ -140,6 +139,7 @@ Verify: `$INSTALL_DIR/clickhouse client --query "SELECT count(*) FROM hits"` ### Step 8 — Generate SQL query files ```bash +cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark python generate_queries.py \ --table-name hits \ --ts-column EventTime \ @@ -155,15 +155,16 @@ python generate_queries.py \ --output-prefix ./queries/clickbench ``` -This writes `queries/clickbench_asap.sql` and `queries/clickbench_clickhouse.sql`. +This writes `queries/clickbench.sql`. ### Step 9 — Run benchmark ```bash +cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark python run_benchmark.py \ --mode both \ - --asap-sql-file ./queries/clickbench_asap.sql \ - --baseline-sql-file ./queries/clickbench_clickhouse.sql \ + --asap-sql-file ./queries/clickbench.sql \ + --baseline-sql-file ./queries/clickbench.sql \ --output-dir ./results \ --output-prefix clickbench ``` @@ -178,12 +179,14 @@ Results: `results/clickbench_asap.csv`, `results/clickbench_baseline.csv`, ### Step 1 — Download dataset ```bash +cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark python download_dataset.py --dataset h2o --output-dir ./data ``` ### Step 2 — Prepare data for Arroyo file source ```bash +cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark python prepare_data.py \ --dataset h2o \ --input ./data/G1_1e7_1e2_0_0.csv \ @@ -196,9 +199,9 @@ python prepare_data.py \ ### Step 5 — Launch Arroyo sketch pipeline ```bash +cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark python export_to_arroyo.py \ 
--streaming-config ./configs/h2o_streaming.yaml \ - --source-type file \ --input-file ./data/h2o_arroyo.json \ --file-format json \ --ts-format rfc3339 \ @@ -210,13 +213,13 @@ python export_to_arroyo.py \ ### Step 6 — Start QueryEngineRust ```bash -cd ~/ASAPQuery/asap-query-engine +cd ~/ASAPQuery nohup ./target/release/query_engine_rust \ --kafka-topic sketch_topic --input-format json \ --config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/configs/h2o_inference.yaml \ --streaming-config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/configs/h2o_streaming.yaml \ --http-port 8088 --delete-existing-db --log-level DEBUG \ - --output-dir ./output --streaming-engine arroyo \ + --output-dir ./asap-query-engine/output --streaming-engine arroyo \ --query-language SQL --lock-strategy per-key \ --prometheus-scrape-interval 1 > /tmp/query_engine.log 2>&1 & ``` @@ -224,6 +227,7 @@ nohup ./target/release/query_engine_rust \ ### Step 7 — Load data into ClickHouse (baseline) ```bash +cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark python export_to_database.py \ --dataset h2o \ --file-path ./data/G1_1e7_1e2_0_0.csv \ @@ -234,6 +238,7 @@ python export_to_database.py \ ### Step 8 — Generate SQL query files ```bash +cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark python generate_queries.py \ --table-name h2o_groupby \ --ts-column timestamp \ @@ -251,10 +256,11 @@ python generate_queries.py \ ### Step 9 — Run benchmark ```bash +cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark python run_benchmark.py \ --mode both \ - --asap-sql-file ./queries/h2o_asap.sql \ - --baseline-sql-file ./queries/h2o_clickhouse.sql \ + --asap-sql-file ./queries/h2o.sql \ + --baseline-sql-file ./queries/h2o.sql \ --output-dir ./results \ --output-prefix h2o ``` @@ -264,6 +270,8 @@ python run_benchmark.py \ ## Custom Dataset ```bash +cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark + # 1. 
Download (any HTTP URL) python download_dataset.py --dataset custom \ --custom-url https://example.com/mydata.json.gz \ @@ -274,7 +282,6 @@ python download_dataset.py --dataset custom \ # 3. Export to Arroyo python export_to_arroyo.py \ --streaming-config ./configs/my_streaming.yaml \ - --source-type file \ --input-file ./data/mydata.json \ --file-format json \ --ts-format rfc3339 \ @@ -303,8 +310,8 @@ python generate_queries.py \ # 6. Run benchmark python run_benchmark.py \ --mode both \ - --asap-sql-file ./queries/my_dataset_asap.sql \ - --baseline-sql-file ./queries/my_dataset_clickhouse.sql \ + --asap-sql-file ./queries/my_dataset.sql \ + --baseline-sql-file ./queries/my_dataset.sql \ --output-dir ./results ``` @@ -337,8 +344,8 @@ $INSTALL_DIR/clickhouse client --query "TRUNCATE TABLE hits" |------|---------| | `download_dataset.py` | Download ClickBench, H2O, or custom datasets | | `prepare_data.py` | Convert raw data to Arroyo file source format (RFC3339, string columns) | -| `export_to_arroyo.py` | Launch Arroyo sketch pipeline (file or kafka source) | +| `export_to_arroyo.py` | Launch Arroyo sketch pipeline from a local file source | | `export_to_database.py` | Load data into ClickHouse for baseline | -| `generate_queries.py` | Generate paired ASAP + ClickHouse SQL query files | +| `generate_queries.py` | Generate a single SQL query file (database-style, compatible with both ASAP and ClickHouse) | | `run_benchmark.py` | Run queries and produce CSV results + plots | | `configs/` | Dataset-specific streaming/inference YAML and ClickHouse init SQL | diff --git a/asap-tools/execution-utilities/benchmark/download_dataset.py b/asap-tools/execution-utilities/benchmark/download_dataset.py index 5226ae53..750b5502 100644 --- a/asap-tools/execution-utilities/benchmark/download_dataset.py +++ b/asap-tools/execution-utilities/benchmark/download_dataset.py @@ -74,7 +74,11 @@ def download_clickbench(output_path: str, force: bool = False) -> str: def 
download_h2o(output_path: str, force: bool = False) -> str: """Download H2O groupby CSV (~300 MB) from Google Drive via gdown.""" - if not force and os.path.exists(output_path) and os.path.getsize(output_path) > 100 * 1024 * 1024: + if ( + not force + and os.path.exists(output_path) + and os.path.getsize(output_path) > 100 * 1024 * 1024 + ): print(f"Using existing file: {output_path}") return output_path @@ -83,6 +87,7 @@ def download_h2o(output_path: str, force: bool = False) -> str: except ImportError: print("Installing gdown...") import subprocess + subprocess.check_call([sys.executable, "-m", "pip", "install", "gdown"]) import gdown diff --git a/asap-tools/execution-utilities/benchmark/export_to_arroyo.py b/asap-tools/execution-utilities/benchmark/export_to_arroyo.py index 6e72af72..38533668 100644 --- a/asap-tools/execution-utilities/benchmark/export_to_arroyo.py +++ b/asap-tools/execution-utilities/benchmark/export_to_arroyo.py @@ -1,33 +1,18 @@ #!/usr/bin/env python3 """ -Launch an Arroyo sketch pipeline against a dataset. +Launch an Arroyo sketch pipeline from a local file source. -Supports two source modes: - file (default): Arroyo reads directly from a local JSON/Parquet file. - No Kafka input topic is required. - kafka: Arroyo reads from a Kafka topic (legacy path). - -In both cases the sketch output is written to a Kafka topic (default: -sketch_topic) for consumption by QueryEngineRust. +Arroyo reads directly from a local JSON/Parquet file and writes sketches to +a Kafka topic (default: sketch_topic) for consumption by QueryEngineRust. 
Usage: - # File source (recommended) python export_to_arroyo.py \\ --streaming-config configs/clickbench_streaming.yaml \\ - --source-type file \\ - --input-file ./data/hits.json.gz \\ + --input-file ./data/hits_arroyo.json \\ --file-format json \\ --ts-format rfc3339 \\ --pipeline-name clickbench_pipeline \\ --arroyosketch-dir ~/ASAPQuery/asap-summary-ingest - - # Kafka source (legacy) - python export_to_arroyo.py \\ - --streaming-config configs/h2o_streaming.yaml \\ - --source-type kafka \\ - --input-kafka-topic h2o_groupby \\ - --pipeline-name h2o_pipeline \\ - --arroyosketch-dir ~/ASAPQuery/asap-summary-ingest """ import argparse @@ -49,12 +34,7 @@ def wait_for_pipeline_running( arroyo_url: str = DEFAULT_ARROYO_URL, timeout: int = DEFAULT_WAIT_TIMEOUT, ) -> bool: - """Poll the Arroyo API until the named pipeline reaches RUNNING state. - - Translated from asap_benchmark_pipeline/run_pipeline.sh lines 107-141. - A pipeline is considered running when its 'state' field is None and - 'stop' is 'none' (Arroyo's representation of a healthy running pipeline). 
- """ + """Poll the Arroyo API until the named pipeline reaches RUNNING state.""" print(f"Waiting for pipeline '{pipeline_name}' to reach RUNNING state...") elapsed = 0 while True: @@ -85,45 +65,43 @@ def wait_for_pipeline_running( time.sleep(5) elapsed += 5 if elapsed >= timeout: - print( - f"ERROR: Pipeline did not reach RUNNING state within {timeout}s" - ) + print(f"ERROR: Pipeline did not reach RUNNING state within {timeout}s") return False def build_arroyosketch_cmd(args, arroyosketch_script: str) -> list: """Build the run_arroyosketch.py command from our CLI arguments.""" - cmd = [ + return [ sys.executable, arroyosketch_script, - "--source_type", args.source_type, - "--output_format", "json", - "--pipeline_name", args.pipeline_name, - "--config_file_path", os.path.abspath(args.streaming_config), - "--output_kafka_topic", args.output_kafka_topic, - "--output_dir", os.path.abspath(args.output_dir), - "--parallelism", str(args.parallelism), - "--query_language", "sql", + "--source_type", + "file", + "--output_format", + "json", + "--pipeline_name", + args.pipeline_name, + "--config_file_path", + os.path.abspath(args.streaming_config), + "--output_kafka_topic", + args.output_kafka_topic, + "--output_dir", + os.path.abspath(args.output_dir), + "--parallelism", + str(args.parallelism), + "--query_language", + "sql", + "--input_file_path", + os.path.abspath(args.input_file), + "--file_format", + args.file_format, + "--ts_format", + args.ts_format, ] - if args.source_type == "file": - cmd += [ - "--input_file_path", os.path.abspath(args.input_file), - "--file_format", args.file_format, - "--ts_format", args.ts_format, - ] - elif args.source_type == "kafka": - cmd += [ - "--kafka_input_format", "json", - "--input_kafka_topic", args.input_kafka_topic, - ] - - return cmd - def main(): parser = argparse.ArgumentParser( - description="Launch Arroyo sketch pipeline (file or kafka source)", + description="Launch Arroyo sketch pipeline from a local file source", 
formatter_class=argparse.RawDescriptionHelpFormatter, epilog=__doc__, ) @@ -132,17 +110,10 @@ def main(): required=True, help="Path to streaming_config.yaml", ) - parser.add_argument( - "--source-type", - choices=["file", "kafka"], - default="file", - help="Data source type (default: file)", - ) - # File source args parser.add_argument( "--input-file", - default=None, - help="Path to input data file (required for --source-type file)", + required=True, + help="Path to input data file (JSON or Parquet)", ) parser.add_argument( "--file-format", @@ -156,13 +127,6 @@ def main(): default="rfc3339", help="Timestamp format in the data file (default: rfc3339)", ) - # Kafka source args - parser.add_argument( - "--input-kafka-topic", - default=None, - help="Kafka topic to read from (required for --source-type kafka)", - ) - # Common args parser.add_argument( "--output-kafka-topic", default=DEFAULT_OUTPUT_KAFKA_TOPIC, @@ -194,12 +158,6 @@ def main(): default="./arroyo_outputs", help="Directory for Arroyo pipeline output artifacts (default: ./arroyo_outputs)", ) - parser.add_argument( - "--wait-for-pipeline", - action="store_true", - default=True, - help="Poll until pipeline reaches RUNNING state (default: True)", - ) parser.add_argument( "--no-wait", action="store_true", @@ -214,12 +172,6 @@ def main(): args = parser.parse_args() - # Validate source-specific required args - if args.source_type == "file" and not args.input_file: - parser.error("--input-file is required when --source-type file") - if args.source_type == "kafka" and not args.input_kafka_topic: - parser.error("--input-kafka-topic is required when --source-type kafka") - arroyosketch_script = os.path.join( os.path.abspath(args.arroyosketch_dir), "run_arroyosketch.py" ) @@ -230,10 +182,10 @@ def main(): os.makedirs(args.output_dir, exist_ok=True) cmd = build_arroyosketch_cmd(args, arroyosketch_script) - print(f"Launching Arroyo pipeline '{args.pipeline_name}' ({args.source_type} source)...") + print(f"Launching 
Arroyo pipeline '{args.pipeline_name}'...") print(f"Command: {' '.join(cmd)}") - result = subprocess.run(cmd) + result = subprocess.run(cmd, cwd=os.path.abspath(args.arroyosketch_dir)) if result.returncode != 0: print(f"ERROR: run_arroyosketch.py exited with code {result.returncode}") sys.exit(result.returncode) diff --git a/asap-tools/execution-utilities/benchmark/export_to_database.py b/asap-tools/execution-utilities/benchmark/export_to_database.py index d9583641..9811917c 100644 --- a/asap-tools/execution-utilities/benchmark/export_to_database.py +++ b/asap-tools/execution-utilities/benchmark/export_to_database.py @@ -29,10 +29,8 @@ import argparse import gzip import os -import subprocess import sys from datetime import datetime, timezone -from pathlib import Path import requests @@ -77,11 +75,7 @@ def load_clickbench( skip_if_loaded: bool = False, max_rows: int = 0, ): - """Load hits.json.gz into ClickHouse. - - Uses `zcat | clickhouse-client INSERT` for gzip-compressed JSON. - Adapted from asap_query_latency/run_benchmark.py:load_clickbench_data(). 
- """ + """Load hits.json.gz into ClickHouse via HTTP INSERT.""" if not skip_table_init and init_sql_file: run_init_sql(clickhouse_url, init_sql_file) @@ -96,21 +90,18 @@ def load_clickbench( return False print(f"Loading ClickBench data from {file_path}...") - if max_rows > 0: - # Pipe through head to limit rows - cmd = ( - f"zcat {file_path} | head -n {max_rows} | " - f"clickhouse-client --query='INSERT INTO hits FORMAT JSONEachRow'" - ) - else: - cmd = ( - f"zcat {file_path} | " - f"clickhouse-client --query='INSERT INTO hits FORMAT JSONEachRow'" - ) - result = subprocess.run(cmd, shell=True) - if result.returncode != 0: - print("ERROR: ClickHouse insert failed") + def _row_stream(): + with gzip.open(file_path, "rt") as f: + for i, line in enumerate(f): + if max_rows > 0 and i >= max_rows: + break + yield line.encode() + + url = clickhouse_url.rstrip("/") + "/?query=INSERT+INTO+hits+FORMAT+JSONEachRow" + r = requests.post(url, data=_row_stream(), stream=True) + if not r.ok: + print(f"ERROR: ClickHouse insert failed: {r.text[:200]}") return False count = check_row_count(clickhouse_url, "hits") @@ -218,30 +209,41 @@ def load_custom( return False path_lower = file_path.lower() + url = ( + clickhouse_url.rstrip("/") + + f"/?query=INSERT+INTO+{table_name}+FORMAT+JSONEachRow" + ) + + def _stream_gzip(): + with gzip.open(file_path, "rt") as f: + for i, line in enumerate(f): + if max_rows > 0 and i >= max_rows: + break + yield line.encode() + + def _stream_plain(): + with open(file_path, "r") as f: + for i, line in enumerate(f): + if max_rows > 0 and i >= max_rows: + break + yield line.encode() + if path_lower.endswith(".json.gz") or path_lower.endswith(".jsonl.gz"): - head_cmd = f"| head -n {max_rows}" if max_rows > 0 else "" - cmd = ( - f"zcat {file_path} {head_cmd} | " - f"clickhouse-client --query='INSERT INTO {table_name} FORMAT JSONEachRow'" - ) print(f"Loading {file_path} into ClickHouse ({table_name})...") - result = subprocess.run(cmd, shell=True) - if 
result.returncode != 0: - print("ERROR: ClickHouse insert failed") + r = requests.post(url, data=_stream_gzip(), stream=True) + if not r.ok: + print(f"ERROR: ClickHouse insert failed: {r.text[:200]}") return False elif path_lower.endswith(".json") or path_lower.endswith(".jsonl"): - head_cmd = f"head -n {max_rows} {file_path} | " if max_rows > 0 else "" - cmd = ( - f"{head_cmd}clickhouse-client --query='INSERT INTO {table_name} FORMAT JSONEachRow' " - f"< {file_path}" - ) print(f"Loading {file_path} into ClickHouse ({table_name})...") - result = subprocess.run(cmd, shell=True) - if result.returncode != 0: - print("ERROR: ClickHouse insert failed") + r = requests.post(url, data=_stream_plain(), stream=True) + if not r.ok: + print(f"ERROR: ClickHouse insert failed: {r.text[:200]}") return False else: - print(f"ERROR: Unsupported file format for {file_path}. Use --dataset h2o for CSV.") + print( + f"ERROR: Unsupported file format for {file_path}. Use --dataset h2o for CSV." + ) return False count = check_row_count(clickhouse_url, table_name) diff --git a/asap-tools/execution-utilities/benchmark/generate_queries.py b/asap-tools/execution-utilities/benchmark/generate_queries.py index 13989100..730b2efd 100644 --- a/asap-tools/execution-utilities/benchmark/generate_queries.py +++ b/asap-tools/execution-utilities/benchmark/generate_queries.py @@ -1,16 +1,17 @@ #!/usr/bin/env python3 """ -Generate paired ASAP and ClickHouse SQL query files for benchmarking. +Generate a SQL query file for benchmarking ASAP and ClickHouse. -Each query targets a fixed time window (window-end timestamp) and matches the +Each query uses database-style quantile(q)(col) syntax, compatible with both +QueryEngineRust and ClickHouse. Queries target fixed time windows and match the annotation format `-- T{NNN}: description` expected by run_benchmark.py. 
Output: - {prefix}_asap.sql QUANTILE(q, col) syntax for QueryEngineRust - {prefix}_clickhouse.sql quantile(q)(col) syntax for ClickHouse baseline + {prefix}.sql quantile(q)(col) database-style syntax, compatible with both + QueryEngineRust and ClickHouse baseline Usage: - # Auto-detect timestamps from data file + # Auto-detect timestamps from data file → ./queries/clickbench.sql python generate_queries.py \\ --table-name hits \\ --ts-column EventTime \\ @@ -23,7 +24,7 @@ --data-file-format json.gz \\ --output-prefix ./queries/clickbench - # Explicit timestamp file (one ISO timestamp per line) + # Explicit timestamp file (one ISO timestamp per line) → ./queries/h2o.sql python generate_queries.py \\ --table-name h2o_groupby \\ --ts-column timestamp \\ @@ -98,11 +99,10 @@ def _read_timestamps_from_json( return timestamps -def _read_timestamps_from_csv( - file_path: str, ts_column: str -) -> List[datetime]: +def _read_timestamps_from_csv(file_path: str, ts_column: str) -> List[datetime]: """Read up to SAMPLE_SIZE timestamps from a CSV file.""" import csv + timestamps = [] with open(file_path, "r", newline="") as f: reader = csv.DictReader(f) @@ -121,9 +121,7 @@ def _read_timestamps_from_csv( return timestamps -def detect_timestamps( - data_file: str, data_file_format: str, ts_column: str -) -> tuple: +def detect_timestamps(data_file: str, data_file_format: str, ts_column: str) -> tuple: """Return (min_ts, max_ts) from a sample of the data file.""" fmt = data_file_format.lower() if fmt in ("json.gz", "jsonl.gz"): @@ -199,7 +197,7 @@ def format_ts(ts: datetime, ts_format: str) -> str: return ts.strftime("%Y-%m-%d %H:%M:%S") -def generate_sql_files( +def generate_sql_file( table_name: str, ts_column: str, value_column: str, @@ -211,10 +209,9 @@ def generate_sql_files( window_form: str, output_prefix: str, ): - """Write the paired ASAP and ClickHouse SQL files.""" + """Write a single SQL file compatible with both ASAP and ClickHouse.""" group_by_clause = ", 
".join(group_by_columns) - asap_lines = [] - ch_lines = [] + lines = [] for i, end_ts in enumerate(window_ends): end_str = format_ts(end_ts, ts_format) @@ -224,42 +221,23 @@ def generate_sql_files( desc = f"quantile window ending at {end_str}" if window_form == "dateadd": - where_clause = ( - f"{ts_column} BETWEEN DATEADD(s, -{window_size}, '{end_str}') AND '{end_str}'" - ) + where_clause = f"{ts_column} BETWEEN DATEADD(s, -{window_size}, '{end_str}') AND '{end_str}'" else: - where_clause = ( - f"{ts_column} BETWEEN '{start_str}' AND '{end_str}'" - ) + where_clause = f"{ts_column} BETWEEN '{start_str}' AND '{end_str}'" - asap_sql = ( - f"-- {label}: {desc}\n" - f"SELECT QUANTILE({quantile}, {value_column}) FROM {table_name} " - f"WHERE {where_clause} GROUP BY {group_by_clause};" - ) - ch_sql = ( + lines.append( f"-- {label}: {desc}\n" f"SELECT quantile({quantile})({value_column}) FROM {table_name} " f"WHERE {where_clause} GROUP BY {group_by_clause};" ) - asap_lines.append(asap_sql) - ch_lines.append(ch_sql) + sql_file = f"{output_prefix}.sql" + Path(sql_file).parent.mkdir(parents=True, exist_ok=True) - asap_file = f"{output_prefix}_asap.sql" - ch_file = f"{output_prefix}_clickhouse.sql" + with open(sql_file, "w") as f: + f.write("\n".join(lines) + "\n") - Path(asap_file).parent.mkdir(parents=True, exist_ok=True) - - with open(asap_file, "w") as f: - f.write("\n".join(asap_lines) + "\n") - - with open(ch_file, "w") as f: - f.write("\n".join(ch_lines) + "\n") - - print(f"Generated {len(window_ends)} queries:") - print(f" ASAP: {asap_file}") - print(f" ClickHouse: {ch_file}") + print(f"Generated {len(window_ends)} queries → {sql_file}") def main(): @@ -271,7 +249,9 @@ def main(): # Table/column config parser.add_argument("--table-name", required=True) parser.add_argument("--ts-column", required=True, help="Timestamp column name") - parser.add_argument("--value-column", required=True, help="Column to compute quantile on") + parser.add_argument( + "--value-column", 
required=True, help="Column to compute quantile on" + ) parser.add_argument( "--group-by-columns", required=True, @@ -279,7 +259,9 @@ def main(): ) # Query parameters parser.add_argument("--quantile", type=float, default=0.95) - parser.add_argument("--window-size", type=int, default=10, help="Window size in seconds") + parser.add_argument( + "--window-size", type=int, default=10, help="Window size in seconds" + ) parser.add_argument("--num-queries", type=int, default=50) parser.add_argument( "--ts-format", @@ -296,7 +278,7 @@ def main(): parser.add_argument( "--output-prefix", required=True, - help="Output file prefix (e.g. ./queries/clickbench → clickbench_asap.sql + clickbench_clickhouse.sql)", + help="Output file prefix (e.g. ./queries/clickbench → clickbench.sql)", ) # Timestamp sources (mutually exclusive) ts_group = parser.add_mutually_exclusive_group(required=True) @@ -372,7 +354,7 @@ def main(): f"(stride={stride}s, window={args.window_size}s)" ) - generate_sql_files( + generate_sql_file( table_name=args.table_name, ts_column=args.ts_column, value_column=args.value_column, diff --git a/asap-tools/execution-utilities/benchmark/prepare_data.py b/asap-tools/execution-utilities/benchmark/prepare_data.py index 33bc207d..5b2d7b56 100644 --- a/asap-tools/execution-utilities/benchmark/prepare_data.py +++ b/asap-tools/execution-utilities/benchmark/prepare_data.py @@ -27,8 +27,7 @@ import argparse import gzip import json -import sys -from datetime import datetime, timedelta, timezone +from datetime import datetime, timezone from pathlib import Path # Synthetic timestamp base for H2O (2024-01-01T00:00:00Z) @@ -100,7 +99,9 @@ def prepare_clickbench(input_path: str, output_path: str, max_rows: int = 0): print(f"Done. 
{len(records):,} records written.") if records: - print(f" Time range: {records[0][CB_TIMESTAMP_FIELD]} – {records[-1][CB_TIMESTAMP_FIELD]}") + print( + f" Time range: {records[0][CB_TIMESTAMP_FIELD]} – {records[-1][CB_TIMESTAMP_FIELD]}" + ) def prepare_h2o(input_path: str, output_path: str, max_rows: int = 0): @@ -113,8 +114,7 @@ def prepare_h2o(input_path: str, output_path: str, max_rows: int = 0): print(f"Reading {input_path}...") count = 0 - with open(input_path, "r", encoding="utf-8") as fin, \ - open(output_path, "w") as fout: + with open(input_path, "r", encoding="utf-8") as fin, open(output_path, "w") as fout: header = fin.readline().strip() cols = header.split(",") @@ -148,8 +148,12 @@ def prepare_h2o(input_path: str, output_path: str, max_rows: int = 0): count += 1 print(f"\nDone. {count:,} records written to {output_path}.") - first_ts = datetime.fromtimestamp(H2O_BASE_EPOCH, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") - last_ts = datetime.fromtimestamp(H2O_BASE_EPOCH + count // H2O_ROWS_PER_SECOND, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + first_ts = datetime.fromtimestamp(H2O_BASE_EPOCH, tz=timezone.utc).strftime( + "%Y-%m-%dT%H:%M:%SZ" + ) + last_ts = datetime.fromtimestamp( + H2O_BASE_EPOCH + count // H2O_ROWS_PER_SECOND, tz=timezone.utc + ).strftime("%Y-%m-%dT%H:%M:%SZ") print(f" Time range: {first_ts} – {last_ts}") @@ -166,7 +170,9 @@ def main(): help="Dataset type to prepare", ) parser.add_argument("--input", required=True, help="Path to raw input file") - parser.add_argument("--output", required=True, help="Path to write prepared JSON file") + parser.add_argument( + "--output", required=True, help="Path to write prepared JSON file" + ) parser.add_argument( "--max-rows", type=int, diff --git a/asap-tools/execution-utilities/benchmark/run_benchmark.py b/asap-tools/execution-utilities/benchmark/run_benchmark.py index a196aced..85c637a7 100644 --- a/asap-tools/execution-utilities/benchmark/run_benchmark.py +++ 
b/asap-tools/execution-utilities/benchmark/run_benchmark.py @@ -92,13 +92,19 @@ def run_query( latency_ms = (time.time() - start) * 1000 if debug: - source = "OK" if response.status_code == 200 else f"HTTP {response.status_code}" + source = ( + "OK" if response.status_code == 200 else f"HTTP {response.status_code}" + ) print(f" [{source}] {latency_ms:.2f}ms") if response.status_code == 200: return latency_ms, response.text.strip(), None else: - return latency_ms, None, f"HTTP {response.status_code}: {response.text[:200]}" + return ( + latency_ms, + None, + f"HTTP {response.status_code}: {response.text[:200]}", + ) except requests.Timeout: return timeout * 1000.0, None, "Timeout" except Exception as e: @@ -176,7 +182,15 @@ def run_benchmark( with open(output_csv, "w", newline="") as csvfile: writer = csv.writer(csvfile) writer.writerow( - ["query_id", "query_pattern", "latency_ms", "result_rows", "result_full", "error", "mode"] + [ + "query_id", + "query_pattern", + "latency_ms", + "result_rows", + "result_full", + "error", + "mode", + ] ) for query_id, sql in queries: @@ -187,7 +201,9 @@ def run_benchmark( trial_latencies = [] last_result, last_error = None, None for _ in range(repeat): - lat, result, error = run_query(sql, endpoint_url, session, timeout, debug) + lat, result, error = run_query( + sql, endpoint_url, session, timeout, debug + ) trial_latencies.append(lat) last_result, last_error = result, error if error: @@ -197,7 +213,9 @@ def run_benchmark( if last_error: print(f"ERROR {last_error}") - writer.writerow([query_id, pattern, f"{latency_ms:.2f}", 0, "", last_error, mode]) + writer.writerow( + [query_id, pattern, f"{latency_ms:.2f}", 0, "", last_error, mode] + ) plot_latencies.append(0.0) else: result_lines = last_result.strip().split("\n") if last_result else [] @@ -207,13 +225,21 @@ def run_benchmark( plot_latencies.append(latency_ms) print(f"{latency_ms:.2f}ms ({num_rows} rows)") writer.writerow( - [query_id, pattern, f"{latency_ms:.2f}", num_rows, 
preview, "", mode] + [ + query_id, + pattern, + f"{latency_ms:.2f}", + num_rows, + preview, + "", + mode, + ] ) time.sleep(0.1) print(f"\nResults saved to {output_csv}") - _latency_summary(latencies_ok, f"Latency summary") + _latency_summary(latencies_ok, "Latency summary") if not no_plot and plot_latencies: _plot_single(plot_latencies, mode, output_csv.with_suffix(".png")) @@ -240,6 +266,7 @@ def _plot_comparison(asap_csv: Path, baseline_csv: Path, out_path: Path): Adapted from asap_query_latency/plot_latency.py. """ + def _load(path): rows = {} with open(path) as f: @@ -371,21 +398,8 @@ def main(): action="store_true", help="Do not generate any plots", ) - # Ignored flag for backward compatibility - parser.add_argument( - "--measure-pipeline-overhead", - action="store_true", - help="(No-op) Pipeline overhead measurement is not applicable with file source", - ) - args = parser.parse_args() - if args.measure_pipeline_overhead: - print( - "WARNING: --measure-pipeline-overhead is not applicable when using " - "file source (no Kafka ingest). Ignoring." 
- ) - # Validate required SQL files if args.mode in ("asap", "both") and not args.asap_sql_file: parser.error("--asap-sql-file is required when --mode is asap or both") @@ -394,7 +408,9 @@ def main(): output_dir = Path(args.output_dir) prefix = args.output_prefix - query_filter = [q.strip() for q in args.query_filter.split(",")] if args.query_filter else None + query_filter = ( + [q.strip() for q in args.query_filter.split(",")] if args.query_filter else None + ) asap_csv = output_dir / f"{prefix}_asap.csv" baseline_csv = output_dir / f"{prefix}_baseline.csv" From 14dbd06d03c8a0090c706c42a4ee449543f3a606 Mon Sep 17 00:00:00 2001 From: Kavya Bhat Date: Sat, 11 Apr 2026 07:32:19 -0600 Subject: [PATCH 03/10] rebase and add Elastic to pipeline --- .../benchmark/configs/h2o_inference.yaml | 8 +- .../benchmark/configs/h2o_streaming.yaml | 8 +- .../benchmark/export_to_database.py | 195 ++++++++++++++++-- .../benchmark/generate_queries.py | 40 +++- .../benchmark/prepare_data.py | 14 +- .../benchmark/run_benchmark.py | 173 ++++++++++++---- 6 files changed, 358 insertions(+), 80 deletions(-) diff --git a/asap-tools/execution-utilities/benchmark/configs/h2o_inference.yaml b/asap-tools/execution-utilities/benchmark/configs/h2o_inference.yaml index 0d1e45b0..fde732f9 100644 --- a/asap-tools/execution-utilities/benchmark/configs/h2o_inference.yaml +++ b/asap-tools/execution-utilities/benchmark/configs/h2o_inference.yaml @@ -4,8 +4,8 @@ tables: - name: h2o_groupby time_column: timestamp - metadata_columns: [id1, id2] - value_columns: [v1] + metadata_columns: [id1, id2, id3, id4, id5, id6] + value_columns: [v1, v2, v3] cleanup_policy: name: read_based @@ -15,6 +15,6 @@ queries: - aggregation_id: 12 read_count_threshold: 999999 query: |- - SELECT QUANTILE(0.95, v1) FROM h2o_groupby + SELECT PERCENTILE(v3, 95) FROM h2o_groupby WHERE timestamp BETWEEN DATEADD(s, -10, NOW()) AND NOW() - GROUP BY id1, id2; + GROUP BY id1, id2 ORDER BY id1, id2; diff --git 
a/asap-tools/execution-utilities/benchmark/configs/h2o_streaming.yaml b/asap-tools/execution-utilities/benchmark/configs/h2o_streaming.yaml index c500d696..9a7e6299 100644 --- a/asap-tools/execution-utilities/benchmark/configs/h2o_streaming.yaml +++ b/asap-tools/execution-utilities/benchmark/configs/h2o_streaming.yaml @@ -4,8 +4,8 @@ tables: - name: h2o_groupby time_column: timestamp - metadata_columns: [id1, id2] - value_columns: [v1] + metadata_columns: [id1, id2, id3, id4, id5, id6] + value_columns: [v1, v2, v3] aggregations: # Temporal queries (10s window, all labels) - QUANTILE (DatasketchesKLL) @@ -14,10 +14,10 @@ aggregations: aggregationSubType: '' labels: grouping: [id1, id2] - rollup: [] + rollup: [id3, id4, id5, id6] aggregated: [] table_name: h2o_groupby - value_column: v1 + value_column: v3 parameters: K: 200 tumblingWindowSize: 10 diff --git a/asap-tools/execution-utilities/benchmark/export_to_database.py b/asap-tools/execution-utilities/benchmark/export_to_database.py index 9811917c..79511a71 100644 --- a/asap-tools/execution-utilities/benchmark/export_to_database.py +++ b/asap-tools/execution-utilities/benchmark/export_to_database.py @@ -1,25 +1,35 @@ #!/usr/bin/env python3 """ -Load a dataset into ClickHouse for baseline comparison. +Load a dataset into ClickHouse or Elasticsearch for baseline comparison. Supports ClickBench (hits.json.gz), H2O groupby CSV, or a custom table. 
Usage: - # ClickBench + # ClickBench to Clickhouse python export_to_database.py \\ - --dataset clickbench \\ + --dataset clickbench --database clickhouse \\ --file-path ./data/hits.json.gz \\ --init-sql-file ../clickhouse-benchmark-pipeline/clickhouse/clickbench_init.sql - # H2O + # H2O to Clickhouse python export_to_database.py \\ - --dataset h2o \\ + --dataset h2o --database clickhouse \\ --file-path ./data/G1_1e7_1e2_0_0.csv \\ --init-sql-file ../asap_benchmark_pipeline/h2o_init.sql - # Custom JSON file + # H2O to Elasticsearch python export_to_database.py \\ - --dataset custom \\ + --dataset h2o --database elasticsearch \\ + --file-path ./data/G1_1e7_1e2_0_0.csv \\ + --es-host localhost \\ + --es-port 9200 \\ + --es-index h2o_benchmark \\ + --es-api-key your_api_key_here \\ + --es-bulk-size 5000 + + # Custom JSON to ClickHouse + python export_to_database.py \\ + --dataset custom --database clickhouse \\ --file-path ./data/mydata.json \\ --table-name mytable \\ --ts-column event_time \\ @@ -27,7 +37,6 @@ """ import argparse -import gzip import os import sys from datetime import datetime, timezone @@ -39,6 +48,14 @@ H2O_ROWS_PER_SECOND = 1000 H2O_BASE_EPOCH = 1704067200 # 2024-01-01T00:00:00Z +# Valid (dataset, database) combinations tested so far +VALID_COMBINATIONS = { + ("clickbench", "clickhouse"), + ("h2o", "clickhouse"), + ("h2o", "elasticsearch"), + ("custom", "clickhouse"), +} + def _exec_clickhouse_sql(clickhouse_url: str, sql: str, label: str = ""): """Execute a SQL statement via the ClickHouse HTTP API.""" @@ -117,7 +134,7 @@ def _flush_h2o_batch(clickhouse_url: str, rows: list): raise RuntimeError(f"ClickHouse insert failed: {r.text[:200]}") -def load_h2o( +def load_h2o_clickhouse( clickhouse_url: str, file_path: str, init_sql_file: str = None, @@ -178,6 +195,112 @@ def load_h2o( print(f"Loaded {total:,} rows into ClickHouse (h2o_groupby)") return True +def load_h2o_elasticsearch( + es_host: str, + es_port: int, + index_name: str, + file_path: str, + 
api_key: str = None, + skip_if_loaded: bool = False, + max_rows: int = 0, +): + """Load H2O groupby CSV into Elasticsearch with synthetic timestamps.""" + try: + from elasticsearch import Elasticsearch, helpers + except ImportError: + print("ERROR: elasticsearch-py not installed. Run: pip install elasticsearch") + return False + + auth = {"api_key": api_key} if api_key else {} + es = Elasticsearch(f"http://{es_host}:{es_port}", **auth) + + if not es.ping(): + print(f"ERROR: Cannot connect to Elasticsearch at {es_host}:{es_port}") + return False + + if skip_if_loaded and es.indices.exists(index=index_name): + count = es.count(index=index_name)["count"] + if count > 0: + print(f"Data already loaded ({count:,} rows). Skipping.") + return True + + if es.indices.exists(index=index_name): + print(f"Deleting existing index: {index_name}") + es.indices.delete(index=index_name) + + print(f"Creating index: {index_name}") + es.indices.create(index=index_name, body={ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0, + "refresh_interval": "30s", + }, + "mappings": { + "properties": { + "timestamp": {"type": "date", "format": "epoch_millis"}, + "id1": {"type": "keyword"}, + "id2": {"type": "keyword"}, + "id3": {"type": "keyword"}, + "id4": {"type": "long"}, + "id5": {"type": "long"}, + "id6": {"type": "long"}, + "v1": {"type": "long"}, + "v2": {"type": "long"}, + "v3": {"type": "double"}, + } + }, + }) + + if not os.path.exists(file_path): + print(f"ERROR: Data file not found: {file_path}") + return False + + print(f"Importing H2O data from {file_path} into Elasticsearch ({index_name})...") + + base_timestamp_ms = 1704067200000 # 2024-01-01T00:00:00Z in millis + + def generate_docs(): + with open(file_path, "r", encoding="utf-8") as f: + f.readline() # skip header + for row_num, line in enumerate(f): + if max_rows > 0 and row_num >= max_rows: + break + parts = line.rstrip("\n").split(",") + if len(parts) < 9: + continue + yield { + "_index": index_name, + 
"_source": { + "timestamp": base_timestamp_ms + row_num * 10, + "id1": parts[0], + "id2": parts[1], + "id3": parts[2], + "id4": int(parts[3] or 0), + "id5": int(parts[4] or 0), + "id6": int(parts[5] or 0), + "v1": int(parts[6] or 0), + "v2": int(parts[7] or 0), + "v3": float(parts[8] or 0.0), + }, + } + + total = 0 + errors = 0 + for ok, _ in helpers.streaming_bulk( + es, generate_docs(), chunk_size=H2O_BATCH_SIZE, raise_on_error=False + ): + if ok: + total += 1 + else: + errors += 1 + if total % 500_000 == 0 and total > 0: + print(f" Indexed {total:,} documents...") + + print(f"Indexed {total:,} documents ({errors} errors)") + print("Refreshing index...") + es.indices.refresh(index=index_name) + print(f"✓ Import complete! Index: {index_name}") + return True def load_custom( clickhouse_url: str, @@ -253,7 +376,7 @@ def _stream_plain(): def main(): parser = argparse.ArgumentParser( - description="Load a dataset into ClickHouse for baseline comparison", + description="Load a dataset into ClickHouse or Elasticsearch for baseline comparison", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=__doc__, ) @@ -263,6 +386,12 @@ def main(): required=True, help="Dataset type", ) + parser.add_argument( + "--database", + choices=["clickhouse", "elasticsearch"], + required=True, + help="Target database", + ) parser.add_argument( "--file-path", required=True, @@ -311,8 +440,25 @@ def main(): help="Maximum rows to load (0 = all)", ) + # Elasticsearch-specific flags + es_group = parser.add_argument_group("Elasticsearch options (--database elasticsearch)") + es_group.add_argument("--es-host", default="localhost", help="Elasticsearch host") + es_group.add_argument("--es-port", type=int, default=9200, help="Elasticsearch port") + es_group.add_argument("--es-index", default="h2o_benchmark", help="Elasticsearch index name") + es_group.add_argument("--es-api-key", default=None, help="Elasticsearch API key") + es_group.add_argument("--es-bulk-size", type=int, default=5000, 
help="Bulk insert batch size") + args = parser.parse_args() + # Validate (dataset, database) combination + combo = (args.dataset, args.database) + if combo not in VALID_COMBINATIONS: + valid = ", ".join(f"({d}/{db})" for d, db in sorted(VALID_COMBINATIONS)) + parser.error( + f"--dataset {args.dataset} is not supported with --database {args.database}. " + f"Valid combinations: {valid}" + ) + if args.dataset == "custom" and not args.table_name: parser.error("--table-name is required when --dataset custom") @@ -327,15 +473,26 @@ def main(): max_rows=args.max_rows, ) elif args.dataset == "h2o": - success = load_h2o( - args.clickhouse_url, - args.file_path, - init_sql_file=args.init_sql_file, - skip_table_init=args.skip_table_init, - skip_if_loaded=args.skip_if_loaded, - max_rows=args.max_rows, - ) - else: + if args.database == "elasticsearch": + success = load_h2o_elasticsearch( + es_host=args.es_host, + es_port=args.es_port, + index_name=args.es_index, + file_path=args.file_path, + api_key=args.es_api_key, + skip_if_loaded=args.skip_if_loaded, + max_rows=args.max_rows, + ) + else: + success = load_h2o_clickhouse( + args.clickhouse_url, + args.file_path, + init_sql_file=args.init_sql_file, + skip_table_init=args.skip_table_init, + skip_if_loaded=args.skip_if_loaded, + max_rows=args.max_rows, + ) + elif args.dataset == "custom": success = load_custom( args.clickhouse_url, args.file_path, diff --git a/asap-tools/execution-utilities/benchmark/generate_queries.py b/asap-tools/execution-utilities/benchmark/generate_queries.py index 730b2efd..44590e67 100644 --- a/asap-tools/execution-utilities/benchmark/generate_queries.py +++ b/asap-tools/execution-utilities/benchmark/generate_queries.py @@ -211,7 +211,12 @@ def generate_sql_file( ): """Write a single SQL file compatible with both ASAP and ClickHouse.""" group_by_clause = ", ".join(group_by_columns) - lines = [] + percentile = quantile * 100 + # Strip trailing zero: 95.0 -> 95, 99.5 -> 99.5 + percentile_str = 
f"{percentile:.1f}".rstrip("0").rstrip(".") + + ch_lines = [] + es_lines = [] for i, end_ts in enumerate(window_ends): end_str = format_ts(end_ts, ts_format) @@ -225,19 +230,40 @@ def generate_sql_file( else: where_clause = f"{ts_column} BETWEEN '{start_str}' AND '{end_str}'" - lines.append( + # Elasticsearch uses DATEADD + CAST form + es_where = ( + f"{ts_column} BETWEEN DATEADD('s', -{window_size}, CAST('{end_str}' AS DATETIME)) " + f"AND CAST('{end_str}' AS DATETIME)" + ) + + ch_sql = ( f"-- {label}: {desc}\n" f"SELECT quantile({quantile})({value_column}) FROM {table_name} " f"WHERE {where_clause} GROUP BY {group_by_clause};" ) - sql_file = f"{output_prefix}.sql" - Path(sql_file).parent.mkdir(parents=True, exist_ok=True) + asap_lines.append(asap_sql) + ch_lines.append(ch_sql) + es_lines.append( + f"-- {label}: {desc}\n" + f"SELECT PERCENTILE({value_column}, {percentile_str}) FROM {table_name} " + f"WHERE {es_where} GROUP BY {group_by_clause};" + ) + + ch_file = f"{output_prefix}_clickhouse.sql" + es_file = f"{output_prefix}_elasticsearch.sql" + + Path(asap_file).parent.mkdir(parents=True, exist_ok=True) + + with open(ch_file, "w") as f: + f.write("\n".join(ch_lines) + "\n") - with open(sql_file, "w") as f: - f.write("\n".join(lines) + "\n") + with open(es_file, "w") as f: + f.write("\n".join(es_lines) + "\n") - print(f"Generated {len(window_ends)} queries → {sql_file}") + print(f"Generated {len(window_ends)} queries:") + print(f" ClickHouse: {ch_file}") + print(f" Elasticsearch: {es_file}") def main(): diff --git a/asap-tools/execution-utilities/benchmark/prepare_data.py b/asap-tools/execution-utilities/benchmark/prepare_data.py index 5b2d7b56..043c6e06 100644 --- a/asap-tools/execution-utilities/benchmark/prepare_data.py +++ b/asap-tools/execution-utilities/benchmark/prepare_data.py @@ -127,19 +127,15 @@ def prepare_h2o(input_path: str, output_path: str, max_rows: int = 0): print(f" Written {i:,} rows...", end="\r") parts = line.rstrip("\n").split(",") - 
abs_sec = H2O_BASE_EPOCH + i // H2O_ROWS_PER_SECOND - ms = i % H2O_ROWS_PER_SECOND - ts = datetime.fromtimestamp(abs_sec, tz=timezone.utc) - ts_str = ts.strftime("%Y-%m-%dT%H:%M:%S") + f".{ms:03d}Z" - + abs_ms = H2O_BASE_EPOCH * 1000 + i * 10 # 10 ms per row record = { - H2O_TIMESTAMP_FIELD: ts_str, + H2O_TIMESTAMP_FIELD: abs_ms, "id1": parts[id_idx["id1"]], "id2": parts[id_idx["id2"]], "id3": parts[id_idx["id3"]], - "id4": int(parts[id_idx["id4"]]), - "id5": int(parts[id_idx["id5"]]), - "id6": int(parts[id_idx["id6"]]), + "id4": str(parts[id_idx["id4"]]), + "id5": str(parts[id_idx["id5"]]), + "id6": str(parts[id_idx["id6"]]), "v1": float(parts[id_idx["v1"]]), "v2": float(parts[id_idx["v2"]]), "v3": float(parts[id_idx["v3"]]), diff --git a/asap-tools/execution-utilities/benchmark/run_benchmark.py b/asap-tools/execution-utilities/benchmark/run_benchmark.py index 85c637a7..50ce4e84 100644 --- a/asap-tools/execution-utilities/benchmark/run_benchmark.py +++ b/asap-tools/execution-utilities/benchmark/run_benchmark.py @@ -1,28 +1,39 @@ #!/usr/bin/env python3 """ -Unified benchmark runner: ASAP (QueryEngineRust) vs ClickHouse baseline. +Unified benchmark runner: ASAP (QueryEngineRust) vs ClickHouse/Elasticsearch baseline. Reads SQL files generated by generate_queries.py, sends each query to the configured endpoint, and writes results to CSV. With --mode both, runs baseline then ASAP and generates a latency comparison plot. 
Usage: - # Both modes with comparison plot + # Both modes, ClickHouse baseline python run_benchmark.py \\ - --mode both \\ + --mode both --database clickhouse \\ --asap-sql-file ./queries/clickbench_asap.sql \\ --baseline-sql-file ./queries/clickbench_clickhouse.sql \\ --output-dir ./results + # Both modes, Elasticsearch baseline + python run_benchmark.py \\ + --mode both --database elasticsearch \\ + --asap-sql-file ./queries/h2o_asap.sql \\ + --baseline-sql-file ./queries/h2o_elasticsearch.sql \\ + --elastic-host localhost \\ + --elastic-port 9200 \\ + --elastic-api-key your_api_key_here \\ + --output-dir ./results \\ + --output-prefix h2o + # ASAP only python run_benchmark.py \\ - --mode asap \\ + --mode asap --database clickhouse \\ --asap-sql-file ./queries/h2o_asap.sql \\ --output-dir ./results # Baseline only python run_benchmark.py \\ - --mode baseline \\ + --mode baseline --database clickhouse \\ --baseline-sql-file ./queries/h2o_clickhouse.sql \\ --output-dir ./results """ @@ -38,8 +49,12 @@ import matplotlib.pyplot as plt import numpy as np import requests +import json -DEFAULT_ASAP_URL = "http://localhost:8088/clickhouse/query" +DEFAULT_ELASTIC_HOST = "localhost" +DEFAULT_ELASTIC_PORT = 9200 +DEFAULT_ASAP_CLICKHOUSE_URL = "http://localhost:8088/clickhouse/query" +DEFAULT_ASAP_ELASTIC_URL = "http://localhost:8088/_sql?format=json" DEFAULT_CLICKHOUSE_URL = "http://localhost:8123/?session_timezone=UTC" DEFAULT_OUTPUT_DIR = "./results" DEFAULT_OUTPUT_PREFIX = "benchmark" @@ -80,15 +95,26 @@ def run_query( session: requests.Session, timeout: int = 30, debug: bool = False, -) -> Tuple[float, Optional[str], Optional[str]]: - """Send a single SQL query and return (latency_ms, result_text, error).""" - encoded_query = urllib.parse.quote(query) - separator = "&" if "?" in endpoint_url else "?" 
- url = f"{endpoint_url}{separator}query={encoded_query}" - + database: str = "clickhouse", + api_key: Optional[str] = None, + fetch_size: int = 1000, +) -> Tuple[float, Optional[str], int, Optional[str]]: + """Send a single SQL query and return (latency_ms, result_text, num_rows, error).""" try: start = time.time() - response = session.get(url, timeout=timeout) + + if database == "elasticsearch": + headers = {"Content-Type": "application/json"} + if api_key: + headers["Authorization"] = f"ApiKey {api_key}" + body = {"query": query.strip().rstrip(";"), "fetch_size": fetch_size} + response = session.post(endpoint_url, headers=headers, json=body, timeout=timeout) + else: + encoded_query = urllib.parse.quote(query) + separator = "&" if "?" in endpoint_url else "?" + url = f"{endpoint_url}{separator}query={encoded_query}" + response = session.get(url, timeout=timeout) + latency_ms = (time.time() - start) * 1000 if debug: @@ -98,7 +124,45 @@ def run_query( print(f" [{source}] {latency_ms:.2f}ms") if response.status_code == 200: - return latency_ms, response.text.strip(), None + if database == "elasticsearch": + data = response.json() + + if "hits" in data: + hits = data["hits"].get("hits", []) + if hits: + col_names = list(hits[0].get("_source", {}).keys()) + formatted_rows = [ + ", ".join(f"{k}={hit.get('_source', {}).get(k)}" for k in col_names) + for hit in hits + ] + result_text = "\n".join(formatted_rows) + num_rows = len(hits) + else: + result_text = "" + num_rows = 0 + + elif "rows" in data: + rows = data.get("rows", []) + columns = data.get("columns", []) + col_names = [c.get("name", f"col{i}") for i, c in enumerate(columns)] + formatted_rows = [ + ", ".join( + f"{col_names[i]}={v}" if i < len(col_names) else str(v) + for i, v in enumerate(row) + ) if isinstance(row, (list, tuple)) else str(row) + for row in rows + ] + result_text = "\n".join(formatted_rows) + num_rows = len(rows) + + else: + result_text = "" + num_rows = 0 + else: + result_text = 
response.text.strip() + num_rows = len(result_text.split("\n")) if result_text else 0 + + return latency_ms, result_text, num_rows, None else: return ( latency_ms, @@ -106,9 +170,9 @@ def run_query( f"HTTP {response.status_code}: {response.text[:200]}", ) except requests.Timeout: - return timeout * 1000.0, None, "Timeout" + return timeout * 1000.0, None, 0, "Timeout" except Exception as e: - return 0.0, None, str(e) + return 0.0, None, 0, str(e) # --------------------------------------------------------------------------- @@ -156,6 +220,8 @@ def run_benchmark( repeat: int = 1, debug: bool = False, no_plot: bool = False, + database: str = "clickhouse", + api_key: Optional[str] = None, ): """Run all queries and write results to CSV. @@ -197,17 +263,17 @@ def run_benchmark( pattern = _infer_pattern(query_id) print(f"Running {query_id}...", end=" " if not debug else "\n", flush=True) - # Repeat and take median trial_latencies = [] - last_result, last_error = None, None + last_result, last_error, last_row_count = None, None, 0 for _ in range(repeat): - lat, result, error = run_query( - sql, endpoint_url, session, timeout, debug + lat, result, row_count, error = run_query( + sql, endpoint_url, session, timeout, debug, + database=database, api_key=api_key, ) trial_latencies.append(lat) - last_result, last_error = result, error + last_result, last_error, last_row_count = result, error, row_count if error: - break # don't retry on error + break latency_ms = sorted(trial_latencies)[len(trial_latencies) // 2] @@ -218,18 +284,16 @@ def run_benchmark( ) plot_latencies.append(0.0) else: - result_lines = last_result.strip().split("\n") if last_result else [] - num_rows = len(result_lines) preview = last_result.replace("\n", " | ")[:200] if last_result else "" latencies_ok.append(latency_ms) plot_latencies.append(latency_ms) - print(f"{latency_ms:.2f}ms ({num_rows} rows)") + print(f"{latency_ms:.2f}ms ({last_row_count} rows)") writer.writerow( [ query_id, pattern, 
f"{latency_ms:.2f}", - num_rows, + last_row_count, preview, "", mode, @@ -292,13 +356,13 @@ def _load(path): ) w = 0.4 - ax1.bar(x - w / 2, b_vals, w, label="ClickHouse baseline", color="#f4a460") + ax1.bar(x - w / 2, b_vals, w, label="Baseline", color="#f4a460") ax1.bar(x + w / 2, a_vals, w, label="ASAP (KLL sketch)", color="#4682b4") ax1.set_xticks(x) ax1.set_xticklabels(qids, rotation=90, fontsize=7) ax1.set_ylabel("Latency (ms)") ax1.set_title( - f"Query latency: ASAP vs ClickHouse baseline " + f"Query latency: ASAP vs baseline " f"(p50: {np.median(a_vals):.1f}ms vs {np.median(b_vals):.1f}ms)" ) ax1.legend() @@ -331,7 +395,7 @@ def _load(path): def main(): parser = argparse.ArgumentParser( - description="Benchmark ASAP vs ClickHouse baseline", + description="Benchmark ASAP vs ClickHouse/Elasticsearch baseline", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=__doc__, ) @@ -341,6 +405,12 @@ def main(): default="both", help="Which mode(s) to run (default: both)", ) + parser.add_argument( + "--database", + choices=["clickhouse", "elasticsearch"], + required=True, + help="Baseline database to benchmark against", + ) parser.add_argument( "--asap-sql-file", default=None, @@ -351,16 +421,30 @@ def main(): default=None, help="SQL file for baseline mode (required if mode is baseline or both)", ) - parser.add_argument( + + # ClickHouse flags + ch_group = parser.add_argument_group("ClickHouse options (--database clickhouse)") + ch_group.add_argument( "--asap-url", - default=DEFAULT_ASAP_URL, - help=f"QueryEngineRust endpoint (default: {DEFAULT_ASAP_URL})", + default=None, + help=f"ASAP endpoint for ClickHouse mode (default: {DEFAULT_ASAP_CLICKHOUSE_URL})", ) - parser.add_argument( + ch_group.add_argument( "--clickhouse-url", default=DEFAULT_CLICKHOUSE_URL, help=f"ClickHouse HTTP URL (default: {DEFAULT_CLICKHOUSE_URL})", ) + + # Elasticsearch flags + es_group = parser.add_argument_group("Elasticsearch options (--database elasticsearch)") + 
es_group.add_argument("--elastic-host", default=DEFAULT_ELASTIC_HOST, + help="Elasticsearch host") + es_group.add_argument("--elastic-port", type=int, default=DEFAULT_ELASTIC_PORT, + help="Elasticsearch port") + es_group.add_argument("--elastic-api-key", default=None, + help="Elasticsearch API key") + + # Shared flags parser.add_argument( "--output-dir", default=DEFAULT_OUTPUT_DIR, @@ -406,6 +490,18 @@ def main(): if args.mode in ("baseline", "both") and not args.baseline_sql_file: parser.error("--baseline-sql-file is required when --mode is baseline or both") + # Resolve endpoints based on --database + use_elastic = args.database == "elasticsearch" + + baseline_url = ( + f"http://{args.elastic_host}:{args.elastic_port}/_sql?format=json" + if use_elastic + else args.clickhouse_url + ) + asap_url = ( + args.asap_url or (DEFAULT_ASAP_ELASTIC_URL if use_elastic else DEFAULT_ASAP_CLICKHOUSE_URL) + ) + output_dir = Path(args.output_dir) prefix = args.output_prefix query_filter = ( @@ -418,9 +514,11 @@ def main(): if args.mode in ("baseline", "both"): run_benchmark( sql_file=Path(args.baseline_sql_file), - endpoint_url=args.clickhouse_url, + endpoint_url=baseline_url, output_csv=baseline_csv, mode="baseline", + database=args.database, + api_key=args.elastic_api_key if use_elastic else None, query_filter=query_filter, timeout=args.timeout, repeat=args.repeat, @@ -431,9 +529,11 @@ def main(): if args.mode in ("asap", "both"): run_benchmark( sql_file=Path(args.asap_sql_file), - endpoint_url=args.asap_url, + endpoint_url=asap_url, output_csv=asap_csv, mode="asap", + database=args.database, + api_key=args.elastic_api_key if use_elastic else None, query_filter=query_filter, timeout=args.timeout, repeat=args.repeat, @@ -442,8 +542,7 @@ def main(): ) if args.mode == "both" and not args.no_plot: - comparison_png = output_dir / f"{prefix}_comparison.png" - _plot_comparison(asap_csv, baseline_csv, comparison_png) + _plot_comparison(asap_csv, baseline_csv, output_dir / 
f"{prefix}_comparison.png") if __name__ == "__main__": From f7110f815d8d9923acfcbd543343414b911ca0d3 Mon Sep 17 00:00:00 2001 From: Kavya Bhat Date: Sat, 11 Apr 2026 07:43:02 -0600 Subject: [PATCH 04/10] fix formatting --- .../benchmark/export_to_database.py | 68 +++++++++++-------- .../benchmark/generate_queries.py | 3 - .../benchmark/run_benchmark.py | 64 +++++++++++------ 3 files changed, 86 insertions(+), 49 deletions(-) diff --git a/asap-tools/execution-utilities/benchmark/export_to_database.py b/asap-tools/execution-utilities/benchmark/export_to_database.py index 79511a71..1e6359d1 100644 --- a/asap-tools/execution-utilities/benchmark/export_to_database.py +++ b/asap-tools/execution-utilities/benchmark/export_to_database.py @@ -37,6 +37,7 @@ """ import argparse +import gzip import os import sys from datetime import datetime, timezone @@ -195,6 +196,7 @@ def load_h2o_clickhouse( print(f"Loaded {total:,} rows into ClickHouse (h2o_groupby)") return True + def load_h2o_elasticsearch( es_host: str, es_port: int, @@ -229,27 +231,30 @@ def load_h2o_elasticsearch( es.indices.delete(index=index_name) print(f"Creating index: {index_name}") - es.indices.create(index=index_name, body={ - "settings": { - "number_of_shards": 1, - "number_of_replicas": 0, - "refresh_interval": "30s", - }, - "mappings": { - "properties": { - "timestamp": {"type": "date", "format": "epoch_millis"}, - "id1": {"type": "keyword"}, - "id2": {"type": "keyword"}, - "id3": {"type": "keyword"}, - "id4": {"type": "long"}, - "id5": {"type": "long"}, - "id6": {"type": "long"}, - "v1": {"type": "long"}, - "v2": {"type": "long"}, - "v3": {"type": "double"}, - } + es.indices.create( + index=index_name, + body={ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0, + "refresh_interval": "30s", + }, + "mappings": { + "properties": { + "timestamp": {"type": "date", "format": "epoch_millis"}, + "id1": {"type": "keyword"}, + "id2": {"type": "keyword"}, + "id3": {"type": "keyword"}, + "id4": 
{"type": "long"}, + "id5": {"type": "long"}, + "id6": {"type": "long"}, + "v1": {"type": "long"}, + "v2": {"type": "long"}, + "v3": {"type": "double"}, + } + }, }, - }) + ) if not os.path.exists(file_path): print(f"ERROR: Data file not found: {file_path}") @@ -278,9 +283,9 @@ def generate_docs(): "id4": int(parts[3] or 0), "id5": int(parts[4] or 0), "id6": int(parts[5] or 0), - "v1": int(parts[6] or 0), - "v2": int(parts[7] or 0), - "v3": float(parts[8] or 0.0), + "v1": int(parts[6] or 0), + "v2": int(parts[7] or 0), + "v3": float(parts[8] or 0.0), }, } @@ -302,6 +307,7 @@ def generate_docs(): print(f"✓ Import complete! Index: {index_name}") return True + def load_custom( clickhouse_url: str, file_path: str, @@ -441,12 +447,20 @@ def main(): ) # Elasticsearch-specific flags - es_group = parser.add_argument_group("Elasticsearch options (--database elasticsearch)") + es_group = parser.add_argument_group( + "Elasticsearch options (--database elasticsearch)" + ) es_group.add_argument("--es-host", default="localhost", help="Elasticsearch host") - es_group.add_argument("--es-port", type=int, default=9200, help="Elasticsearch port") - es_group.add_argument("--es-index", default="h2o_benchmark", help="Elasticsearch index name") + es_group.add_argument( + "--es-port", type=int, default=9200, help="Elasticsearch port" + ) + es_group.add_argument( + "--es-index", default="h2o_benchmark", help="Elasticsearch index name" + ) es_group.add_argument("--es-api-key", default=None, help="Elasticsearch API key") - es_group.add_argument("--es-bulk-size", type=int, default=5000, help="Bulk insert batch size") + es_group.add_argument( + "--es-bulk-size", type=int, default=5000, help="Bulk insert batch size" + ) args = parser.parse_args() diff --git a/asap-tools/execution-utilities/benchmark/generate_queries.py b/asap-tools/execution-utilities/benchmark/generate_queries.py index 44590e67..003dc52d 100644 --- a/asap-tools/execution-utilities/benchmark/generate_queries.py +++ 
b/asap-tools/execution-utilities/benchmark/generate_queries.py @@ -242,7 +242,6 @@ def generate_sql_file( f"WHERE {where_clause} GROUP BY {group_by_clause};" ) - asap_lines.append(asap_sql) ch_lines.append(ch_sql) es_lines.append( f"-- {label}: {desc}\n" @@ -253,8 +252,6 @@ def generate_sql_file( ch_file = f"{output_prefix}_clickhouse.sql" es_file = f"{output_prefix}_elasticsearch.sql" - Path(asap_file).parent.mkdir(parents=True, exist_ok=True) - with open(ch_file, "w") as f: f.write("\n".join(ch_lines) + "\n") diff --git a/asap-tools/execution-utilities/benchmark/run_benchmark.py b/asap-tools/execution-utilities/benchmark/run_benchmark.py index 50ce4e84..696dee0a 100644 --- a/asap-tools/execution-utilities/benchmark/run_benchmark.py +++ b/asap-tools/execution-utilities/benchmark/run_benchmark.py @@ -108,7 +108,9 @@ def run_query( if api_key: headers["Authorization"] = f"ApiKey {api_key}" body = {"query": query.strip().rstrip(";"), "fetch_size": fetch_size} - response = session.post(endpoint_url, headers=headers, json=body, timeout=timeout) + response = session.post( + endpoint_url, headers=headers, json=body, timeout=timeout + ) else: encoded_query = urllib.parse.quote(query) separator = "&" if "?" in endpoint_url else "?" 
@@ -132,7 +134,10 @@ def run_query( if hits: col_names = list(hits[0].get("_source", {}).keys()) formatted_rows = [ - ", ".join(f"{k}={hit.get('_source', {}).get(k)}" for k in col_names) + ", ".join( + f"{k}={hit.get('_source', {}).get(k)}" + for k in col_names + ) for hit in hits ] result_text = "\n".join(formatted_rows) @@ -144,12 +149,18 @@ def run_query( elif "rows" in data: rows = data.get("rows", []) columns = data.get("columns", []) - col_names = [c.get("name", f"col{i}") for i, c in enumerate(columns)] + col_names = [ + c.get("name", f"col{i}") for i, c in enumerate(columns) + ] formatted_rows = [ - ", ".join( - f"{col_names[i]}={v}" if i < len(col_names) else str(v) - for i, v in enumerate(row) - ) if isinstance(row, (list, tuple)) else str(row) + ( + ", ".join( + f"{col_names[i]}={v}" if i < len(col_names) else str(v) + for i, v in enumerate(row) + ) + if isinstance(row, (list, tuple)) + else str(row) + ) for row in rows ] result_text = "\n".join(formatted_rows) @@ -267,8 +278,13 @@ def run_benchmark( last_result, last_error, last_row_count = None, None, 0 for _ in range(repeat): lat, result, row_count, error = run_query( - sql, endpoint_url, session, timeout, debug, - database=database, api_key=api_key, + sql, + endpoint_url, + session, + timeout, + debug, + database=database, + api_key=api_key, ) trial_latencies.append(lat) last_result, last_error, last_row_count = result, error, row_count @@ -436,13 +452,21 @@ def main(): ) # Elasticsearch flags - es_group = parser.add_argument_group("Elasticsearch options (--database elasticsearch)") - es_group.add_argument("--elastic-host", default=DEFAULT_ELASTIC_HOST, - help="Elasticsearch host") - es_group.add_argument("--elastic-port", type=int, default=DEFAULT_ELASTIC_PORT, - help="Elasticsearch port") - es_group.add_argument("--elastic-api-key", default=None, - help="Elasticsearch API key") + es_group = parser.add_argument_group( + "Elasticsearch options (--database elasticsearch)" + ) + es_group.add_argument( 
+ "--elastic-host", default=DEFAULT_ELASTIC_HOST, help="Elasticsearch host" + ) + es_group.add_argument( + "--elastic-port", + type=int, + default=DEFAULT_ELASTIC_PORT, + help="Elasticsearch port", + ) + es_group.add_argument( + "--elastic-api-key", default=None, help="Elasticsearch API key" + ) # Shared flags parser.add_argument( @@ -498,8 +522,8 @@ def main(): if use_elastic else args.clickhouse_url ) - asap_url = ( - args.asap_url or (DEFAULT_ASAP_ELASTIC_URL if use_elastic else DEFAULT_ASAP_CLICKHOUSE_URL) + asap_url = args.asap_url or ( + DEFAULT_ASAP_ELASTIC_URL if use_elastic else DEFAULT_ASAP_CLICKHOUSE_URL ) output_dir = Path(args.output_dir) @@ -542,7 +566,9 @@ def main(): ) if args.mode == "both" and not args.no_plot: - _plot_comparison(asap_csv, baseline_csv, output_dir / f"{prefix}_comparison.png") + _plot_comparison( + asap_csv, baseline_csv, output_dir / f"{prefix}_comparison.png" + ) if __name__ == "__main__": From 9f32c3a6a56e3e014bc5f537dd67a58c6b68e370 Mon Sep 17 00:00:00 2001 From: Kavya Bhat Date: Sat, 11 Apr 2026 07:49:19 -0600 Subject: [PATCH 05/10] rust format fix --- asap-query-engine/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asap-query-engine/src/main.rs b/asap-query-engine/src/main.rs index 842fe284..fa589aa0 100644 --- a/asap-query-engine/src/main.rs +++ b/asap-query-engine/src/main.rs @@ -541,4 +541,4 @@ fn setup_logging( info!("Logging initialized (respects RUST_LOG environment variable)"); info!("Logs will be written to: {}/query_engine.log", output_dir); Ok(guard) -} \ No newline at end of file +} From 80909ddb3347ade3b445f8ad396c954a92b59ca3 Mon Sep 17 00:00:00 2001 From: benjamib112 Date: Wed, 15 Apr 2026 05:33:34 -0400 Subject: [PATCH 06/10] added automatic timestamp detection, updated query generation script to generate both query files in one run, and added automatic streaming/inference config generation --- .../execution-utilities/benchmark/README.md | 193 +++++------ 
.../benchmark/generate_queries.py | 314 +++++++++++++----- 2 files changed, 329 insertions(+), 178 deletions(-) diff --git a/asap-tools/execution-utilities/benchmark/README.md b/asap-tools/execution-utilities/benchmark/README.md index c45b171c..a63beded 100644 --- a/asap-tools/execution-utilities/benchmark/README.md +++ b/asap-tools/execution-utilities/benchmark/README.md @@ -19,6 +19,10 @@ data_file → export_to_database.py run_benchmark.py → results/ ClickHouse :8123 (baseline) ``` +**Key difference from the old pipeline:** Arroyo reads directly from a local +file (`single_file_custom` connector) rather than from a Kafka input topic. +Kafka is still required for the **sketch output** topic (`sketch_topic`). + --- ## Prerequisites @@ -27,8 +31,8 @@ data_file → export_to_database.py run_benchmark.py → results/ export INSTALL_DIR=/scratch/sketch_db_for_prometheus pip3 install --user -r requirements.txt -# Build binaries (one-time) — workspace target is at ~/ASAPQuery/target/release/ -cd ~/ASAPQuery && cargo build --release +# Build binaries (one-time) +cd ~/ASAPQuery/asap-query-engine && cargo build --release ``` --- @@ -56,7 +60,6 @@ The Arroyo file source requires RFC3339 timestamps and string metadata columns. This step converts the raw ClickBench JSON: ```bash -cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark python prepare_data.py \ --dataset clickbench \ --input ./data/hits.json.gz \ @@ -71,19 +74,17 @@ This produces `hits_arroyo.json` with: ### Step 3 — Start infrastructure -Skip any service that is already running. 
- ```bash -# Kafka — skip if `kafka-topics.sh --list` succeeds +# Kafka ~/ASAPQuery/asap-tools/installation/kafka/run.sh $INSTALL_DIR/kafka -# Create sketch output topic — skip if sketch_topic already exists +# Create sketch output topic KAFKA=$INSTALL_DIR/kafka/bin $KAFKA/kafka-topics.sh --bootstrap-server localhost:9092 --create \ --topic sketch_topic --partitions 1 --replication-factor 1 \ --config max.message.bytes=20971520 -# ClickHouse — skip if port 8123 is already listening +# ClickHouse ~/ASAPQuery/asap-tools/installation/clickhouse/run.sh $INSTALL_DIR ``` @@ -95,12 +96,36 @@ $KAFKA/kafka-topics.sh --bootstrap-server localhost:9092 --create \ > /tmp/arroyo.log 2>&1 & ``` -### Step 5 — Launch Arroyo sketch pipeline (file source) +### Step 5 — Generate queries and configs + +```bash +python generate_queries.py \ + --table-name hits \ + --ts-column EventTime \ + --value-column ResolutionWidth \ + --group-by-columns RegionID,OS,UserAgent,TraficSourceID \ + --window-size 10 \ + --num-queries 50 \ + --window-form dateadd \ + --generate-configs \ + --auto-detect-timestamps \ + --data-file ./data/hits_arroyo.json \ + --data-file-format json \ + --output-prefix ./queries/clickbench +``` + +This writes: +- `queries/clickbench_asap.sql` — ASAP queries (ISO timestamps) +- `queries/clickbench_clickhouse.sql` — ClickHouse queries (datetime timestamps) +- `queries/clickbench_streaming.yaml` — Arroyo streaming config +- `queries/clickbench_inference.yaml` — QueryEngineRust inference config + +### Step 6 — Launch Arroyo sketch pipeline (file source) ```bash -cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark python export_to_arroyo.py \ - --streaming-config ./configs/clickbench_streaming.yaml \ + --streaming-config ./queries/clickbench_streaming.yaml \ + --source-type file \ --input-file ./data/hits_arroyo.json \ --file-format json \ --ts-format rfc3339 \ @@ -109,21 +134,21 @@ python export_to_arroyo.py \ --output-dir ./arroyo_outputs ``` -### Step 6 — Start 
QueryEngineRust +### Step 7 — Start QueryEngineRust ```bash -cd ~/ASAPQuery +cd ~/ASAPQuery/asap-query-engine nohup ./target/release/query_engine_rust \ --kafka-topic sketch_topic --input-format json \ - --config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/configs/clickbench_inference.yaml \ - --streaming-config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/configs/clickbench_streaming.yaml \ + --config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/queries/clickbench_inference.yaml \ + --streaming-config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/queries/clickbench_streaming.yaml \ --http-port 8088 --delete-existing-db --log-level DEBUG \ - --output-dir ./asap-query-engine/output --streaming-engine arroyo \ + --output-dir ./output --streaming-engine arroyo \ --query-language SQL --lock-strategy per-key \ --prometheus-scrape-interval 1 > /tmp/query_engine.log 2>&1 & ``` -### Step 7 — Load data into ClickHouse (baseline) +### Step 8 — Load data into ClickHouse (baseline) ```bash cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark @@ -136,35 +161,14 @@ python export_to_database.py \ Verify: `$INSTALL_DIR/clickhouse client --query "SELECT count(*) FROM hits"` -### Step 8 — Generate SQL query files - -```bash -cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark -python generate_queries.py \ - --table-name hits \ - --ts-column EventTime \ - --value-column ResolutionWidth \ - --group-by-columns RegionID,OS,UserAgent,TraficSourceID \ - --window-size 10 \ - --num-queries 50 \ - --ts-format datetime \ - --window-form dateadd \ - --auto-detect-timestamps \ - --data-file ./data/hits_arroyo.json \ - --data-file-format json \ - --output-prefix ./queries/clickbench -``` - -This writes `queries/clickbench.sql`. 
- ### Step 9 — Run benchmark ```bash -cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark python run_benchmark.py \ --mode both \ - --asap-sql-file ./queries/clickbench.sql \ - --baseline-sql-file ./queries/clickbench.sql \ + --asap-sql-file ./queries/clickbench_asap.sql \ + --baseline-sql-file ./queries/clickbench_clickhouse.sql \ + --asap-url "http://localhost:8088/api/v1/query" \ --output-dir ./results \ --output-prefix clickbench ``` @@ -179,14 +183,12 @@ Results: `results/clickbench_asap.csv`, `results/clickbench_baseline.csv`, ### Step 1 — Download dataset ```bash -cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark python download_dataset.py --dataset h2o --output-dir ./data ``` ### Step 2 — Prepare data for Arroyo file source ```bash -cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark python prepare_data.py \ --dataset h2o \ --input ./data/G1_1e7_1e2_0_0.csv \ @@ -196,12 +198,29 @@ python prepare_data.py \ ### Steps 3–4 — Start infrastructure and Arroyo (same as ClickBench) -### Step 5 — Launch Arroyo sketch pipeline +### Step 5 — Generate queries and configs + +```bash +python generate_queries.py \ + --table-name h2o_groupby \ + --ts-column timestamp \ + --value-column v1 \ + --group-by-columns id1,id2 \ + --window-size 10 \ + --num-queries 50 \ + --generate-configs \ + --auto-detect-timestamps \ + --data-file ./data/h2o_arroyo.json \ + --data-file-format json \ + --output-prefix ./queries/h2o +``` + +### Step 6 — Launch Arroyo sketch pipeline ```bash -cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark python export_to_arroyo.py \ - --streaming-config ./configs/h2o_streaming.yaml \ + --streaming-config ./queries/h2o_streaming.yaml \ + --source-type file \ --input-file ./data/h2o_arroyo.json \ --file-format json \ --ts-format rfc3339 \ @@ -210,24 +229,23 @@ python export_to_arroyo.py \ --output-dir ./arroyo_outputs ``` -### Step 6 — Start QueryEngineRust +### Step 7 — Start QueryEngineRust ```bash -cd ~/ASAPQuery +cd 
~/ASAPQuery/asap-query-engine nohup ./target/release/query_engine_rust \ --kafka-topic sketch_topic --input-format json \ - --config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/configs/h2o_inference.yaml \ - --streaming-config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/configs/h2o_streaming.yaml \ + --config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/queries/h2o_inference.yaml \ + --streaming-config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/queries/h2o_streaming.yaml \ --http-port 8088 --delete-existing-db --log-level DEBUG \ - --output-dir ./asap-query-engine/output --streaming-engine arroyo \ + --output-dir ./output --streaming-engine arroyo \ --query-language SQL --lock-strategy per-key \ --prometheus-scrape-interval 1 > /tmp/query_engine.log 2>&1 & ``` -### Step 7 — Load data into ClickHouse (baseline) +### Step 8 — Load data into ClickHouse (baseline) ```bash -cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark python export_to_database.py \ --dataset h2o \ --file-path ./data/G1_1e7_1e2_0_0.csv \ @@ -235,32 +253,14 @@ python export_to_database.py \ --max-rows 1000000 ``` -### Step 8 — Generate SQL query files - -```bash -cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark -python generate_queries.py \ - --table-name h2o_groupby \ - --ts-column timestamp \ - --value-column v1 \ - --group-by-columns id1,id2 \ - --window-size 10 \ - --num-queries 50 \ - --ts-format iso \ - --auto-detect-timestamps \ - --data-file ./data/h2o_arroyo.json \ - --data-file-format json \ - --output-prefix ./queries/h2o -``` - ### Step 9 — Run benchmark ```bash -cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark python run_benchmark.py \ --mode both \ - --asap-sql-file ./queries/h2o.sql \ - --baseline-sql-file ./queries/h2o.sql \ + --asap-sql-file ./queries/h2o_asap.sql \ + --baseline-sql-file ./queries/h2o_clickhouse.sql \ + --asap-url "http://localhost:8088/api/v1/query" \ --output-dir ./results \ --output-prefix h2o ``` @@ -270,8 
+270,6 @@ python run_benchmark.py \ ## Custom Dataset ```bash -cd ~/ASAPQuery/asap-tools/execution-utilities/benchmark - # 1. Download (any HTTP URL) python download_dataset.py --dataset custom \ --custom-url https://example.com/mydata.json.gz \ @@ -279,39 +277,42 @@ python download_dataset.py --dataset custom \ # 2. Prepare (edit prepare_data.py for your schema, or skip if already RFC3339) -# 3. Export to Arroyo +# 3. Generate queries and configs +python generate_queries.py \ + --table-name my_table \ + --ts-column event_time \ + --value-column metric_value \ + --group-by-columns region,host \ + --window-size 10 \ + --num-queries 50 \ + --generate-configs \ + --auto-detect-timestamps \ + --data-file ./data/mydata.json \ + --output-prefix ./queries/my_dataset + +# 4. Export to Arroyo python export_to_arroyo.py \ - --streaming-config ./configs/my_streaming.yaml \ + --streaming-config ./queries/my_dataset_streaming.yaml \ + --source-type file \ --input-file ./data/mydata.json \ --file-format json \ --ts-format rfc3339 \ --pipeline-name my_pipeline \ --arroyosketch-dir ~/ASAPQuery/asap-summary-ingest -# 4. Export to ClickHouse +# 5. Export to ClickHouse python export_to_database.py \ --dataset custom \ --file-path ./data/mydata.json \ --init-sql-file ./configs/my_init.sql \ --table-name my_table -# 5. Generate queries -python generate_queries.py \ - --table-name my_table \ - --ts-column event_time \ - --value-column metric_value \ - --group-by-columns region,host \ - --window-size 10 \ - --num-queries 50 \ - --auto-detect-timestamps \ - --data-file ./data/mydata.json \ - --output-prefix ./queries/my_dataset - # 6. 
Run benchmark python run_benchmark.py \ --mode both \ - --asap-sql-file ./queries/my_dataset.sql \ - --baseline-sql-file ./queries/my_dataset.sql \ + --asap-sql-file ./queries/my_dataset_asap.sql \ + --baseline-sql-file ./queries/my_dataset_clickhouse.sql \ + --asap-url "http://localhost:8088/api/v1/query" \ --output-dir ./results ``` @@ -344,8 +345,8 @@ $INSTALL_DIR/clickhouse client --query "TRUNCATE TABLE hits" |------|---------| | `download_dataset.py` | Download ClickBench, H2O, or custom datasets | | `prepare_data.py` | Convert raw data to Arroyo file source format (RFC3339, string columns) | -| `export_to_arroyo.py` | Launch Arroyo sketch pipeline from a local file source | +| `export_to_arroyo.py` | Launch Arroyo sketch pipeline (file or kafka source) | | `export_to_database.py` | Load data into ClickHouse for baseline | -| `generate_queries.py` | Generate a single SQL query file (database-style, compatible with both ASAP and ClickHouse) | +| `generate_queries.py` | Generate paired ASAP + ClickHouse SQL query files and streaming/inference YAML configs | | `run_benchmark.py` | Run queries and produce CSV results + plots | -| `configs/` | Dataset-specific streaming/inference YAML and ClickHouse init SQL | +| `configs/` | ClickHouse init SQL (CREATE TABLE statements) | diff --git a/asap-tools/execution-utilities/benchmark/generate_queries.py b/asap-tools/execution-utilities/benchmark/generate_queries.py index 003dc52d..0754a843 100644 --- a/asap-tools/execution-utilities/benchmark/generate_queries.py +++ b/asap-tools/execution-utilities/benchmark/generate_queries.py @@ -1,17 +1,35 @@ #!/usr/bin/env python3 """ -Generate a SQL query file for benchmarking ASAP and ClickHouse. +Generate paired ASAP and ClickHouse SQL query files for benchmarking, +and optionally generate streaming/inference YAML configs. -Each query uses database-style quantile(q)(col) syntax, compatible with both -QueryEngineRust and ClickHouse. 
Queries target fixed time windows and match the +Each query targets a fixed time window (window-end timestamp) and matches the annotation format `-- T{NNN}: description` expected by run_benchmark.py. -Output: - {prefix}.sql quantile(q)(col) database-style syntax, compatible with both - QueryEngineRust and ClickHouse baseline +Output (always): + {prefix}_asap.sql QUANTILE(q, col) syntax for QueryEngineRust + {prefix}_clickhouse.sql quantile(q)(col) syntax for ClickHouse baseline + +Output (with --generate-configs): + {prefix}_streaming.yaml Arroyo streaming config + {prefix}_inference.yaml QueryEngineRust inference config Usage: - # Auto-detect timestamps from data file → ./queries/clickbench.sql + # Generate queries + configs in one shot + python generate_queries.py \\ + --table-name h2o_groupby \\ + --ts-column timestamp \\ + --value-column v1 \\ + --group-by-columns id1,id2 \\ + --window-size 30 \\ + --num-queries 50 \\ + --generate-configs \\ + --auto-detect-timestamps \\ + --data-file ./data/h2o_arroyo_full.json \\ + --data-file-format json \\ + --output-prefix ./queries/h2o_30s + + # Queries only (no configs) python generate_queries.py \\ --table-name hits \\ --ts-column EventTime \\ @@ -24,7 +42,7 @@ --data-file-format json.gz \\ --output-prefix ./queries/clickbench - # Explicit timestamp file (one ISO timestamp per line) → ./queries/h2o.sql + # Override timestamp format for both outputs python generate_queries.py \\ --table-name h2o_groupby \\ --ts-column timestamp \\ @@ -32,6 +50,7 @@ --group-by-columns id1,id2 \\ --window-size 10 \\ --num-queries 50 \\ + --ts-format iso \\ --timestamps-file ./my_timestamps.txt \\ --output-prefix ./queries/h2o """ @@ -45,8 +64,6 @@ from typing import List, Optional -SAMPLE_SIZE = 10_000 # rows to read for timestamp auto-detection - def _parse_timestamp(value: str) -> Optional[datetime]: """Try to parse a timestamp string in common formats.""" @@ -73,17 +90,16 @@ def _parse_timestamp(value: str) -> Optional[datetime]: return 
None -def _read_timestamps_from_json( +def _scan_ts_range_json( file_path: str, ts_column: str, compressed: bool -) -> List[datetime]: - """Read up to SAMPLE_SIZE timestamps from a JSON-lines file.""" - timestamps = [] +) -> tuple: + """Scan a JSON-lines file and return (min_ts, max_ts, count).""" + min_ts = max_ts = None + count = 0 opener = gzip.open if compressed else open mode = "rt" if compressed else "r" with opener(file_path, mode) as f: - for i, line in enumerate(f): - if i >= SAMPLE_SIZE: - break + for line in f: line = line.strip() if not line: continue @@ -93,17 +109,23 @@ def _read_timestamps_from_json( if val is not None: ts = _parse_timestamp(val) if ts: - timestamps.append(ts) + count += 1 + if min_ts is None or ts < min_ts: + min_ts = ts + if max_ts is None or ts > max_ts: + max_ts = ts except (json.JSONDecodeError, KeyError): continue - return timestamps + return min_ts, max_ts, count -def _read_timestamps_from_csv(file_path: str, ts_column: str) -> List[datetime]: - """Read up to SAMPLE_SIZE timestamps from a CSV file.""" +def _scan_ts_range_csv( + file_path: str, ts_column: str +) -> tuple: + """Scan a CSV file and return (min_ts, max_ts, count).""" import csv - - timestamps = [] + min_ts = max_ts = None + count = 0 with open(file_path, "r", newline="") as f: reader = csv.DictReader(f) if ts_column not in (reader.fieldnames or []): @@ -111,37 +133,40 @@ def _read_timestamps_from_csv(file_path: str, ts_column: str) -> List[datetime]: f"WARNING: Column '{ts_column}' not found in CSV. 
" f"Available: {reader.fieldnames}" ) - return [] - for i, row in enumerate(reader): - if i >= SAMPLE_SIZE: - break + return None, None, 0 + for row in reader: ts = _parse_timestamp(row[ts_column]) if ts: - timestamps.append(ts) - return timestamps - - -def detect_timestamps(data_file: str, data_file_format: str, ts_column: str) -> tuple: - """Return (min_ts, max_ts) from a sample of the data file.""" + count += 1 + if min_ts is None or ts < min_ts: + min_ts = ts + if max_ts is None or ts > max_ts: + max_ts = ts + return min_ts, max_ts, count + + +def detect_timestamps( + data_file: str, data_file_format: str, ts_column: str +) -> tuple: + """Return (min_ts, max_ts) by scanning the entire data file.""" fmt = data_file_format.lower() if fmt in ("json.gz", "jsonl.gz"): - timestamps = _read_timestamps_from_json(data_file, ts_column, compressed=True) + min_ts, max_ts, count = _scan_ts_range_json(data_file, ts_column, compressed=True) elif fmt in ("json", "jsonl"): - timestamps = _read_timestamps_from_json(data_file, ts_column, compressed=False) + min_ts, max_ts, count = _scan_ts_range_json(data_file, ts_column, compressed=False) elif fmt == "csv": - timestamps = _read_timestamps_from_csv(data_file, ts_column) + min_ts, max_ts, count = _scan_ts_range_csv(data_file, ts_column) else: print(f"ERROR: Unsupported data file format: {data_file_format}") sys.exit(1) - if not timestamps: + if min_ts is None: print( - f"ERROR: No '{ts_column}' timestamps found in the first {SAMPLE_SIZE} " - f"rows of {data_file}" + f"ERROR: No '{ts_column}' timestamps found in {data_file}" ) sys.exit(1) - return min(timestamps), max(timestamps) + return min_ts, max_ts def _snap_to_window_boundary(ts: datetime, window_size: int) -> datetime: @@ -197,7 +222,7 @@ def format_ts(ts: datetime, ts_format: str) -> str: return ts.strftime("%Y-%m-%d %H:%M:%S") -def generate_sql_file( +def generate_sql_files( table_name: str, ts_column: str, value_column: str, @@ -205,62 +230,144 @@ def generate_sql_file( 
quantile: float, window_size: int, window_ends: List[datetime], - ts_format: str, + ts_format_asap: str, + ts_format_db: str, window_form: str, output_prefix: str, ): - """Write a single SQL file compatible with both ASAP and ClickHouse.""" + """Write the paired ASAP and ClickHouse SQL files.""" group_by_clause = ", ".join(group_by_columns) - percentile = quantile * 100 - # Strip trailing zero: 95.0 -> 95, 99.5 -> 99.5 - percentile_str = f"{percentile:.1f}".rstrip("0").rstrip(".") - + asap_lines = [] ch_lines = [] - es_lines = [] for i, end_ts in enumerate(window_ends): - end_str = format_ts(end_ts, ts_format) - start_ts = end_ts - timedelta(seconds=window_size) - start_str = format_ts(start_ts, ts_format) + asap_end = format_ts(end_ts, ts_format_asap) + asap_start = format_ts(end_ts - timedelta(seconds=window_size), ts_format_asap) + db_end = format_ts(end_ts, ts_format_db) + db_start = format_ts(end_ts - timedelta(seconds=window_size), ts_format_db) label = f"T{i:03d}" - desc = f"quantile window ending at {end_str}" + desc_asap = f"quantile window ending at {asap_end}" + desc_db = f"quantile window ending at {db_end}" if window_form == "dateadd": - where_clause = f"{ts_column} BETWEEN DATEADD(s, -{window_size}, '{end_str}') AND '{end_str}'" + asap_where = ( + f"{ts_column} BETWEEN DATEADD(s, -{window_size}, '{asap_end}') AND '{asap_end}'" + ) + db_where = ( + f"{ts_column} BETWEEN DATEADD(s, -{window_size}, '{db_end}') AND '{db_end}'" + ) else: - where_clause = f"{ts_column} BETWEEN '{start_str}' AND '{end_str}'" + asap_where = ( + f"{ts_column} BETWEEN '{asap_start}' AND '{asap_end}'" + ) + db_where = ( + f"{ts_column} BETWEEN '{db_start}' AND '{db_end}'" + ) - # Elasticsearch uses DATEADD + CAST form - es_where = ( - f"{ts_column} BETWEEN DATEADD('s', -{window_size}, CAST('{end_str}' AS DATETIME)) " - f"AND CAST('{end_str}' AS DATETIME)" + asap_sql = ( + f"-- {label}: {desc_asap}\n" + f"SELECT QUANTILE({quantile}, {value_column}) FROM {table_name} " + f"WHERE 
{asap_where} GROUP BY {group_by_clause};" ) - ch_sql = ( - f"-- {label}: {desc}\n" + f"-- {label}: {desc_db}\n" f"SELECT quantile({quantile})({value_column}) FROM {table_name} " - f"WHERE {where_clause} GROUP BY {group_by_clause};" + f"WHERE {db_where} GROUP BY {group_by_clause};" ) + asap_lines.append(asap_sql) ch_lines.append(ch_sql) - es_lines.append( - f"-- {label}: {desc}\n" - f"SELECT PERCENTILE({value_column}, {percentile_str}) FROM {table_name} " - f"WHERE {es_where} GROUP BY {group_by_clause};" - ) + asap_file = f"{output_prefix}_asap.sql" ch_file = f"{output_prefix}_clickhouse.sql" - es_file = f"{output_prefix}_elasticsearch.sql" + + Path(asap_file).parent.mkdir(parents=True, exist_ok=True) + + with open(asap_file, "w") as f: + f.write("\n".join(asap_lines) + "\n") with open(ch_file, "w") as f: f.write("\n".join(ch_lines) + "\n") - with open(es_file, "w") as f: - f.write("\n".join(es_lines) + "\n") - print(f"Generated {len(window_ends)} queries:") - print(f" ClickHouse: {ch_file}") - print(f" Elasticsearch: {es_file}") + print(f" ASAP: {asap_file}") + print(f" ClickHouse: {ch_file}") + + +def generate_config_files( + table_name: str, + ts_column: str, + value_column: str, + group_by_columns: List[str], + quantile: float, + window_size: int, + aggregation_id: int, + aggregation_k: int, + output_prefix: str, +): + """Write paired streaming and inference YAML config files.""" + meta_yaml = "[" + ", ".join(group_by_columns) + "]" + group_by_clause = ", ".join(group_by_columns) + + streaming_content = f"""\ +tables: + - name: {table_name} + time_column: {ts_column} + metadata_columns: {meta_yaml} + value_columns: [{value_column}] + +aggregations: + - aggregationId: {aggregation_id} + aggregationType: DatasketchesKLL + aggregationSubType: '' + labels: + grouping: {meta_yaml} + rollup: [] + aggregated: [] + table_name: {table_name} + value_column: {value_column} + parameters: + K: {aggregation_k} + tumblingWindowSize: {window_size} + windowSize: {window_size} + 
windowType: tumbling + spatialFilter: '' +""" + + inference_content = f"""\ +tables: + - name: {table_name} + time_column: {ts_column} + metadata_columns: {meta_yaml} + value_columns: [{value_column}] + +cleanup_policy: + name: read_based + +queries: + - aggregations: + - aggregation_id: {aggregation_id} + read_count_threshold: 999999 + query: |- + SELECT QUANTILE({quantile}, {value_column}) FROM {table_name} + WHERE {ts_column} BETWEEN DATEADD(s, -{window_size}, NOW()) AND NOW() + GROUP BY {group_by_clause}; +""" + + streaming_file = f"{output_prefix}_streaming.yaml" + inference_file = f"{output_prefix}_inference.yaml" + + Path(streaming_file).parent.mkdir(parents=True, exist_ok=True) + + with open(streaming_file, "w") as f: + f.write(streaming_content) + + with open(inference_file, "w") as f: + f.write(inference_content) + + print(f"Generated configs:") + print(f" Streaming: {streaming_file}") + print(f" Inference: {inference_file}") def main(): @@ -272,9 +379,7 @@ def main(): # Table/column config parser.add_argument("--table-name", required=True) parser.add_argument("--ts-column", required=True, help="Timestamp column name") - parser.add_argument( - "--value-column", required=True, help="Column to compute quantile on" - ) + parser.add_argument("--value-column", required=True, help="Column to compute quantile on") parser.add_argument( "--group-by-columns", required=True, @@ -282,15 +387,25 @@ def main(): ) # Query parameters parser.add_argument("--quantile", type=float, default=0.95) + parser.add_argument("--window-size", type=int, default=10, help="Window size in seconds") + parser.add_argument("--num-queries", type=int, default=50) parser.add_argument( - "--window-size", type=int, default=10, help="Window size in seconds" + "--ts-format-asap", + choices=["iso", "datetime"], + default="iso", + help="Timestamp format for ASAP SQL: iso='YYYY-MM-DDTHH:MM:SSZ', datetime='YYYY-MM-DD HH:MM:SS' (default: iso)", + ) + parser.add_argument( + "--ts-format-db", + 
choices=["iso", "datetime"], + default="datetime", + help="Timestamp format for ClickHouse SQL: iso='YYYY-MM-DDTHH:MM:SSZ', datetime='YYYY-MM-DD HH:MM:SS' (default: datetime)", ) - parser.add_argument("--num-queries", type=int, default=50) parser.add_argument( "--ts-format", choices=["iso", "datetime"], - default="iso", - help="Timestamp format in SQL: iso='YYYY-MM-DDTHH:MM:SSZ', datetime='YYYY-MM-DD HH:MM:SS' (default: iso)", + default=None, + help="Set both --ts-format-asap and --ts-format-db to the same value (overrides individual flags)", ) parser.add_argument( "--window-form", @@ -301,7 +416,7 @@ def main(): parser.add_argument( "--output-prefix", required=True, - help="Output file prefix (e.g. ./queries/clickbench → clickbench.sql)", + help="Output file prefix (e.g. ./queries/clickbench → clickbench_asap.sql + clickbench_clickhouse.sql)", ) # Timestamp sources (mutually exclusive) ts_group = parser.add_mutually_exclusive_group(required=True) @@ -333,6 +448,24 @@ def main(): default=None, help="Spacing between window-end timestamps (default: window-size * 3)", ) + # Config generation + parser.add_argument( + "--generate-configs", + action="store_true", + help="Also generate streaming and inference YAML config files", + ) + parser.add_argument( + "--aggregation-id", + type=int, + default=12, + help="Aggregation ID for config files (default: 12)", + ) + parser.add_argument( + "--aggregation-k", + type=int, + default=200, + help="KLL sketch K parameter (default: 200)", + ) args = parser.parse_args() @@ -377,7 +510,10 @@ def main(): f"(stride={stride}s, window={args.window_size}s)" ) - generate_sql_file( + ts_format_asap = args.ts_format if args.ts_format else args.ts_format_asap + ts_format_db = args.ts_format if args.ts_format else args.ts_format_db + + generate_sql_files( table_name=args.table_name, ts_column=args.ts_column, value_column=args.value_column, @@ -385,11 +521,25 @@ def main(): quantile=args.quantile, window_size=args.window_size, 
window_ends=window_ends, - ts_format=args.ts_format, + ts_format_asap=ts_format_asap, + ts_format_db=ts_format_db, window_form=args.window_form, output_prefix=args.output_prefix, ) + if args.generate_configs: + generate_config_files( + table_name=args.table_name, + ts_column=args.ts_column, + value_column=args.value_column, + group_by_columns=group_by_columns, + quantile=args.quantile, + window_size=args.window_size, + aggregation_id=args.aggregation_id, + aggregation_k=args.aggregation_k, + output_prefix=args.output_prefix, + ) + if __name__ == "__main__": main() From 7e1983b2282a52eadbb558b938988cf9e48c5fac Mon Sep 17 00:00:00 2001 From: benjamib112 Date: Sat, 18 Apr 2026 21:06:36 -0400 Subject: [PATCH 07/10] formatting --- .../benchmark/download_dataset.py | 1 - .../benchmark/generate_queries.py | 50 ++++++++----------- 2 files changed, 21 insertions(+), 30 deletions(-) diff --git a/asap-tools/execution-utilities/benchmark/download_dataset.py b/asap-tools/execution-utilities/benchmark/download_dataset.py index 750b5502..26ee54d5 100644 --- a/asap-tools/execution-utilities/benchmark/download_dataset.py +++ b/asap-tools/execution-utilities/benchmark/download_dataset.py @@ -16,7 +16,6 @@ import sys import urllib.request - CLICKBENCH_URL = "https://datasets.clickhouse.com/hits_compatible/hits.json.gz" CLICKBENCH_FILENAME = "hits.json.gz" diff --git a/asap-tools/execution-utilities/benchmark/generate_queries.py b/asap-tools/execution-utilities/benchmark/generate_queries.py index 0754a843..462c1bd7 100644 --- a/asap-tools/execution-utilities/benchmark/generate_queries.py +++ b/asap-tools/execution-utilities/benchmark/generate_queries.py @@ -64,7 +64,6 @@ from typing import List, Optional - def _parse_timestamp(value: str) -> Optional[datetime]: """Try to parse a timestamp string in common formats.""" value = str(value).strip() @@ -90,9 +89,7 @@ def _parse_timestamp(value: str) -> Optional[datetime]: return None -def _scan_ts_range_json( - file_path: str, ts_column: 
str, compressed: bool -) -> tuple: +def _scan_ts_range_json(file_path: str, ts_column: str, compressed: bool) -> tuple: """Scan a JSON-lines file and return (min_ts, max_ts, count).""" min_ts = max_ts = None count = 0 @@ -119,11 +116,10 @@ def _scan_ts_range_json( return min_ts, max_ts, count -def _scan_ts_range_csv( - file_path: str, ts_column: str -) -> tuple: +def _scan_ts_range_csv(file_path: str, ts_column: str) -> tuple: """Scan a CSV file and return (min_ts, max_ts, count).""" import csv + min_ts = max_ts = None count = 0 with open(file_path, "r", newline="") as f: @@ -145,15 +141,17 @@ def _scan_ts_range_csv( return min_ts, max_ts, count -def detect_timestamps( - data_file: str, data_file_format: str, ts_column: str -) -> tuple: +def detect_timestamps(data_file: str, data_file_format: str, ts_column: str) -> tuple: """Return (min_ts, max_ts) by scanning the entire data file.""" fmt = data_file_format.lower() if fmt in ("json.gz", "jsonl.gz"): - min_ts, max_ts, count = _scan_ts_range_json(data_file, ts_column, compressed=True) + min_ts, max_ts, count = _scan_ts_range_json( + data_file, ts_column, compressed=True + ) elif fmt in ("json", "jsonl"): - min_ts, max_ts, count = _scan_ts_range_json(data_file, ts_column, compressed=False) + min_ts, max_ts, count = _scan_ts_range_json( + data_file, ts_column, compressed=False + ) elif fmt == "csv": min_ts, max_ts, count = _scan_ts_range_csv(data_file, ts_column) else: @@ -161,9 +159,7 @@ def detect_timestamps( sys.exit(1) if min_ts is None: - print( - f"ERROR: No '{ts_column}' timestamps found in {data_file}" - ) + print(f"ERROR: No '{ts_column}' timestamps found in {data_file}") sys.exit(1) return min_ts, max_ts @@ -250,19 +246,11 @@ def generate_sql_files( desc_db = f"quantile window ending at {db_end}" if window_form == "dateadd": - asap_where = ( - f"{ts_column} BETWEEN DATEADD(s, -{window_size}, '{asap_end}') AND '{asap_end}'" - ) - db_where = ( - f"{ts_column} BETWEEN DATEADD(s, -{window_size}, '{db_end}') AND 
'{db_end}'" - ) + asap_where = f"{ts_column} BETWEEN DATEADD(s, -{window_size}, '{asap_end}') AND '{asap_end}'" + db_where = f"{ts_column} BETWEEN DATEADD(s, -{window_size}, '{db_end}') AND '{db_end}'" else: - asap_where = ( - f"{ts_column} BETWEEN '{asap_start}' AND '{asap_end}'" - ) - db_where = ( - f"{ts_column} BETWEEN '{db_start}' AND '{db_end}'" - ) + asap_where = f"{ts_column} BETWEEN '{asap_start}' AND '{asap_end}'" + db_where = f"{ts_column} BETWEEN '{db_start}' AND '{db_end}'" asap_sql = ( f"-- {label}: {desc_asap}\n" @@ -379,7 +367,9 @@ def main(): # Table/column config parser.add_argument("--table-name", required=True) parser.add_argument("--ts-column", required=True, help="Timestamp column name") - parser.add_argument("--value-column", required=True, help="Column to compute quantile on") + parser.add_argument( + "--value-column", required=True, help="Column to compute quantile on" + ) parser.add_argument( "--group-by-columns", required=True, @@ -387,7 +377,9 @@ def main(): ) # Query parameters parser.add_argument("--quantile", type=float, default=0.95) - parser.add_argument("--window-size", type=int, default=10, help="Window size in seconds") + parser.add_argument( + "--window-size", type=int, default=10, help="Window size in seconds" + ) parser.add_argument("--num-queries", type=int, default=50) parser.add_argument( "--ts-format-asap", From 9b0c7d62ba3f1208b6c319022d039d6bd5e7adbc Mon Sep 17 00:00:00 2001 From: benjamib112 Date: Wed, 22 Apr 2026 09:39:58 -0400 Subject: [PATCH 08/10] added query accuracy comparison between baseline and asap --- .../benchmark/run_benchmark.py | 107 ++++++++++++++++-- 1 file changed, 97 insertions(+), 10 deletions(-) diff --git a/asap-tools/execution-utilities/benchmark/run_benchmark.py b/asap-tools/execution-utilities/benchmark/run_benchmark.py index 696dee0a..5501e7e7 100644 --- a/asap-tools/execution-utilities/benchmark/run_benchmark.py +++ b/asap-tools/execution-utilities/benchmark/run_benchmark.py @@ -178,6 +178,7 
@@ def run_query( return ( latency_ms, None, + 0, f"HTTP {response.status_code}: {response.text[:200]}", ) except requests.Timeout: @@ -300,7 +301,7 @@ def run_benchmark( ) plot_latencies.append(0.0) else: - preview = last_result.replace("\n", " | ")[:200] if last_result else "" + preview = last_result.replace("\n", " | ") if last_result else "" latencies_ok.append(latency_ms) plot_latencies.append(latency_ms) print(f"{latency_ms:.2f}ms ({last_row_count} rows)") @@ -341,18 +342,56 @@ def _plot_single(latencies: List[float], mode: str, out_path: Path): print(f"Plot saved to {out_path}") -def _plot_comparison(asap_csv: Path, baseline_csv: Path, out_path: Path): - """Two-panel comparison plot: per-query bars + speedup bars. +def _parse_result_values(result_full: str) -> List[float]: + """Extract numeric values from a pipe-separated result_full string.""" + if not result_full: + return [] + values = [] + for part in result_full.split(" | "): + part = part.strip() + if not part: + continue + cols = part.split("\t") + try: + values.append(float(cols[-1])) + except (ValueError, IndexError): + continue + return values + + +def _compute_result_error( + baseline_values: List[float], asap_values: List[float] +) -> Optional[float]: + """Mean absolute relative error between two sorted result sets.""" + if not baseline_values or not asap_values: + return None + b = sorted(baseline_values) + a = sorted(asap_values) + n = min(len(b), len(a)) + if n == 0: + return None + b, a = b[:n], a[:n] + errors = [] + for bv, av in zip(b, a): + if bv == 0: + errors.append(0.0 if av == 0 else abs(av)) + else: + errors.append(abs(av - bv) / abs(bv)) + return sum(errors) / len(errors) - Adapted from asap_query_latency/plot_latency.py. 
- """ + +def _plot_comparison(asap_csv: Path, baseline_csv: Path, out_path: Path): + """Three-panel comparison: latency bars, speedup, and result accuracy.""" def _load(path): rows = {} with open(path) as f: for row in csv.DictReader(f): if not row["error"]: - rows[row["query_id"]] = float(row["latency_ms"]) + rows[row["query_id"]] = { + "latency": float(row["latency_ms"]), + "result": row.get("result_full", ""), + } return rows asap = _load(asap_csv) @@ -363,13 +402,28 @@ def _load(path): return x = np.arange(len(qids)) - a_vals = [asap[q] for q in qids] - b_vals = [base[q] for q in qids] + a_vals = [asap[q]["latency"] for q in qids] + b_vals = [base[q]["latency"] for q in qids] speedup = [b / a if a > 0 else 0 for a, b in zip(a_vals, b_vals)] - fig, (ax1, ax2) = plt.subplots( - 2, 1, figsize=(14, 7), gridspec_kw={"height_ratios": [3, 1]} + errors_pct = [] + for q in qids: + b_results = _parse_result_values(base[q]["result"]) + a_results = _parse_result_values(asap[q]["result"]) + err = _compute_result_error(b_results, a_results) + errors_pct.append((err or 0.0) * 100) + + has_accuracy = any(e > 0 for e in errors_pct) + n_panels = 3 if has_accuracy else 2 + ratios = [3, 1, 1.5] if has_accuracy else [3, 1] + + fig, axes = plt.subplots( + n_panels, + 1, + figsize=(14, 4 + 3 * n_panels), + gridspec_kw={"height_ratios": ratios}, ) + ax1, ax2 = axes[0], axes[1] w = 0.4 ax1.bar(x - w / 2, b_vals, w, label="Baseline", color="#f4a460") @@ -398,11 +452,44 @@ def _load(path): ax2.legend(fontsize=8) ax2.set_xlim(-0.6, len(qids) - 0.4) + if has_accuracy: + ax3 = axes[2] + colors = [ + "#d9534f" if e > 10 else "#f0ad4e" if e > 5 else "#5cb85c" + for e in errors_pct + ] + ax3.bar( + x, errors_pct, color=colors, width=0.7, edgecolor="black", linewidth=0.3 + ) + mean_err = np.mean(errors_pct) + ax3.axhline( + mean_err, + color="red", + linewidth=1, + linestyle="--", + label=f"mean {mean_err:.2f}%", + ) + ax3.set_xticks(x) + ax3.set_xticklabels(qids, rotation=90, fontsize=7) + 
ax3.set_ylabel("Relative Error (%)") + ax3.set_title("Result accuracy: ASAP estimate vs baseline exact answer") + ax3.legend(fontsize=8) + ax3.set_xlim(-0.6, len(qids) - 0.4) + plt.tight_layout() plt.savefig(out_path, dpi=150) plt.close() print(f"Comparison plot saved to {out_path}") + if has_accuracy: + s = sorted(errors_pct) + n = len(s) + print( + f"Result error: mean={np.mean(s):.2f}% " + f"p50={s[int(n*0.50)]:.2f}% p95={s[int(n*0.95)]:.2f}% " + f"max={s[-1]:.2f}%" + ) + # --------------------------------------------------------------------------- # Main From 39eb0d4bb522f8465143e8ec196d086a1ece593d Mon Sep 17 00:00:00 2001 From: Kavya Bhat Date: Thu, 30 Apr 2026 11:08:53 -0400 Subject: [PATCH 09/10] Add Elasticsearch instructions to README --- .../execution-utilities/benchmark/README.md | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/asap-tools/execution-utilities/benchmark/README.md b/asap-tools/execution-utilities/benchmark/README.md index a63beded..3f63ffcd 100644 --- a/asap-tools/execution-utilities/benchmark/README.md +++ b/asap-tools/execution-utilities/benchmark/README.md @@ -264,7 +264,67 @@ python run_benchmark.py \ --output-dir ./results \ --output-prefix h2o ``` +--- +## Elasticsearch End-to-End Example using H2O Dataset + +### Step 1-5: +Follow the same instructions from the H2O GroupBy example above. 
+### Step 6 — Launch Arroyo sketch pipeline + +```bash +python export_to_arroyo.py \ + --streaming-config ./configs/h2o_streaming.yaml \ + --source-type file \ + --input-file ./data/h2o_arroyo.json \ + --file-format json \ + --ts-format unix_millis \ + --pipeline-name h2o_pipeline \ + --arroyosketch-dir ~/ASAPQuery/asap-summary-ingest \ + --output-dir ./arroyo_outputs +``` + +### Step 7 — Start QueryEngineRust + +```bash +cd ~/ASAPQuery/asap-query-engine + +./target/release/query_engine_rust \ + --kafka-topic sketch_topic + --input-format json \ + --config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/configs/h2o_inference.yaml \ + --streaming-config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/configs/h2o_streaming.yaml \ + --http-port 8088 --delete-existing-db --log-level DEBUG \ + --output-dir ./output --streaming-engine arroyo \ + --query-language SQL --lock-strategy per-key \ + --prometheus-scrape-interval 1 > /tmp/query_engine.log 2>&1 & +``` + +### Step 8 — Load data into Elasticsearch (baseline) + +```bash +python export_to_database.py + --dataset h2o + --file-path ./data/G1_1e7_1e2_0_0.csv + --es-host localhost + --es-port 9200 + --es-index h2o_groupby + --es-api-key your-api-key + --es-bulk-size 5000 +``` + +### Step 9 — Run benchmark + +```bash +python run_benchmark.py + --mode asap + --asap-sql-file ./queries/h2o_asap.sql + --baseline-sql-file ./queries/h2o_elasticsearch.sql + --elastic-host localhost + --elastic-port 9200 + --elastic-api-key your-api-key + --output-dir ./results --output-prefix h2o +``` --- ## Custom Dataset From ab6465de48a62bd735aae491ffb516ceb4ce4ffa Mon Sep 17 00:00:00 2001 From: Milind Srivastava Date: Wed, 6 May 2026 22:26:01 -0400 Subject: [PATCH 10/10] updated scripts --- .../src/ast_matching/sqlparser_test.rs | 59 +++++++++ .../src/ast_matching/sqlpattern_parser.rs | 5 + .../execution-utilities/benchmark/README.md | 49 ++++---- .../benchmark/generate_queries.py | 117 ++++++------------ 4 files changed, 128 
insertions(+), 102 deletions(-) diff --git a/asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlparser_test.rs b/asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlparser_test.rs index 72b0940b..37a24506 100644 --- a/asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlparser_test.rs +++ b/asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlparser_test.rs @@ -496,6 +496,65 @@ mod tests { ); } + // ── ClickHouse parametric syntax + explicit BETWEEN timestamps ──────────── + // These verify that a fully ClickHouse-compatible query (no DATEADD, no NOW()) + // is parseable by ASAP: quantile(q)(col) + BETWEEN 'start' AND 'end'. + + #[test] + fn test_clickhouse_explicit_datetime_temporal_quantile() { + check_query( + "SELECT quantile(0.95)(value) FROM cpu_usage WHERE time BETWEEN '2025-10-01 00:00:00' AND '2025-10-01 00:00:10' GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalQuantile], + None, + ); + } + + #[test] + // ASAP-only: parse_datetime accepts the Z suffix (interprets as UTC), but ClickHouse + // rejects it with TYPE_MISMATCH when comparing against a DateTime column. + // Do not use Z-suffix strings in queries intended for both systems. + fn test_asap_only_iso_z_temporal_quantile() { + check_query( + "SELECT quantile(0.95)(value) FROM cpu_usage WHERE time BETWEEN '2025-10-01T00:00:00Z' AND '2025-10-01T00:00:10Z' GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalQuantile], + None, + ); + } + + #[test] + // Both ASAP (parse_datetime) and ClickHouse treat ISO-without-Z as local server time. + // They agree only when running in the same timezone; prefer 'YYYY-MM-DD HH:MM:SS' + // (space format) to avoid this implicit dependency. 
+ fn test_iso_no_z_treated_as_local_time_temporal_quantile() { + check_query( + "SELECT quantile(0.95)(value) FROM cpu_usage WHERE time BETWEEN '2025-10-01T00:00:00' AND '2025-10-01T00:00:10' GROUP BY L1, L2, L3, L4", + vec![QueryType::TemporalQuantile], + None, + ); + } + + #[test] + fn test_clickhouse_explicit_datetime_spatial_quantile() { + check_query( + "SELECT quantile(0.95)(value) FROM cpu_usage WHERE time BETWEEN '2025-10-01 00:00:00' AND '2025-10-01 00:00:01' GROUP BY L1", + vec![QueryType::Spatial], + None, + ); + } + + #[test] + fn test_clickhouse_explicit_matches_now_template() { + // A ClickHouse-style query (explicit timestamps, parametric quantile) must + // match a stored DATEADD(NOW()) template of the same shape. + let template = parse_sql_query( + "SELECT quantile(0.95)(value) FROM cpu_usage WHERE time BETWEEN DATEADD(s, -10, NOW()) AND NOW() GROUP BY L1, L2, L3, L4" + ).unwrap(); + let incoming = parse_sql_query( + "SELECT quantile(0.95)(value) FROM cpu_usage WHERE time BETWEEN '2025-10-01 00:00:00' AND '2025-10-01 00:00:10' GROUP BY L1, L2, L3, L4" + ).unwrap(); + assert!(incoming.matches_sql_pattern(&template)); + } + // ── Error cases ────────────────────────────────────────────────────────── #[test] diff --git a/asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_parser.rs b/asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_parser.rs index 3c833a08..1c145a96 100644 --- a/asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_parser.rs +++ b/asap-common/dependencies/rs/sql_utilities/src/ast_matching/sqlpattern_parser.rs @@ -320,6 +320,11 @@ impl SQLPatternParser { } fn get_timestamp_from_datetime_str(datetime_str: &str) -> Option { + // parse_datetime treats timezone-naive strings (e.g. "2025-10-01 00:00:00", + // "2025-10-01T00:00:00") as local server time, matching ClickHouse's behavior — + // but only when both run in the same timezone. Z-suffix strings (e.g. 
+ // "2025-10-01T00:00:00Z") are interpreted as UTC here but rejected by ClickHouse. + // Use space-format datetime strings ("YYYY-MM-DD HH:MM:SS") for portability. let parsed_datetime = parse_datetime(datetime_str).ok()?; Some(parsed_datetime.timestamp().as_second() as f64) } diff --git a/asap-tools/execution-utilities/benchmark/README.md b/asap-tools/execution-utilities/benchmark/README.md index 3f63ffcd..9ee62f77 100644 --- a/asap-tools/execution-utilities/benchmark/README.md +++ b/asap-tools/execution-utilities/benchmark/README.md @@ -35,6 +35,12 @@ pip3 install --user -r requirements.txt cd ~/ASAPQuery/asap-query-engine && cargo build --release ``` +> **UTC requirement:** Both ASAP and ClickHouse must run in UTC so that bare +> datetime strings (`'YYYY-MM-DD HH:MM:SS'`) are interpreted identically by both +> systems. Set `TZ=UTC` in the environment for ASAP processes and ensure +> ClickHouse's `timezone` config is set to `UTC`. If the two systems run in +> different timezones, queries will target different time windows on each side. 
+ --- ## ClickBench + ClickHouse End-to-End Example @@ -115,8 +121,7 @@ python generate_queries.py \ ``` This writes: -- `queries/clickbench_asap.sql` — ASAP queries (ISO timestamps) -- `queries/clickbench_clickhouse.sql` — ClickHouse queries (datetime timestamps) +- `queries/clickbench.sql` — shared query file for both ASAP and ClickHouse - `queries/clickbench_streaming.yaml` — Arroyo streaming config - `queries/clickbench_inference.yaml` — QueryEngineRust inference config @@ -166,8 +171,8 @@ Verify: `$INSTALL_DIR/clickhouse client --query "SELECT count(*) FROM hits"` ```bash python run_benchmark.py \ --mode both \ - --asap-sql-file ./queries/clickbench_asap.sql \ - --baseline-sql-file ./queries/clickbench_clickhouse.sql \ + --asap-sql-file ./queries/clickbench.sql \ + --baseline-sql-file ./queries/clickbench.sql \ --asap-url "http://localhost:8088/api/v1/query" \ --output-dir ./results \ --output-prefix clickbench @@ -258,8 +263,8 @@ python export_to_database.py \ ```bash python run_benchmark.py \ --mode both \ - --asap-sql-file ./queries/h2o_asap.sql \ - --baseline-sql-file ./queries/h2o_clickhouse.sql \ + --asap-sql-file ./queries/h2o.sql \ + --baseline-sql-file ./queries/h2o.sql \ --asap-url "http://localhost:8088/api/v1/query" \ --output-dir ./results \ --output-prefix h2o @@ -290,7 +295,7 @@ python export_to_arroyo.py \ cd ~/ASAPQuery/asap-query-engine ./target/release/query_engine_rust \ - --kafka-topic sketch_topic + --kafka-topic sketch_topic --input-format json \ --config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/configs/h2o_inference.yaml \ --streaming-config ~/ASAPQuery/asap-tools/execution-utilities/benchmark/configs/h2o_streaming.yaml \ @@ -303,12 +308,12 @@ cd ~/ASAPQuery/asap-query-engine ### Step 8 — Load data into Elasticsearch (baseline) ```bash -python export_to_database.py - --dataset h2o - --file-path ./data/G1_1e7_1e2_0_0.csv - --es-host localhost - --es-port 9200 - --es-index h2o_groupby +python export_to_database.py + --dataset 
h2o + --file-path ./data/G1_1e7_1e2_0_0.csv + --es-host localhost + --es-port 9200 + --es-index h2o_groupby --es-api-key your-api-key --es-bulk-size 5000 ``` @@ -316,12 +321,12 @@ python export_to_database.py ### Step 9 — Run benchmark ```bash -python run_benchmark.py - --mode asap - --asap-sql-file ./queries/h2o_asap.sql - --baseline-sql-file ./queries/h2o_elasticsearch.sql - --elastic-host localhost - --elastic-port 9200 +python run_benchmark.py + --mode asap + --asap-sql-file ./queries/h2o.sql + --baseline-sql-file ./queries/h2o.sql + --elastic-host localhost + --elastic-port 9200 --elastic-api-key your-api-key --output-dir ./results --output-prefix h2o ``` @@ -370,8 +375,8 @@ python export_to_database.py \ # 6. Run benchmark python run_benchmark.py \ --mode both \ - --asap-sql-file ./queries/my_dataset_asap.sql \ - --baseline-sql-file ./queries/my_dataset_clickhouse.sql \ + --asap-sql-file ./queries/my_dataset.sql \ + --baseline-sql-file ./queries/my_dataset.sql \ --asap-url "http://localhost:8088/api/v1/query" \ --output-dir ./results ``` @@ -407,6 +412,6 @@ $INSTALL_DIR/clickhouse client --query "TRUNCATE TABLE hits" | `prepare_data.py` | Convert raw data to Arroyo file source format (RFC3339, string columns) | | `export_to_arroyo.py` | Launch Arroyo sketch pipeline (file or kafka source) | | `export_to_database.py` | Load data into ClickHouse for baseline | -| `generate_queries.py` | Generate paired ASAP + ClickHouse SQL query files and streaming/inference YAML configs | +| `generate_queries.py` | Generate a shared SQL query file (ClickHouse-compatible syntax, used for both ASAP and ClickHouse) and optional streaming/inference YAML configs | | `run_benchmark.py` | Run queries and produce CSV results + plots | | `configs/` | ClickHouse init SQL (CREATE TABLE statements) | diff --git a/asap-tools/execution-utilities/benchmark/generate_queries.py b/asap-tools/execution-utilities/benchmark/generate_queries.py index 462c1bd7..eb1b5d4e 100644 --- 
a/asap-tools/execution-utilities/benchmark/generate_queries.py +++ b/asap-tools/execution-utilities/benchmark/generate_queries.py @@ -1,14 +1,21 @@ #!/usr/bin/env python3 """ -Generate paired ASAP and ClickHouse SQL query files for benchmarking, +Generate ASAP/ClickHouse SQL query files for benchmarking, and optionally generate streaming/inference YAML configs. +Both ASAP and ClickHouse receive identical queries using native ClickHouse syntax: + - quantile(q)(col) parametric aggregate + - 'YYYY-MM-DD HH:MM:SS' datetime timestamps (no Z suffix) + +This works because after PR #166 ASAP's parser accepts ClickHouse parametric syntax, +and both systems interpret bare datetime strings as local server time — which is +unambiguous only when both run in UTC. See README for the UTC requirement. + Each query targets a fixed time window (window-end timestamp) and matches the annotation format `-- T{NNN}: description` expected by run_benchmark.py. Output (always): - {prefix}_asap.sql QUANTILE(q, col) syntax for QueryEngineRust - {prefix}_clickhouse.sql quantile(q)(col) syntax for ClickHouse baseline + {prefix}.sql shared query file for both ASAP and ClickHouse Output (with --generate-configs): {prefix}_streaming.yaml Arroyo streaming config @@ -42,7 +49,7 @@ --data-file-format json.gz \\ --output-prefix ./queries/clickbench - # Override timestamp format for both outputs + # Use a pre-built timestamps file python generate_queries.py \\ --table-name h2o_groupby \\ --ts-column timestamp \\ @@ -50,7 +57,6 @@ --group-by-columns id1,id2 \\ --window-size 10 \\ --num-queries 50 \\ - --ts-format iso \\ --timestamps-file ./my_timestamps.txt \\ --output-prefix ./queries/h2o """ @@ -210,15 +216,7 @@ def generate_window_ends( return ends -def format_ts(ts: datetime, ts_format: str) -> str: - """Format a timestamp for SQL injection.""" - if ts_format == "iso": - return ts.strftime("%Y-%m-%dT%H:%M:%SZ") - else: # datetime - return ts.strftime("%Y-%m-%d %H:%M:%S") - - -def generate_sql_files( 
+def generate_sql_file( table_name: str, ts_column: str, value_column: str, @@ -226,60 +224,42 @@ def generate_sql_files( quantile: float, window_size: int, window_ends: List[datetime], - ts_format_asap: str, - ts_format_db: str, window_form: str, output_prefix: str, ): - """Write the paired ASAP and ClickHouse SQL files.""" + """Write a single SQL file using ClickHouse-compatible syntax. + + Uses quantile(q)(col) and 'YYYY-MM-DD HH:MM:SS' datetime strings. + Both ASAP and ClickHouse accept this format when running in UTC. + """ group_by_clause = ", ".join(group_by_columns) - asap_lines = [] - ch_lines = [] + lines = [] for i, end_ts in enumerate(window_ends): - asap_end = format_ts(end_ts, ts_format_asap) - asap_start = format_ts(end_ts - timedelta(seconds=window_size), ts_format_asap) - db_end = format_ts(end_ts, ts_format_db) - db_start = format_ts(end_ts - timedelta(seconds=window_size), ts_format_db) + end_str = end_ts.strftime("%Y-%m-%d %H:%M:%S") + start_str = (end_ts - timedelta(seconds=window_size)).strftime( + "%Y-%m-%d %H:%M:%S" + ) label = f"T{i:03d}" - desc_asap = f"quantile window ending at {asap_end}" - desc_db = f"quantile window ending at {db_end}" if window_form == "dateadd": - asap_where = f"{ts_column} BETWEEN DATEADD(s, -{window_size}, '{asap_end}') AND '{asap_end}'" - db_where = f"{ts_column} BETWEEN DATEADD(s, -{window_size}, '{db_end}') AND '{db_end}'" + where = f"{ts_column} BETWEEN DATEADD(s, -{window_size}, '{end_str}') AND '{end_str}'" else: - asap_where = f"{ts_column} BETWEEN '{asap_start}' AND '{asap_end}'" - db_where = f"{ts_column} BETWEEN '{db_start}' AND '{db_end}'" + where = f"{ts_column} BETWEEN '{start_str}' AND '{end_str}'" - asap_sql = ( - f"-- {label}: {desc_asap}\n" - f"SELECT QUANTILE({quantile}, {value_column}) FROM {table_name} " - f"WHERE {asap_where} GROUP BY {group_by_clause};" - ) - ch_sql = ( - f"-- {label}: {desc_db}\n" + lines.append( + f"-- {label}: quantile window ending at {end_str}\n" f"SELECT 
quantile({quantile})({value_column}) FROM {table_name} " - f"WHERE {db_where} GROUP BY {group_by_clause};" + f"WHERE {where} GROUP BY {group_by_clause};" ) - asap_lines.append(asap_sql) - ch_lines.append(ch_sql) - - asap_file = f"{output_prefix}_asap.sql" - ch_file = f"{output_prefix}_clickhouse.sql" - - Path(asap_file).parent.mkdir(parents=True, exist_ok=True) + sql_file = f"{output_prefix}.sql" + Path(sql_file).parent.mkdir(parents=True, exist_ok=True) - with open(asap_file, "w") as f: - f.write("\n".join(asap_lines) + "\n") + with open(sql_file, "w") as f: + f.write("\n".join(lines) + "\n") - with open(ch_file, "w") as f: - f.write("\n".join(ch_lines) + "\n") - - print(f"Generated {len(window_ends)} queries:") - print(f" ASAP: {asap_file}") - print(f" ClickHouse: {ch_file}") + print(f"Generated {len(window_ends)} queries → {sql_file}") def generate_config_files( @@ -337,7 +317,7 @@ def generate_config_files( - aggregation_id: {aggregation_id} read_count_threshold: 999999 query: |- - SELECT QUANTILE({quantile}, {value_column}) FROM {table_name} + SELECT quantile({quantile})({value_column}) FROM {table_name} WHERE {ts_column} BETWEEN DATEADD(s, -{window_size}, NOW()) AND NOW() GROUP BY {group_by_clause}; """ @@ -353,14 +333,14 @@ def generate_config_files( with open(inference_file, "w") as f: f.write(inference_content) - print(f"Generated configs:") + print("Generated configs:") print(f" Streaming: {streaming_file}") print(f" Inference: {inference_file}") def main(): parser = argparse.ArgumentParser( - description="Generate paired ASAP + ClickHouse SQL query files", + description="Generate ASAP + ClickHouse SQL query files (shared syntax)", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=__doc__, ) @@ -381,24 +361,6 @@ def main(): "--window-size", type=int, default=10, help="Window size in seconds" ) parser.add_argument("--num-queries", type=int, default=50) - parser.add_argument( - "--ts-format-asap", - choices=["iso", "datetime"], - default="iso", - 
help="Timestamp format for ASAP SQL: iso='YYYY-MM-DDTHH:MM:SSZ', datetime='YYYY-MM-DD HH:MM:SS' (default: iso)", - ) - parser.add_argument( - "--ts-format-db", - choices=["iso", "datetime"], - default="datetime", - help="Timestamp format for ClickHouse SQL: iso='YYYY-MM-DDTHH:MM:SSZ', datetime='YYYY-MM-DD HH:MM:SS' (default: datetime)", - ) - parser.add_argument( - "--ts-format", - choices=["iso", "datetime"], - default=None, - help="Set both --ts-format-asap and --ts-format-db to the same value (overrides individual flags)", - ) parser.add_argument( "--window-form", choices=["explicit", "dateadd"], @@ -408,7 +370,7 @@ def main(): parser.add_argument( "--output-prefix", required=True, - help="Output file prefix (e.g. ./queries/clickbench → clickbench_asap.sql + clickbench_clickhouse.sql)", + help="Output file prefix (e.g. ./queries/clickbench → clickbench.sql)", ) # Timestamp sources (mutually exclusive) ts_group = parser.add_mutually_exclusive_group(required=True) @@ -502,10 +464,7 @@ def main(): f"(stride={stride}s, window={args.window_size}s)" ) - ts_format_asap = args.ts_format if args.ts_format else args.ts_format_asap - ts_format_db = args.ts_format if args.ts_format else args.ts_format_db - - generate_sql_files( + generate_sql_file( table_name=args.table_name, ts_column=args.ts_column, value_column=args.value_column, @@ -513,8 +472,6 @@ def main(): quantile=args.quantile, window_size=args.window_size, window_ends=window_ends, - ts_format_asap=ts_format_asap, - ts_format_db=ts_format_db, window_form=args.window_form, output_prefix=args.output_prefix, )