From 9665b7a9498f1fba96201a86c64a96cd3d887f2e Mon Sep 17 00:00:00 2001 From: Milind Srivastava Date: Thu, 9 Apr 2026 08:33:43 -0400 Subject: [PATCH 1/6] Added function to discovery all metrics and labels from Prometheus --- asap-planner-rs/src/lib.rs | 52 +++++++++++++++++++ asap-planner-rs/src/main.rs | 8 +-- asap-planner-rs/src/prometheus_client.rs | 63 ++++++++++++++++++++++++ 3 files changed, 120 insertions(+), 3 deletions(-) diff --git a/asap-planner-rs/src/lib.rs b/asap-planner-rs/src/lib.rs index a0608c30..6f8d5ea9 100644 --- a/asap-planner-rs/src/lib.rs +++ b/asap-planner-rs/src/lib.rs @@ -389,6 +389,58 @@ impl Controller { }) } + /// Build a `Controller` by auto-discovering all metrics from Prometheus and generating + /// default quantile queries for each metric. + /// + /// This is the zero-config path: no config file or query log is needed. + pub fn from_prometheus( + prometheus_url: &str, + opts: RuntimeOptions, + ) -> Result { + let metric_names = prometheus_client::fetch_all_metric_names(prometheus_url)?; + debug!( + "Discovered {} metric(s) from Prometheus", + metric_names.len() + ); + + // Generate default quantile queries for each metric. + let queries: Vec = metric_names + .iter() + .flat_map(|m| { + [0.50, 0.90, 0.99] + .iter() + .map(move |q| format!("quantile({}, {})", q, m)) + }) + .collect(); + + let schema = prometheus_client::build_schema_from_prometheus(prometheus_url, &queries)?; + + let config = ControllerConfig { + query_groups: vec![config::input::QueryGroup { + id: Some(1), + queries, + repetition_delay: 10, + controller_options: config::input::ControllerOptions { + accuracy_sla: 0.99, + latency_sla: 1.0, + }, + step: None, + range_duration: None, + }], + sketch_parameters: None, + aggregate_cleanup: Some(config::input::AggregateCleanupConfig { + policy: Some("read_based".to_string()), + }), + metrics: None, + }; + + Ok(Self { + config, + schema, + options: opts, + }) + } + /// Build a `Controller` from a Prometheus query log file, fetching metric labels from /// Prometheus. /// diff --git a/asap-planner-rs/src/main.rs b/asap-planner-rs/src/main.rs index bd7fcba1..e3374854 100644 --- a/asap-planner-rs/src/main.rs +++ b/asap-planner-rs/src/main.rs @@ -98,15 +98,17 @@ fn main() -> anyhow::Result<()> { (None, Some(log_path), Some(url)) => { Controller::from_query_log(&log_path, opts, &url)? } + (None, None, Some(url)) => Controller::from_prometheus(&url, opts)?, (None, Some(_log_path), None) => { anyhow::bail!( "--prometheus-url is required when using --query-log \ (query logs have no metrics hint to fall back on)" ) } - _ => anyhow::bail!( - "exactly one of --input_config or --query-log must be provided for PromQL mode" - ), + (None, None, None) => { + anyhow::bail!("provide one of --input_config, --query-log, or --prometheus-url") + } + _ => unreachable!("clap conflicts_with prevents this combination"), }; controller.generate_to_dir(&args.output_dir)?; } diff --git a/asap-planner-rs/src/prometheus_client.rs b/asap-planner-rs/src/prometheus_client.rs index b8e8d288..689e480d 100644 --- a/asap-planner-rs/src/prometheus_client.rs +++ b/asap-planner-rs/src/prometheus_client.rs @@ -158,6 +158,69 @@ fn fetch_labels_for_metric( ))) } +/// Query Prometheus `GET /api/v1/label/__name__/values` and return all metric names. +pub fn fetch_all_metric_names(prometheus_url: &str) -> Result, ControllerError> { + let url = format!( + "{}/api/v1/label/__name__/values", + prometheus_url.trim_end_matches('/') + ); + let client = reqwest::blocking::Client::new(); + + for attempt in 1..=MAX_RETRIES { + let response = client.get(&url).send().map_err(|e| { + ControllerError::PrometheusClient(format!( + "HTTP request failed for metric names: {}", + e + )) + })?; + + let status = response.status(); + + if status == reqwest::StatusCode::SERVICE_UNAVAILABLE { + warn!( + "Prometheus returned 503 for metric names (attempt {}/{}); retrying in {}s", + attempt, + MAX_RETRIES, + RETRY_DELAY.as_secs(), + ); + thread::sleep(RETRY_DELAY); + continue; + } + + if !status.is_success() { + return Err(ControllerError::PrometheusClient(format!( + "Prometheus returned HTTP {} for metric names", + status + ))); + } + + let body: serde_json::Value = response.json().map_err(|e| { + ControllerError::PrometheusClient(format!( + "Failed to parse Prometheus response for metric names: {}", + e + )) + })?; + + let data = match body.get("data").and_then(|d| d.as_array()) { + Some(arr) => arr, + None => { + warn!("Prometheus returned no 'data' array for metric names"); + return Ok(Vec::new()); + } + }; + + return Ok(data + .iter() + .filter_map(|v| v.as_str().map(String::from)) + .collect()); + } + + Err(ControllerError::PrometheusClient(format!( + "Prometheus returned 503 for metric names after {} attempts; giving up", + MAX_RETRIES + ))) +} + /// Build a `PromQLSchema` by querying Prometheus for each metric name found in the given /// PromQL queries. Metrics with no series in Prometheus are skipped with a warning. pub fn build_schema_from_prometheus( From 91f154161daf445d0194c81d2fcff66eb518b849 Mon Sep 17 00:00:00 2001 From: Milind Srivastava Date: Thu, 9 Apr 2026 09:01:23 -0400 Subject: [PATCH 2/6] Added asap-dropin configs and docker compose --- asap-dropin/.env | 21 +++ asap-dropin/README.md | 90 +++++++++++ asap-dropin/config/arroyo-config.yaml | 3 + asap-dropin/docker-compose.dev.yml | 22 +++ asap-dropin/docker-compose.yml | 208 ++++++++++++++++++++++++++ 5 files changed, 344 insertions(+) create mode 100644 asap-dropin/.env create mode 100644 asap-dropin/README.md create mode 100644 asap-dropin/config/arroyo-config.yaml create mode 100644 asap-dropin/docker-compose.dev.yml create mode 100644 asap-dropin/docker-compose.yml diff --git a/asap-dropin/.env b/asap-dropin/.env new file mode 100644 index 00000000..79e0f408 --- /dev/null +++ b/asap-dropin/.env @@ -0,0 +1,21 @@ +# ── User's existing Prometheus ──────────────────────────────────────────────── +# URL of your running Prometheus instance, reachable from inside Docker. +# - Docker Desktop (Mac/Windows): http://host.docker.internal:9090 +# - Linux (host networking): http://172.17.0.1:9090 (default Docker bridge gateway) +# - Prometheus in another compose: use a shared Docker network and the service name +PROMETHEUS_URL=http://host.docker.internal:9090 + +# Scrape interval configured in your Prometheus (in seconds). +PROMETHEUS_SCRAPE_INTERVAL=15 + +# ── Exposed ports ──────────────────────────────────────────────────────────── +# Port on the host where the remote-write receiver listens. +# Add this to your prometheus.yml: +# remote_write: +# - url: http://localhost:${REMOTE_WRITE_PORT}/receive +REMOTE_WRITE_PORT=9091 + +# Port on the host where the ASAPQuery query engine listens. +# Point your Grafana Prometheus datasource here: +# http://localhost:${QUERY_ENGINE_PORT} +QUERY_ENGINE_PORT=8088 diff --git a/asap-dropin/README.md b/asap-dropin/README.md new file mode 100644 index 00000000..d2a17f9d --- /dev/null +++ b/asap-dropin/README.md @@ -0,0 +1,90 @@ +# ASAPQuery Drop-in for Existing Prometheus + Grafana Stacks + +A self-contained Docker Compose that adds ASAPQuery to an existing Prometheus and Grafana deployment. No additional services need to be installed or managed beyond what is in this compose file. + +ASAPQuery auto-discovers all metrics from your Prometheus and generates quantile sketches for them — no query configuration needed. + +## Prerequisites + +- Docker and Docker Compose +- A running Prometheus instance +- A running Grafana instance (with a Prometheus datasource) + +## Quick Start + +### 1. Set environment variables + +Copy and edit the `.env` file: + +| Variable | Default | Description | +|---|---|---| +| `PROMETHEUS_URL` | `http://host.docker.internal:9090` | URL of your Prometheus, reachable from inside Docker | +| `PROMETHEUS_SCRAPE_INTERVAL` | `15` | Your Prometheus scrape interval in seconds | +| `REMOTE_WRITE_PORT` | `9091` | Host port for the remote-write receiver | +| `QUERY_ENGINE_PORT` | `8088` | Host port for the ASAPQuery query engine | + +**Finding the right `PROMETHEUS_URL`:** +- **Docker Desktop (Mac/Windows):** `http://host.docker.internal:9090` (default) +- **Linux (Prometheus on host):** `http://172.17.0.1:9090` (default Docker bridge gateway) +- **Prometheus in another Docker Compose:** create a shared external network + +### 2. Start ASAPQuery + +```bash +docker compose up -d +``` + +### 3. Add remote_write to your Prometheus + +Add this to your `prometheus.yml` and reload Prometheus: + +```yaml +remote_write: + - url: http://localhost:9091/receive + queue_config: + batch_send_deadline: 1s + sample_age_limit: 5m +``` + +### 4. Point Grafana at ASAPQuery + +Change your Grafana Prometheus datasource URL from your Prometheus address to: + +``` +http://localhost:8088 +``` + +ASAPQuery speaks the Prometheus query API. Queries it can accelerate are answered from sketches; all others are transparently forwarded to your upstream Prometheus. + +## Architecture + +``` +Your Prometheus ──remote_write──▸ Arroyo (:9091/receive) + │ + ▼ + Kafka + │ + ▼ +Your Grafana ◂──query──── ASAPQuery Query Engine (:8088) + │ + ▼ (fallback) + Your Prometheus +``` + +## Future: single-container mode + +This compose currently runs six containers (Kafka, Arroyo, planner, summary-ingest, query engine, plus kafka-init). Once issues #242, #243, and #244 are completed and the precompute_engine cutover is done, the query engine will be able to: + +- Receive Prometheus remote_write directly (precompute engine, `--enable-prometheus-remote-write`) +- Auto-discover metrics and run the planner in-process (already embedded via #271/#272) +- Hot-reload sketch and ingest configs at runtime (#242/#243/#244) + +At that point this compose should be collapsed to a single `queryengine` container — Kafka, Arroyo, the planner init container, and asap-summary-ingest all become unnecessary. + +## Development + +To build from local source instead of pulling pre-built images: + +```bash +docker compose -f docker-compose.yml -f docker-compose.dev.yml up +``` diff --git a/asap-dropin/config/arroyo-config.yaml b/asap-dropin/config/arroyo-config.yaml new file mode 100644 index 00000000..534b31c2 --- /dev/null +++ b/asap-dropin/config/arroyo-config.yaml @@ -0,0 +1,3 @@ +# Arroyo streaming engine configuration +compiler: + use-local-udf-crate: true diff --git a/asap-dropin/docker-compose.dev.yml b/asap-dropin/docker-compose.dev.yml new file mode 100644 index 00000000..03076a53 --- /dev/null +++ b/asap-dropin/docker-compose.dev.yml @@ -0,0 +1,22 @@ +name: asapquery-dropin + +# Development override: builds ASAP services from local source instead of +# pulling pre-built images from ghcr. +# +# Usage: +# docker compose -f docker-compose.yml -f docker-compose.dev.yml up + +services: + asap-planner-rs: + build: + context: .. + dockerfile: asap-planner-rs/Dockerfile + + asap-summary-ingest: + build: + context: ../asap-summary-ingest + + queryengine: + build: + context: .. + dockerfile: asap-query-engine/Dockerfile diff --git a/asap-dropin/docker-compose.yml b/asap-dropin/docker-compose.yml new file mode 100644 index 00000000..b1bc681f --- /dev/null +++ b/asap-dropin/docker-compose.yml @@ -0,0 +1,208 @@ +name: asapquery-dropin + +# Self-contained ASAPQuery stack for existing Prometheus + Grafana deployments. +# +# Usage: +# 1. Adjust PROMETHEUS_URL in .env to point at your Prometheus. +# 2. docker compose up -d +# 3. Add remote_write to your prometheus.yml -> http://localhost:${REMOTE_WRITE_PORT}/receive +# 4. Point your Grafana datasource URL -> http://localhost:${QUERY_ENGINE_PORT} + +networks: + asap-network: + driver: bridge + +volumes: + kafka-data: + asap-planner-output: + +services: + ############################################################################# + # INFRASTRUCTURE + ############################################################################# + + kafka: + image: apache/kafka:3.7.0 + container_name: asap-dropin-kafka + hostname: kafka + networks: + - asap-network + environment: + KAFKA_NODE_ID: 1 + KAFKA_PROCESS_ROLES: broker,controller + KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9092,CONTROLLER://0.0.0.0:9093 + KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092 + KAFKA_CONTROLLER_QUORUM_VOTERS: 1@kafka:9093 + KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 + KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 + KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 + KAFKA_MESSAGE_MAX_BYTES: 20971520 + KAFKA_REPLICA_FETCH_MAX_BYTES: 20971520 + KAFKA_LOG_RETENTION_HOURS: 1 + KAFKA_LOG_DIRS: /tmp/kraft-combined-logs + CLUSTER_ID: MkU3OEVBNTcwNTJENDM2Qk + volumes: + - kafka-data:/tmp/kraft-combined-logs + user: "0:0" + entrypoint: /bin/bash + command: + - -c + - | + chown -R appuser:appuser /tmp/kraft-combined-logs + chmod -R 755 /tmp/kraft-combined-logs + exec su appuser -c "/etc/kafka/docker/run" + healthcheck: + test: ["CMD-SHELL", "/opt/kafka/bin/kafka-broker-api-versions.sh --bootstrap-server localhost:9092 || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 30s + restart: unless-stopped + + kafka-init: + image: apache/kafka:3.7.0 + container_name: asap-dropin-kafka-init + networks: + - asap-network + depends_on: + kafka: + condition: service_healthy + entrypoint: /bin/bash + command: + - -c + - | + echo "Creating Kafka topics..." + /opt/kafka/bin/kafka-topics.sh --bootstrap-server kafka:9092 \ + --create --if-not-exists --topic flink_input \ + --partitions 1 --replication-factor 1 \ + --config max.message.bytes=20971520 + + /opt/kafka/bin/kafka-topics.sh --bootstrap-server kafka:9092 \ + --create --if-not-exists --topic flink_output \ + --partitions 1 --replication-factor 1 \ + --config max.message.bytes=20971520 + + echo "Kafka topics created successfully" + restart: "no" + + arroyo: + image: ghcr.io/projectasap/asap-arroyo:v0.1.0 + container_name: asap-dropin-arroyo + hostname: arroyo + networks: + - asap-network + ports: + - "${REMOTE_WRITE_PORT:-9091}:9091" + volumes: + - ./config/arroyo-config.yaml:/config.yaml:ro + command: ["--config", "/config.yaml", "cluster"] + environment: + - ARROYO__API__RUN_HTTP_PORT=5115 + - KAFKA_BOOTSTRAP_SERVERS=kafka:9092 + depends_on: + kafka: + condition: service_healthy + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:5115/api/v1/pipelines || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + restart: unless-stopped + + ############################################################################# + # INIT CONTAINERS + ############################################################################# + + asap-planner-rs: + image: ghcr.io/projectasap/asap-planner-rs:v0.2.0 + container_name: asap-dropin-planner + hostname: asap-planner-rs + networks: + - asap-network + command: + - "--output_dir=/asap-planner-output" + - "--prometheus_scrape_interval=${PROMETHEUS_SCRAPE_INTERVAL:-15}" + - "--streaming_engine=arroyo" + - "--range-duration=300" + - "--step=10" + - "--prometheus-url=${PROMETHEUS_URL:-http://host.docker.internal:9090}" + - "--query-language=promql" + - "-v" + volumes: + - asap-planner-output:/asap-planner-output + restart: "no" + + asap-summary-ingest: + image: ghcr.io/projectasap/asap-summary-ingest:v0.2.0 + container_name: asap-dropin-summary-ingest + hostname: asap-summary-ingest + networks: + - asap-network + command: + - "--config_file_path=/asap-planner-output/streaming_config.yaml" + - "--source_type=prometheus_remote_write" + - "--prometheus_base_port=9091" + - "--prometheus_path=/receive" + - "--prometheus_bind_ip=0.0.0.0" + - "--parallelism=1" + - "--output_kafka_topic=flink_output" + - "--output_format=json" + - "--pipeline_name=asap-dropin" + - "--output_dir=/asap-summary-ingest-output" + - "--arroyo_url=http://arroyo:5115/api/v1" + - "--bootstrap_servers=kafka:9092" + volumes: + - asap-planner-output:/asap-planner-output:ro + - ./output/asap-summary-ingest:/asap-summary-ingest-output + depends_on: + asap-planner-rs: + condition: service_completed_successfully + arroyo: + condition: service_healthy + restart: "no" + + ############################################################################# + # CORE SERVICE + ############################################################################# + + queryengine: + image: ghcr.io/projectasap/asap-query-engine:v0.2.0 + container_name: asap-dropin-queryengine + hostname: queryengine + networks: + - asap-network + ports: + - "${QUERY_ENGINE_PORT:-8088}:8088" + environment: + - RUST_LOG=INFO + - RUST_BACKTRACE=1 + volumes: + - asap-planner-output:/asap-planner-output:ro + - ./output/queryengine:/app/outputs + command: + - "--kafka-topic=flink_output" + - "--kafka-broker=kafka:9092" + - "--input-format=json" + - "--config=/asap-planner-output/inference_config.yaml" + - "--streaming-config=/asap-planner-output/streaming_config.yaml" + - "--prometheus-server=${PROMETHEUS_URL:-http://host.docker.internal:9090}" + - "--prometheus-scrape-interval=${PROMETHEUS_SCRAPE_INTERVAL:-15}" + - "--streaming-engine=arroyo" + - "--delete-existing-db" + - "--log-level=INFO" + - "--output-dir=/app/outputs" + - "--query-language=PROMQL" + - "--lock-strategy=per-key" + - "--decompress-json" + - "--forward-unsupported-queries" + depends_on: + asap-summary-ingest: + condition: service_completed_successfully + kafka: + condition: service_healthy + kafka-init: + condition: service_completed_successfully + restart: unless-stopped From 709dd6271213a5303452e1a92cf4bb62ab70b14d Mon Sep 17 00:00:00 2001 From: Milind Srivastava Date: Mon, 13 Apr 2026 17:29:38 -0400 Subject: [PATCH 3/6] Updated docker compose to use v0.3.0 images and to use asap-query-engine's precompute engine --- asap-dropin/config/arroyo-config.yaml | 3 - asap-dropin/docker-compose.yml | 149 +++----------------------- 2 files changed, 12 insertions(+), 140 deletions(-) delete mode 100644 asap-dropin/config/arroyo-config.yaml diff --git a/asap-dropin/config/arroyo-config.yaml b/asap-dropin/config/arroyo-config.yaml deleted file mode 100644 index 534b31c2..00000000 --- a/asap-dropin/config/arroyo-config.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# Arroyo streaming engine configuration -compiler: - use-local-udf-crate: true diff --git a/asap-dropin/docker-compose.yml b/asap-dropin/docker-compose.yml index b1bc681f..5404da48 100644 --- a/asap-dropin/docker-compose.yml +++ b/asap-dropin/docker-compose.yml @@ -13,111 +13,15 @@ networks: driver: bridge volumes: - kafka-data: asap-planner-output: services: - ############################################################################# - # INFRASTRUCTURE - ############################################################################# - - kafka: - image: apache/kafka:3.7.0 - container_name: asap-dropin-kafka - hostname: kafka - networks: - - asap-network - environment: - KAFKA_NODE_ID: 1 - KAFKA_PROCESS_ROLES: broker,controller - KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9092,CONTROLLER://0.0.0.0:9093 - KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092 - KAFKA_CONTROLLER_QUORUM_VOTERS: 1@kafka:9093 - KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 - KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 - KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 - KAFKA_MESSAGE_MAX_BYTES: 20971520 - KAFKA_REPLICA_FETCH_MAX_BYTES: 20971520 - KAFKA_LOG_RETENTION_HOURS: 1 - KAFKA_LOG_DIRS: /tmp/kraft-combined-logs - CLUSTER_ID: MkU3OEVBNTcwNTJENDM2Qk - volumes: - - kafka-data:/tmp/kraft-combined-logs - user: "0:0" - entrypoint: /bin/bash - command: - - -c - - | - chown -R appuser:appuser /tmp/kraft-combined-logs - chmod -R 755 /tmp/kraft-combined-logs - exec su appuser -c "/etc/kafka/docker/run" - healthcheck: - test: ["CMD-SHELL", "/opt/kafka/bin/kafka-broker-api-versions.sh --bootstrap-server localhost:9092 || exit 1"] - interval: 10s - timeout: 5s - retries: 5 - start_period: 30s - restart: unless-stopped - - kafka-init: - image: apache/kafka:3.7.0 - container_name: asap-dropin-kafka-init - networks: - - asap-network - depends_on: - kafka: - condition: service_healthy - entrypoint: /bin/bash - command: - - -c - - | - echo "Creating Kafka topics..." - /opt/kafka/bin/kafka-topics.sh --bootstrap-server kafka:9092 \ - --create --if-not-exists --topic flink_input \ - --partitions 1 --replication-factor 1 \ - --config max.message.bytes=20971520 - - /opt/kafka/bin/kafka-topics.sh --bootstrap-server kafka:9092 \ - --create --if-not-exists --topic flink_output \ - --partitions 1 --replication-factor 1 \ - --config max.message.bytes=20971520 - - echo "Kafka topics created successfully" - restart: "no" - - arroyo: - image: ghcr.io/projectasap/asap-arroyo:v0.1.0 - container_name: asap-dropin-arroyo - hostname: arroyo - networks: - - asap-network - ports: - - "${REMOTE_WRITE_PORT:-9091}:9091" - volumes: - - ./config/arroyo-config.yaml:/config.yaml:ro - command: ["--config", "/config.yaml", "cluster"] - environment: - - ARROYO__API__RUN_HTTP_PORT=5115 - - KAFKA_BOOTSTRAP_SERVERS=kafka:9092 - depends_on: - kafka: - condition: service_healthy - healthcheck: - test: ["CMD-SHELL", "curl -f http://localhost:5115/api/v1/pipelines || exit 1"] - interval: 10s - timeout: 5s - retries: 5 - restart: unless-stopped - ############################################################################# # INIT CONTAINERS ############################################################################# asap-planner-rs: - image: ghcr.io/projectasap/asap-planner-rs:v0.2.0 + image: ghcr.io/projectasap/asap-planner-rs:v0.3.0 container_name: asap-dropin-planner hostname: asap-planner-rs networks: @@ -135,47 +39,19 @@ services: - asap-planner-output:/asap-planner-output restart: "no" - asap-summary-ingest: - image: ghcr.io/projectasap/asap-summary-ingest:v0.2.0 - container_name: asap-dropin-summary-ingest - hostname: asap-summary-ingest - networks: - - asap-network - command: - - "--config_file_path=/asap-planner-output/streaming_config.yaml" - - "--source_type=prometheus_remote_write" - - "--prometheus_base_port=9091" - - "--prometheus_path=/receive" - - "--prometheus_bind_ip=0.0.0.0" - - "--parallelism=1" - - "--output_kafka_topic=flink_output" - - "--output_format=json" - - "--pipeline_name=asap-dropin" - - "--output_dir=/asap-summary-ingest-output" - - "--arroyo_url=http://arroyo:5115/api/v1" - - "--bootstrap_servers=kafka:9092" - volumes: - - asap-planner-output:/asap-planner-output:ro - - ./output/asap-summary-ingest:/asap-summary-ingest-output - depends_on: - asap-planner-rs: - condition: service_completed_successfully - arroyo: - condition: service_healthy - restart: "no" - ############################################################################# # CORE SERVICE ############################################################################# queryengine: - image: ghcr.io/projectasap/asap-query-engine:v0.2.0 + image: ghcr.io/projectasap/asap-query-engine:v0.3.0 container_name: asap-dropin-queryengine hostname: queryengine networks: - asap-network ports: - "${QUERY_ENGINE_PORT:-8088}:8088" + - "${REMOTE_WRITE_PORT:-9091}:9091" environment: - RUST_LOG=INFO - RUST_BACKTRACE=1 @@ -183,26 +59,25 @@ services: - asap-planner-output:/asap-planner-output:ro - ./output/queryengine:/app/outputs command: - - "--kafka-topic=flink_output" - - "--kafka-broker=kafka:9092" - - "--input-format=json" - "--config=/asap-planner-output/inference_config.yaml" - "--streaming-config=/asap-planner-output/streaming_config.yaml" - "--prometheus-server=${PROMETHEUS_URL:-http://host.docker.internal:9090}" - "--prometheus-scrape-interval=${PROMETHEUS_SCRAPE_INTERVAL:-15}" - - "--streaming-engine=arroyo" + - "--streaming-engine=precompute" + - "--prometheus-remote-write-port=9091" - "--delete-existing-db" - "--log-level=INFO" - "--output-dir=/app/outputs" - "--query-language=PROMQL" - "--lock-strategy=per-key" - - "--decompress-json" - "--forward-unsupported-queries" + healthcheck: + test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/localhost/8088' 2>/dev/null || exit 1"] + interval: 10s + timeout: 5s + retries: 10 + start_period: 15s depends_on: - asap-summary-ingest: - condition: service_completed_successfully - kafka: - condition: service_healthy - kafka-init: + asap-planner-rs: condition: service_completed_successfully restart: unless-stopped From 79e5f4c19125941b79dd3825a451ad77b575c864 Mon Sep 17 00:00:00 2001 From: Milind Srivastava Date: Thu, 16 Apr 2026 21:20:59 -0400 Subject: [PATCH 4/6] Made config options optional, updated drop-in docker compose --- asap-dropin/docker-compose.yml | 42 +++++----------------------------- asap-planner-rs/src/lib.rs | 4 +++- asap-query-engine/src/main.rs | 40 ++++++++++++++++++++------------ 3 files changed, 35 insertions(+), 51 deletions(-) diff --git a/asap-dropin/docker-compose.yml b/asap-dropin/docker-compose.yml index 5404da48..8ce572b1 100644 --- a/asap-dropin/docker-compose.yml +++ b/asap-dropin/docker-compose.yml @@ -7,42 +7,16 @@ name: asapquery-dropin # 2. docker compose up -d # 3. Add remote_write to your prometheus.yml -> http://localhost:${REMOTE_WRITE_PORT}/receive # 4. Point your Grafana datasource URL -> http://localhost:${QUERY_ENGINE_PORT} +# +# The query engine starts with an empty plan and forwards all queries to Prometheus. +# After the observation window (default 10 min), it automatically generates a plan +# based on real query patterns and begins precomputing sketches. networks: asap-network: driver: bridge -volumes: - asap-planner-output: - services: - ############################################################################# - # INIT CONTAINERS - ############################################################################# - - asap-planner-rs: - image: ghcr.io/projectasap/asap-planner-rs:v0.3.0 - container_name: asap-dropin-planner - hostname: asap-planner-rs - networks: - - asap-network - command: - - "--output_dir=/asap-planner-output" - - "--prometheus_scrape_interval=${PROMETHEUS_SCRAPE_INTERVAL:-15}" - - "--streaming_engine=arroyo" - - "--range-duration=300" - - "--step=10" - - "--prometheus-url=${PROMETHEUS_URL:-http://host.docker.internal:9090}" - - "--query-language=promql" - - "-v" - volumes: - - asap-planner-output:/asap-planner-output - restart: "no" - - ############################################################################# - # CORE SERVICE - ############################################################################# - queryengine: image: ghcr.io/projectasap/asap-query-engine:v0.3.0 container_name: asap-dropin-queryengine @@ -56,11 +30,8 @@ services: - RUST_LOG=INFO - RUST_BACKTRACE=1 volumes: - - asap-planner-output:/asap-planner-output:ro - ./output/queryengine:/app/outputs command: - - "--config=/asap-planner-output/inference_config.yaml" - - "--streaming-config=/asap-planner-output/streaming_config.yaml" - "--prometheus-server=${PROMETHEUS_URL:-http://host.docker.internal:9090}" - "--prometheus-scrape-interval=${PROMETHEUS_SCRAPE_INTERVAL:-15}" - "--streaming-engine=precompute" @@ -71,13 +42,12 @@ services: - "--query-language=PROMQL" - "--lock-strategy=per-key" - "--forward-unsupported-queries" + - "--enable-query-tracker" + - "--tracker-observation-window-secs=600" healthcheck: test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/localhost/8088' 2>/dev/null || exit 1"] interval: 10s timeout: 5s retries: 10 start_period: 15s - depends_on: - asap-planner-rs: - condition: service_completed_successfully restart: unless-stopped diff --git a/asap-planner-rs/src/lib.rs b/asap-planner-rs/src/lib.rs index c36f2dfb..cea747c3 100644 --- a/asap-planner-rs/src/lib.rs +++ b/asap-planner-rs/src/lib.rs @@ -416,9 +416,11 @@ impl Controller { }], sketch_parameters: None, aggregate_cleanup: Some(config::input::AggregateCleanupConfig { - policy: Some("read_based".to_string()), + policy: Some(asap_types::enums::CleanupPolicy::ReadBased), }), metrics: None, + existing_streaming_config: None, + existing_inference_config: None, }; Ok(Self { diff --git a/asap-query-engine/src/main.rs b/asap-query-engine/src/main.rs index 78385c63..92aade49 100644 --- a/asap-query-engine/src/main.rs +++ b/asap-query-engine/src/main.rs @@ -7,11 +7,15 @@ use tracing::{error, info, warn}; use sketch_core::config::{self, ImplMode}; -use query_engine_rust::data_model::enums::{InputFormat, LockStrategy, StreamingEngine}; +use asap_types::streaming_config::StreamingConfig; +use query_engine_rust::data_model::enums::{ + CleanupPolicy, InputFormat, LockStrategy, StreamingEngine, +}; use query_engine_rust::drivers::AdapterConfig; use query_engine_rust::precompute_engine::config::LateDataPolicy; use query_engine_rust::precompute_engine::PrecomputeWorkerDiagnostics; use query_engine_rust::utils::file_io::{read_inference_config, read_streaming_config}; +use query_engine_rust::InferenceConfig; use query_engine_rust::{ HttpServer, HttpServerConfig, KafkaConsumer, KafkaConsumerConfig, OtlpReceiver, OtlpReceiverConfig, PrecomputeEngine, PrecomputeEngineConfig, PrecomputeEngineHandle, Result, @@ -29,13 +33,13 @@ struct Args { #[arg(long, value_enum)] input_format: Option, - /// Configuration file path + /// Inference config file path (optional; starts with empty config when omitted, requires --enable-query-tracker) #[arg(long)] - config: String, + config: Option, - /// File path for streaming_config + /// Streaming config file path (optional; starts with empty config when omitted, requires --enable-query-tracker) #[arg(long)] - streaming_config: String, + streaming_config: Option, /// Streaming engine to use #[arg(long, value_enum, default_value = "arroyo")] @@ -182,26 +186,34 @@ async fn main() -> Result<()> { let _log_guard = setup_logging(&args.output_dir, &args.log_level)?; info!("Starting Query Engine Rust"); - info!("Config file: {}", args.config); info!("Output directory: {}", args.output_dir); - // Read config (equivalent to utils.file_io.read_inference_config) - let inference_config = read_inference_config(&args.config, args.query_language)?; + let inference_config = match &args.config { + Some(path) => { + info!("Config file: {}", path); + read_inference_config(path, args.query_language)? + } + None => { + info!("No config file provided; starting with empty inference config"); + InferenceConfig::new(args.query_language, CleanupPolicy::NoCleanup) + } + }; info!( "Loaded inference config with {} query configs", inference_config.query_configs.len() ); - info!("Inference config: {:?}", inference_config); - let streaming_config = Arc::new(read_streaming_config( - &args.streaming_config, - &inference_config, - )?); + let streaming_config = Arc::new(match &args.streaming_config { + Some(path) => read_streaming_config(path, &inference_config)?, + None => { + info!("No streaming config file provided; starting with empty streaming config"); + StreamingConfig::default() + } + }); info!( "Loaded streaming config with {} entries", streaming_config.get_all_aggregation_configs().len() ); - info!("Streaming config: {:?}", streaming_config); // Shared config refs — passed to QueryTracker so it can populate ControllerConfig // with the current configs as context for the planner. The applier task updates From c1f0fe71f1b8715a8ae78a477a5e7ce05adbbc5e Mon Sep 17 00:00:00 2001 From: Milind Srivastava Date: Sat, 18 Apr 2026 14:58:10 -0400 Subject: [PATCH 5/6] Removed unnecessary components from docker compose --- asap-dropin/docker-compose.dev.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/asap-dropin/docker-compose.dev.yml b/asap-dropin/docker-compose.dev.yml index 03076a53..2a997840 100644 --- a/asap-dropin/docker-compose.dev.yml +++ b/asap-dropin/docker-compose.dev.yml @@ -7,15 +7,6 @@ name: asapquery-dropin # docker compose -f docker-compose.yml -f docker-compose.dev.yml up services: - asap-planner-rs: - build: - context: .. - dockerfile: asap-planner-rs/Dockerfile - - asap-summary-ingest: - build: - context: ../asap-summary-ingest - queryengine: build: context: .. From 6b3451f16b59780217e0e4ed2380de2fc95e4cf0 Mon Sep 17 00:00:00 2001 From: Milind Srivastava Date: Sat, 18 Apr 2026 15:18:34 -0400 Subject: [PATCH 6/6] Updated README, removed unused code from asap-planner --- asap-dropin/README.md | 33 +++++-------- asap-planner-rs/src/lib.rs | 54 -------------------- asap-planner-rs/src/main.rs | 5 +- asap-planner-rs/src/prometheus_client.rs | 63 ------------------------ 4 files changed, 13 insertions(+), 142 deletions(-) diff --git a/asap-dropin/README.md b/asap-dropin/README.md index d2a17f9d..bb16ec99 100644 --- a/asap-dropin/README.md +++ b/asap-dropin/README.md @@ -1,8 +1,8 @@ # ASAPQuery Drop-in for Existing Prometheus + Grafana Stacks -A self-contained Docker Compose that adds ASAPQuery to an existing Prometheus and Grafana deployment. No additional services need to be installed or managed beyond what is in this compose file. +A self-contained single-container Docker Compose that adds ASAPQuery to an existing Prometheus and Grafana deployment. -ASAPQuery auto-discovers all metrics from your Prometheus and generates quantile sketches for them — no query configuration needed. +On startup, all queries are forwarded transparently to your upstream Prometheus. After one observation window (default 10 min), the engine automatically plans and activates sketch-based acceleration based on the real queries it observed from Grafana. ## Prerequisites @@ -12,9 +12,9 @@ ASAPQuery auto-discovers all metrics from your Prometheus and generates quantile ## Quick Start -### 1. Set environment variables +### 1. Configure environment -Copy and edit the `.env` file: +Edit `.env`: | Variable | Default | Description | |---|---|---| @@ -59,27 +59,16 @@ ASAPQuery speaks the Prometheus query API. Queries it can accelerate are answere ## Architecture ``` -Your Prometheus ──remote_write──▸ Arroyo (:9091/receive) - │ - ▼ - Kafka - │ - ▼ +Your Prometheus ──remote_write──▸ ASAPQuery (:9091/receive) + │ + ▼ Your Grafana ◂──query──── ASAPQuery Query Engine (:8088) - │ - ▼ (fallback) - Your Prometheus + │ + ▼ (fallback / passthrough) + Your Prometheus ``` -## Future: single-container mode - -This compose currently runs six containers (Kafka, Arroyo, planner, summary-ingest, query engine, plus kafka-init). Once issues #242, #243, and #244 are completed and the precompute_engine cutover is done, the query engine will be able to: - -- Receive Prometheus remote_write directly (precompute engine, `--enable-prometheus-remote-write`) -- Auto-discover metrics and run the planner in-process (already embedded via #271/#272) -- Hot-reload sketch and ingest configs at runtime (#242/#243/#244) - -At that point this compose should be collapsed to a single `queryengine` container — Kafka, Arroyo, the planner init container, and asap-summary-ingest all become unnecessary. +The query engine embeds the planner and runs it automatically after observing real Grafana queries for one observation window. No separate planner container, no Kafka, no Arroyo. ## Development diff --git a/asap-planner-rs/src/lib.rs b/asap-planner-rs/src/lib.rs index cea747c3..5aa3196b 100644 --- a/asap-planner-rs/src/lib.rs +++ b/asap-planner-rs/src/lib.rs @@ -376,60 +376,6 @@ impl Controller { }) } - /// Build a `Controller` by auto-discovering all metrics from Prometheus and generating - /// default quantile queries for each metric. - /// - /// This is the zero-config path: no config file or query log is needed. - pub fn from_prometheus( - prometheus_url: &str, - opts: RuntimeOptions, - ) -> Result { - let metric_names = prometheus_client::fetch_all_metric_names(prometheus_url)?; - debug!( - "Discovered {} metric(s) from Prometheus", - metric_names.len() - ); - - // Generate default quantile queries for each metric. - let queries: Vec = metric_names - .iter() - .flat_map(|m| { - [0.50, 0.90, 0.99] - .iter() - .map(move |q| format!("quantile({}, {})", q, m)) - }) - .collect(); - - let schema = prometheus_client::build_schema_from_prometheus(prometheus_url, &queries)?; - - let config = ControllerConfig { - query_groups: vec![config::input::QueryGroup { - id: Some(1), - queries, - repetition_delay: 10, - controller_options: config::input::ControllerOptions { - accuracy_sla: 0.99, - latency_sla: 1.0, - }, - step: None, - range_duration: None, - }], - sketch_parameters: None, - aggregate_cleanup: Some(config::input::AggregateCleanupConfig { - policy: Some(asap_types::enums::CleanupPolicy::ReadBased), - }), - metrics: None, - existing_streaming_config: None, - existing_inference_config: None, - }; - - Ok(Self { - config, - schema, - options: opts, - }) - } - /// Build a `Controller` from a Prometheus query log file, fetching metric labels from /// Prometheus. /// diff --git a/asap-planner-rs/src/main.rs b/asap-planner-rs/src/main.rs index d6408b6e..863f4d4e 100644 --- a/asap-planner-rs/src/main.rs +++ b/asap-planner-rs/src/main.rs @@ -98,15 +98,14 @@ fn main() -> anyhow::Result<()> { (None, Some(log_path), Some(url)) => { Controller::from_query_log(&log_path, opts, &url)? } - (None, None, Some(url)) => Controller::from_prometheus(&url, opts)?, (None, Some(_log_path), None) => { anyhow::bail!( "--prometheus-url is required when using --query-log \ (query logs have no metrics hint to fall back on)" ) } - (None, None, None) => { - anyhow::bail!("provide one of --input_config, --query-log, or --prometheus-url") + (None, None, _) => { + anyhow::bail!("provide one of --input_config or --query-log") } _ => unreachable!("clap conflicts_with prevents this combination"), }; diff --git a/asap-planner-rs/src/prometheus_client.rs b/asap-planner-rs/src/prometheus_client.rs index 963ff36f..76958fd8 100644 --- a/asap-planner-rs/src/prometheus_client.rs +++ b/asap-planner-rs/src/prometheus_client.rs @@ -158,69 +158,6 @@ fn fetch_labels_for_metric( ))) } -/// Query Prometheus `GET /api/v1/label/__name__/values` and return all metric names. -pub fn fetch_all_metric_names(prometheus_url: &str) -> Result, ControllerError> { - let url = format!( - "{}/api/v1/label/__name__/values", - prometheus_url.trim_end_matches('/') - ); - let client = reqwest::blocking::Client::new(); - - for attempt in 1..=MAX_RETRIES { - let response = client.get(&url).send().map_err(|e| { - ControllerError::PrometheusClient(format!( - "HTTP request failed for metric names: {}", - e - )) - })?; - - let status = response.status(); - - if status == reqwest::StatusCode::SERVICE_UNAVAILABLE { - warn!( - "Prometheus returned 503 for metric names (attempt {}/{}); retrying in {}s", - attempt, - MAX_RETRIES, - RETRY_DELAY.as_secs(), - ); - thread::sleep(RETRY_DELAY); - continue; - } - - if !status.is_success() { - return Err(ControllerError::PrometheusClient(format!( - "Prometheus returned HTTP {} for metric names", - status - ))); - } - - let body: serde_json::Value = response.json().map_err(|e| { - ControllerError::PrometheusClient(format!( - "Failed to parse Prometheus response for metric names: {}", - e - )) - })?; - - let data = match body.get("data").and_then(|d| d.as_array()) { - Some(arr) => arr, - None => { - warn!("Prometheus returned no 'data' array for metric names"); - return Ok(Vec::new()); - } - }; - - return Ok(data - .iter() - .filter_map(|v| v.as_str().map(String::from)) - .collect()); - } - - Err(ControllerError::PrometheusClient(format!( - "Prometheus returned 503 for metric names after {} attempts; giving up", - MAX_RETRIES - ))) -} - /// Build a `PromQLSchema` by querying Prometheus for each metric name found in the given /// PromQL queries. Metrics with no series in Prometheus are skipped with a warning. pub fn build_schema_from_prometheus(