From 7a35d3bbf9405e38fc53a8d0bd269aa61e78f8f0 Mon Sep 17 00:00:00 2001 From: Maneesha Herath <145188213+Maneeshaherath@users.noreply.github.com> Date: Sat, 9 May 2026 17:57:22 +0530 Subject: [PATCH] fix(worker): remove duplicate metrics bind; add Prometheus scrape for worker --- infra/k8s/base/kustomization.yaml | 2 ++ infra/k8s/base/worker-deployment.yaml | 3 +++ infra/k8s/base/worker-metrics-service.yaml | 15 +++++++++++++++ infra/k8s/base/worker-servicemonitor.yaml | 16 ++++++++++++++++ src/d2/config.py | 5 +---- 5 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 infra/k8s/base/worker-metrics-service.yaml create mode 100644 infra/k8s/base/worker-servicemonitor.yaml diff --git a/infra/k8s/base/kustomization.yaml b/infra/k8s/base/kustomization.yaml index 6fea0ec..cffcc1d 100644 --- a/infra/k8s/base/kustomization.yaml +++ b/infra/k8s/base/kustomization.yaml @@ -13,5 +13,7 @@ resources: - api-ingress-tls.yaml - api-service.yaml - api-servicemonitor.yaml + - worker-metrics-service.yaml + - worker-servicemonitor.yaml - migrate-job.yaml - worker-deployment.yaml diff --git a/infra/k8s/base/worker-deployment.yaml b/infra/k8s/base/worker-deployment.yaml index 3177687..4214091 100644 --- a/infra/k8s/base/worker-deployment.yaml +++ b/infra/k8s/base/worker-deployment.yaml @@ -30,6 +30,9 @@ spec: - name: worker image: ghcr.io/healthcare-monitoring-system/d2-data-intelligence:sha-a280ecc715a943d47c8b3ed635dc61e7f9f69704 imagePullPolicy: Always + ports: + - name: metrics + containerPort: 8001 command: - python - -m diff --git a/infra/k8s/base/worker-metrics-service.yaml b/infra/k8s/base/worker-metrics-service.yaml new file mode 100644 index 0000000..78e51ba --- /dev/null +++ b/infra/k8s/base/worker-metrics-service.yaml @@ -0,0 +1,15 @@ +# ClusterIP Service exposing worker /metrics as a Prometheus scrape target (ML + Kafka pipeline counters). 
+apiVersion: v1 +kind: Service +metadata: + name: d2-worker-metrics + labels: + app: d2-worker + d2-metrics: worker +spec: + selector: + app: d2-worker + ports: + - name: metrics + port: 8001 + targetPort: metrics diff --git a/infra/k8s/base/worker-servicemonitor.yaml b/infra/k8s/base/worker-servicemonitor.yaml new file mode 100644 index 0000000..48371dd --- /dev/null +++ b/infra/k8s/base/worker-servicemonitor.yaml @@ -0,0 +1,16 @@ +# Prometheus Operator: scrape d2-worker /metrics when release label matches the kube-stack. +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: d2-worker + labels: + release: monitoring +spec: + selector: + matchLabels: + app: d2-worker + d2-metrics: worker + endpoints: + - port: metrics + path: /metrics + interval: 30s diff --git a/src/d2/config.py b/src/d2/config.py index 2b84e84..4fcd05d 100644 --- a/src/d2/config.py +++ b/src/d2/config.py @@ -85,7 +85,7 @@ def influx_configured(self) -> bool: # Observability worker_metrics_enabled: bool = True worker_metrics_host: str = "0.0.0.0" - worker_metrics_port: int = 9102 + worker_metrics_port: int = 8001 # Optional Keycloak checks (D4 will supply values) keycloak_well_known_url: str | None = None @@ -104,9 +104,6 @@ def ensure_asyncpg_driver_in_database_url(self) -> Self: self.database_url = "postgresql+asyncpg://" + raw.removeprefix("postgres://") return self - # Observability — port the worker exposes Prometheus metrics on. Set to 0 to disable. - worker_metrics_port: int = 8001 - @lru_cache def get_settings() -> Settings: