diff --git a/infra/k8s/base/kustomization.yaml b/infra/k8s/base/kustomization.yaml index 6fea0ec..cffcc1d 100644 --- a/infra/k8s/base/kustomization.yaml +++ b/infra/k8s/base/kustomization.yaml @@ -13,5 +13,7 @@ resources: - api-ingress-tls.yaml - api-service.yaml - api-servicemonitor.yaml + - worker-metrics-service.yaml + - worker-servicemonitor.yaml - migrate-job.yaml - worker-deployment.yaml diff --git a/infra/k8s/base/worker-deployment.yaml b/infra/k8s/base/worker-deployment.yaml index 3177687..4214091 100644 --- a/infra/k8s/base/worker-deployment.yaml +++ b/infra/k8s/base/worker-deployment.yaml @@ -30,6 +30,9 @@ spec: - name: worker image: ghcr.io/healthcare-monitoring-system/d2-data-intelligence:sha-a280ecc715a943d47c8b3ed635dc61e7f9f69704 imagePullPolicy: Always + ports: + - name: metrics + containerPort: 8001 command: - python - -m diff --git a/infra/k8s/base/worker-metrics-service.yaml b/infra/k8s/base/worker-metrics-service.yaml new file mode 100644 index 0000000..78e51ba --- /dev/null +++ b/infra/k8s/base/worker-metrics-service.yaml @@ -0,0 +1,15 @@ +# Prometheus scrape target (regular ClusterIP Service, not headless) for worker /metrics (ML + Kafka pipeline counters). +apiVersion: v1 +kind: Service +metadata: + name: d2-worker-metrics + labels: + app: d2-worker + d2-metrics: worker +spec: + selector: + app: d2-worker + ports: + - name: metrics + port: 8001 + targetPort: metrics diff --git a/infra/k8s/base/worker-servicemonitor.yaml b/infra/k8s/base/worker-servicemonitor.yaml new file mode 100644 index 0000000..48371dd --- /dev/null +++ b/infra/k8s/base/worker-servicemonitor.yaml @@ -0,0 +1,16 @@ +# Prometheus Operator: scrape d2-worker /metrics when release label matches the kube-stack. 
+apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: d2-worker + labels: + release: monitoring +spec: + selector: + matchLabels: + app: d2-worker + d2-metrics: worker + endpoints: + - port: metrics + path: /metrics + interval: 30s diff --git a/src/d2/config.py b/src/d2/config.py index 2b84e84..4fcd05d 100644 --- a/src/d2/config.py +++ b/src/d2/config.py @@ -85,7 +85,7 @@ def influx_configured(self) -> bool: # Observability worker_metrics_enabled: bool = True worker_metrics_host: str = "0.0.0.0" - worker_metrics_port: int = 9102 + worker_metrics_port: int = 8001 # Optional Keycloak checks (D4 will supply values) keycloak_well_known_url: str | None = None @@ -104,9 +104,6 @@ def ensure_asyncpg_driver_in_database_url(self) -> Self: self.database_url = "postgresql+asyncpg://" + raw.removeprefix("postgres://") return self - # Observability — port the worker exposes Prometheus metrics on. Set to 0 to disable. - worker_metrics_port: int = 8001 - @lru_cache def get_settings() -> Settings: