Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions infra/k8s/base/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,7 @@ resources:
- api-ingress-tls.yaml
- api-service.yaml
- api-servicemonitor.yaml
- worker-metrics-service.yaml
- worker-servicemonitor.yaml
- migrate-job.yaml
- worker-deployment.yaml
3 changes: 3 additions & 0 deletions infra/k8s/base/worker-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ spec:
- name: worker
image: ghcr.io/healthcare-monitoring-system/d2-data-intelligence:sha-a280ecc715a943d47c8b3ed635dc61e7f9f69704
imagePullPolicy: Always
ports:
- name: metrics
containerPort: 8001
command:
- python
- -m
Expand Down
15 changes: 15 additions & 0 deletions infra/k8s/base/worker-metrics-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Prometheus scrape target Service for worker /metrics (ML + Kafka pipeline counters).
apiVersion: v1
kind: Service
metadata:
name: d2-worker-metrics
labels:
app: d2-worker
d2-metrics: worker
spec:
selector:
app: d2-worker
ports:
- name: metrics
port: 8001
targetPort: metrics
16 changes: 16 additions & 0 deletions infra/k8s/base/worker-servicemonitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Prometheus Operator: scrape d2-worker /metrics. The `release: monitoring` label below must match the ServiceMonitor selector of the installed kube-prometheus-stack release.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: d2-worker
labels:
release: monitoring
spec:
selector:
matchLabels:
app: d2-worker
d2-metrics: worker
endpoints:
- port: metrics
path: /metrics
interval: 30s
5 changes: 1 addition & 4 deletions src/d2/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def influx_configured(self) -> bool:
# Observability
worker_metrics_enabled: bool = True
worker_metrics_host: str = "0.0.0.0"
worker_metrics_port: int = 9102
worker_metrics_port: int = 8001

# Optional Keycloak checks (D4 will supply values)
keycloak_well_known_url: str | None = None
Expand All @@ -104,9 +104,6 @@ def ensure_asyncpg_driver_in_database_url(self) -> Self:
self.database_url = "postgresql+asyncpg://" + raw.removeprefix("postgres://")
return self

# Observability — port the worker exposes Prometheus metrics on. Set to 0 to disable.
worker_metrics_port: int = 8001


@lru_cache
def get_settings() -> Settings:
Expand Down
Loading