diff --git a/charts/kubex-automation-engine/docs/Configuration-Reference.md b/charts/kubex-automation-engine/docs/Configuration-Reference.md
index 26b8479..95c355b 100644
--- a/charts/kubex-automation-engine/docs/Configuration-Reference.md
+++ b/charts/kubex-automation-engine/docs/Configuration-Reference.md
@@ -19,6 +19,7 @@ This document maps the current Helm chart values to the resources created by the
 | --- | --- |
 | `Deployment` | Runs the controller manager and gateway sidecar |
 | `Service` | Exposes metrics and webhook endpoints |
+| `ServiceMonitor` | Optional Prometheus scrape target for the metrics Service |
 | `MutatingWebhookConfiguration` | Registers pod mutation webhook |
 | `ValidatingWebhookConfiguration` | Registers validation webhook |
 | `Secret` | Stores gateway credentials and TLS material when `createSecrets=true` |
@@ -103,10 +104,19 @@ Note: `kubexCredentials.userSecretName` is currently not consumed by this chart.
 | `webhook.certManager.enabled` | `false` | Use cert-manager instead of self-signed TLS |
 | `selfSignedCert.validity` | `3650` | Self-signed certificate validity in days |
 | `controllerManager.globalConfigReconcileInterval` | `1m` | Base reconcile cadence for global config controller |
+| `controllerManager.metricsBindAddress` | `:8080` | Metrics bind address used by the controller manager |
 | `kubex.requestTimeout` | `30s` | Kubex API request timeout |
 | `podSecurityContext` | chart default | Pod-level security context for the controller Deployment; defaults to `65534` for `runAsUser`, `runAsGroup`, and `fsGroup`, plus `runAsNonRoot=true` and `seccompProfile.type=RuntimeDefault` |
 | `openshift.enabled` | `false` | Enable OpenShift-oriented pod security context defaults and cleanup job settings without changing the default Kubernetes installation path |
 | `openshift.fsGroup` | `null` | Optional `fsGroup` applied when `openshift.enabled=true` unless already set in `podSecurityContext` |
+| `metrics.serviceMonitor.enabled` | `false` | Create a Prometheus Operator `ServiceMonitor` for the metrics Service |
+| `metrics.port` | `8080` | Service port for the metrics Service |
+| `metrics.serviceMonitor.additionalLabels` | `{}` | Additional labels applied to the ServiceMonitor |
+| `metrics.serviceMonitor.namespaceSelector` | `[]` | Namespace names to watch from the ServiceMonitor; empty means all namespaces |
+| `metrics.serviceMonitor.scheme` | `http` | Scrape scheme for the controller metrics endpoint; HTTP pairs with `--metrics-secure=false` in the manager args |
+| `metrics.serviceMonitor.path` | `/metrics` | Metrics endpoint path scraped by the ServiceMonitor |
+| `metrics.serviceMonitor.interval` | `""` | Optional scrape interval; omitted from the ServiceMonitor when empty |
+| `metrics.serviceMonitor.scrapeTimeout` | `""` | Optional scrape timeout; must be less than or equal to `metrics.serviceMonitor.interval` when set |
 | `gateway.securityContext` | chart default | Gateway sidecar container security context |
 | `cleanup.podSecurityContext` | `{}` | Optional pod security context for the pre-delete cleanup job |
 | `cleanup.securityContext` | chart default | Container security context for the pre-delete cleanup job |
diff --git a/charts/kubex-automation-engine/templates/_helpers.tpl b/charts/kubex-automation-engine/templates/_helpers.tpl
index bb4d792..ec25c2c 100644
--- a/charts/kubex-automation-engine/templates/_helpers.tpl
+++ b/charts/kubex-automation-engine/templates/_helpers.tpl
@@ -119,12 +119,22 @@ Controller manager container args
 - --health-probe-bind-address={{ .Values.controllerManager.healthProbeBindAddress }}
 {{- if .Values.metrics.enabled }}
 - --metrics-bind-address={{ .Values.controllerManager.metricsBindAddress }}
+{{- include "kubex-automation-engine.metricsSecureArg" . }}
 {{- end }}
 {{- range .Values.controllerManager.extraArgs }}
 - {{ . }}
 {{- end }}
 {{- end }}
 
+{{/*
+Controller metrics security flag.
+*/}}
+{{- define "kubex-automation-engine.metricsSecureArg" -}}
+{{- if and .Values.metrics.enabled (eq .Values.metrics.serviceMonitor.scheme "http") }}
+- --metrics-secure=false
+{{- end }}
+{{- end }}
+
 {{/*
 Generate or retrieve self-signed certificates for webhook
 Returns a dict with ca, cert, and key
diff --git a/charts/kubex-automation-engine/templates/metrics-service.yaml b/charts/kubex-automation-engine/templates/metrics-service.yaml
index 2e11d6a..4f4ca6e 100644
--- a/charts/kubex-automation-engine/templates/metrics-service.yaml
+++ b/charts/kubex-automation-engine/templates/metrics-service.yaml
@@ -8,7 +8,7 @@ metadata:
     {{- include "kubex-automation-engine.labels" . | nindent 4 }}
   {{- with .Values.metrics.annotations }}
   annotations:
-    {{- toYaml . | nindent 4 }}
+    {{- tpl (toYaml .) $ | nindent 4 }}
   {{- end }}
 spec:
   type: {{ .Values.metrics.type }}
diff --git a/charts/kubex-automation-engine/templates/metrics-servicemonitor.yaml b/charts/kubex-automation-engine/templates/metrics-servicemonitor.yaml
new file mode 100644
index 0000000..a7c252a
--- /dev/null
+++ b/charts/kubex-automation-engine/templates/metrics-servicemonitor.yaml
@@ -0,0 +1,38 @@
+{{- if and .Values.metrics.enabled .Values.metrics.serviceMonitor.enabled -}}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "kubex-automation-engine.fullname" . }}-metrics
+  namespace: {{ include "kubex-automation-engine.namespace" . }}
+  labels:
+    {{- include "kubex-automation-engine.labels" . | nindent 4 }}
+    {{- with .Values.metrics.serviceMonitor.additionalLabels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+spec:
+  {{- with .Values.metrics.serviceMonitor.namespaceSelector }}
+  namespaceSelector:
+    matchNames:
+      {{- toYaml . | nindent 6 }}
+  {{- else }}
+  namespaceSelector:
+    any: true
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "kubex-automation-engine.selectorLabels" . | nindent 6 }}
+  endpoints:
+    - port: metrics
+      path: {{ .Values.metrics.serviceMonitor.path }}
+      scheme: {{ .Values.metrics.serviceMonitor.scheme }}
+      # tlsConfig:
+      #   caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+      #   insecureSkipVerify: true
+      # bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+      {{- with .Values.metrics.serviceMonitor.interval }}
+      interval: {{ . }}
+      {{- end }}
+      {{- with .Values.metrics.serviceMonitor.scrapeTimeout }}
+      scrapeTimeout: {{ . }}
+      {{- end }}
+{{- end }}
diff --git a/charts/kubex-automation-engine/values.schema.json b/charts/kubex-automation-engine/values.schema.json
index 1d28c06..c1b9086 100644
--- a/charts/kubex-automation-engine/values.schema.json
+++ b/charts/kubex-automation-engine/values.schema.json
@@ -104,6 +104,53 @@
         }
       }
     },
+    "metrics": {
+      "type": "object",
+      "properties": {
+        "enabled": {
+          "type": "boolean"
+        },
+        "type": {
+          "type": "string"
+        },
+        "port": {
+          "type": "integer"
+        },
+        "annotations": {
+          "type": "object"
+        },
+        "serviceMonitor": {
+          "type": "object",
+          "properties": {
+            "enabled": {
+              "type": "boolean"
+            },
+            "additionalLabels": {
+              "type": "object"
+            },
+            "namespaceSelector": {
+              "type": "array",
+              "items": {
+                "type": "string"
+              }
+            },
+            "scheme": {
+              "type": "string",
+              "enum": ["http", "https"]
+            },
+            "path": {
+              "type": "string"
+            },
+            "interval": {
+              "type": "string"
+            },
+            "scrapeTimeout": {
+              "type": "string"
+            }
+          }
+        }
+      }
+    },
     "openshift": {
       "type": "object",
       "properties": {
diff --git a/charts/kubex-automation-engine/values.yaml b/charts/kubex-automation-engine/values.yaml
index 58b0472..2838495 100644
--- a/charts/kubex-automation-engine/values.yaml
+++ b/charts/kubex-automation-engine/values.yaml
@@ -126,7 +126,7 @@ controllerManager:
   healthProbeBindAddress: ":8081"
 
   # -- Metrics bind address
-  metricsBindAddress: ":8443"
+  metricsBindAddress: ":8080"
 
   # -- Additional arguments for the controller manager
   extraArgs:
@@ -180,9 +180,30 @@ metrics:
   # -- Service type for metrics
   type: ClusterIP
   # -- Service port for metrics
-  port: 8443
+  port: 8080
   # -- Service annotations
-  annotations: {}
+  annotations:
+    # Controller metrics are served over plain HTTP without authentication in this chart; the manager args set --metrics-secure=false when this scheme is used.
+    prometheus.io/scrape: "true"
+    prometheus.io/port: '{{ .Values.metrics.port }}'
+    prometheus.io/path: '{{ .Values.metrics.serviceMonitor.path }}'
+    prometheus.io/scheme: '{{ .Values.metrics.serviceMonitor.scheme }}'
+  # -- Optional ServiceMonitor configuration for Prometheus Operator
+  serviceMonitor:
+    # -- Create a ServiceMonitor for the metrics Service
+    enabled: false
+    # -- Additional labels applied to the ServiceMonitor
+    additionalLabels: {}
+    # -- Namespace names to discover for the ServiceMonitor. Leave empty to watch all namespaces.
+    namespaceSelector: []
+    # -- Scrape scheme used by Prometheus
+    scheme: http
+    # -- Metrics endpoint path
+    path: /metrics
+    # -- Optional scrape interval
+    interval: ""
+    # -- Optional scrape timeout. Must be <= interval when interval is set.
+    scrapeTimeout: ""
 
 # Webhook configuration
 webhook: