diff --git a/.github/workflows/lint-test-common-services.yaml b/.github/workflows/lint-test-common-services.yaml index 70a756f3..75619d82 100644 --- a/.github/workflows/lint-test-common-services.yaml +++ b/.github/workflows/lint-test-common-services.yaml @@ -109,10 +109,20 @@ jobs: kubectl cluster-info --context kind-chart-testing kubectl get nodes --context kind-chart-testing + - name: Install Prometheus Operator CRDs + run: | + curl -sSL -o /tmp/prom-crds.yaml https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/main/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml + curl -sSL -o /tmp/prom-podmonitor-crd.yaml https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/main/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml + curl -sSL -o /tmp/prom-rule-crd.yaml https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/main/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml + kubectl apply -f /tmp/prom-crds.yaml + kubectl apply -f /tmp/prom-podmonitor-crd.yaml + kubectl apply -f /tmp/prom-rule-crd.yaml + shell: bash + - name: Run chart-testing (install) run: | ct install --charts=charts/common-services \ - --helm-extra-args="--timeout=15m0s" \ + --helm-extra-args="--timeout=20m0s" \ --helm-extra-set-args="--set=velero-ui.enabled=false \ --set=elasticsearch.replicas=1 \ --set=elasticsearch.clusterHealthCheckParams=wait_for_status=yellow&timeout=60s \ @@ -132,7 +142,7 @@ jobs: contents: read packages: write runs-on: ubuntu-latest - needs: test + needs: lint steps: - name: Checkout uses: actions/checkout@v3 diff --git a/charts/common-services/Chart.yaml b/charts/common-services/Chart.yaml index 9149862d..b49d3629 100644 --- a/charts/common-services/Chart.yaml +++ b/charts/common-services/Chart.yaml @@ -10,7 +10,7 @@ type: application # Chart Version # ------------- # This version should be incremented following Semantic Versioning 
(https://semver.org/) whenever the chart's structure or templates change. -version: 2.0.0 +version: 2.0.1 # Application Version (Informational) # ----------------------------------- @@ -98,7 +98,7 @@ dependencies: condition: velero.enabled - name: cloudnative-pg repository: https://cloudnative-pg.github.io/charts - version: 0.26.0 + version: 0.27.1 condition: cloudnative-pg.enabled - name: velero-ui version: 0.x.x @@ -109,8 +109,8 @@ dependencies: repository: https://vesoft-inc.github.io/nebula-operator/charts condition: nebula-operator.enabled - name: flink-kubernetes-operator - version: 1.12.1 - repository: https://downloads.apache.org/flink/flink-kubernetes-operator-1.12.1 + version: 1.13.0 + repository: https://downloads.apache.org/flink/flink-kubernetes-operator-1.13.0 condition: flink-kubernetes-operator.enabled - name: loki repository: https://grafana.github.io/helm-charts @@ -120,3 +120,7 @@ dependencies: repository: https://grafana.github.io/helm-charts version: 1.2.1 condition: alloy.enabled +- name: prometheus-elasticsearch-exporter + version: 6.6.0 + repository: https://prometheus-community.github.io/helm-charts + condition: prometheus-elasticsearch-exporter.enabled diff --git a/charts/common-services/README.md b/charts/common-services/README.md index f2429431..921b959d 100644 --- a/charts/common-services/README.md +++ b/charts/common-services/README.md @@ -1,9 +1,50 @@ # common-services -![Version: 1.0.9](https://img.shields.io/badge/Version-1.0.9-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.3](https://img.shields.io/badge/AppVersion-1.3-informational?style=flat-square) +![Version: 2.0.1](https://img.shields.io/badge/Version-2.0.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 
2.0](https://img.shields.io/badge/AppVersion-2.0-informational?style=flat-square) A Helm chart for deploying RadiantOne Common Services on Kubernetes +## What's New in 2.0.1 + +### Automatic Metrics & Dashboards + +Every enabled service now automatically: +- Exposes a Prometheus-compatible metrics endpoint +- Is scraped by Prometheus via service annotations +- Has a pre-configured Grafana dashboard + +| Service | Metrics Port | Dashboard | +|---------|-------------|-----------| +| ArgoCD | 8082, 8083, 8084 | ✅ | +| Elasticsearch (exporter) | 9108 | ✅ | +| Fluent-bit | 2020 | ✅ | +| Grafana | 3000 | ✅ | +| HAProxy | 8404 | ✅ | +| Kibana | 9684 | ✅ | +| Loki | 3100 | ✅ | +| OpenSearch | 9200 | ✅ | +| PostgreSQL | 9187 | ✅ | +| Velero | 8085 | ✅ | +| Zookeeper | 9141 | ✅ | +| CloudNative-PG | 8080 | ✅ | +| Alloy | 12345 | ✅ | +| Flink Operator | 9999 | ✅ | + +### Prometheus Scraping + +Services are configured with standard annotations for Prometheus discovery: + +```yaml +annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "<metrics-port>" + prometheus.io/path: "/metrics" +``` + +### Dashboard Auto-Import + +Grafana dashboards are automatically provisioned via ConfigMaps with the `grafana_dashboard: "1"` label. The Grafana sidecar discovers and imports them on startup. + ## Maintainers | Name | Email | Url | diff --git a/charts/common-services/dashboards/alloy/README.md b/charts/common-services/dashboards/alloy/README.md new file mode 100644 index 00000000..e5ea1018 --- /dev/null +++ b/charts/common-services/dashboards/alloy/README.md @@ -0,0 +1,30 @@ +# Grafana Alloy Dashboards + +## Included Dashboard + +- `alloy-cluster-dashboard.json` - Cluster overview dashboard (Grafana Dashboard ID: 19624) + +This dashboard is automatically provisioned when: +- `grafana.enabled: true` +- `alloy.enabled: true` + +## Metrics + +Alloy exposes metrics on port 12345 at `/metrics`. 
The service is configured with Prometheus annotations for automatic scraping: + +```yaml +alloy: + service: + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "12345" + prometheus.io/path: "/metrics" +``` + +## Key Metrics + +| Metric | Description | +|--------|-------------| +| `alloy_build_info` | Build information | +| `alloy_component_*` | Component-level metrics | +| `alloy_resources_*` | Resource usage metrics | diff --git a/charts/common-services/dashboards/alloy/alloy-cluster-dashboard.json b/charts/common-services/dashboards/alloy/alloy-cluster-dashboard.json new file mode 100644 index 00000000..15d3c10e --- /dev/null +++ b/charts/common-services/dashboards/alloy/alloy-cluster-dashboard.json @@ -0,0 +1,4730 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "7.0.1" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Dashboard for Spring Boot2 Statistics(by micrometer-prometheus).\r\nThis is a fork of dashboard 6756, fixed for Spring Boot 2.3 and support for Jetty instead of Tomcat.", + "editable": true, + "gnetId": 19624, + "graphTooltip": 0, + "id": null, + "iteration": 1592010645913, + "links": [], + "panels": [ + { + "collapsed": true, + "datasource": null, + 
"gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 54, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "#5195ce", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 1, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 0, + "y": 5 + }, + "height": "", + "id": 52, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "70%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "process_uptime_seconds{app=\"orders\", app_kubernetes_io_instance=\"mushop\", app_kubernetes_io_name=\"orders\", instance=\"10.1.0.72:80\", job=\"kubernetes-pods\", kubernetes_namespace=\"mushop\", kubernetes_pod_name=\"mushop-orders-67bb8f45c8-5cr5p\", mockmode=\"false\", pod_template_hash=\"67bb8f45c8\", version=\"local\"}", + "targets": [ + { + "expr": "process_uptime_seconds{application=\"$application\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 14400 + } + ], + "thresholds": "", + "title": "Uptime", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + 
"cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 1, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 5, + "x": 6, + "y": 5 + }, + "id": 58, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "70%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(jvm_memory_used_bytes{application=\"$application\", instance=\"$instance\", area=\"heap\"})*100/sum(jvm_memory_max_bytes{application=\"$application\",instance=\"$instance\", area=\"heap\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 14400 + } + ], + "thresholds": "70,90", + "title": "Heap Used", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 1, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + 
"custom": {} + }, + "overrides": [] + }, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 5, + "x": 11, + "y": 5 + }, + "id": 60, + "interval": null, + "links": [], + "mappingType": 2, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "70%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + }, + { + "from": "-99999999999999999999999999999999", + "text": "N/A", + "to": "0" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(jvm_memory_used_bytes{application=\"$application\", instance=\"$instance\", area=\"nonheap\"})*100/sum(jvm_memory_max_bytes{application=\"$application\",instance=\"$instance\", area=\"nonheap\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 14400 + } + ], + "thresholds": "70,90", + "title": "Non-Heap Used", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "x", + "value": "" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 5 + }, + "hiddenSeries": false, + "id": 66, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + 
"lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_files_open{application=\"$application\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Open Files", + "refId": "A" + }, + { + "expr": "process_files_max{application=\"$application\", instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Max Files", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Process Open Files", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "locale", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "#5195ce", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "dateTimeAsIso", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 0, + "y": 8 + }, + "height": "", + "id": 56, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + 
}, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "70%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "{app=\"orders\", app_kubernetes_io_instance=\"mushop\", app_kubernetes_io_name=\"orders\", instance=\"10.1.0.72:80\", job=\"kubernetes-pods\", kubernetes_namespace=\"mushop\", kubernetes_pod_name=\"mushop-orders-67bb8f45c8-5cr5p\", mockmode=\"false\", pod_template_hash=\"67bb8f45c8\", version=\"local\"}", + "targets": [ + { + "expr": "process_start_time_seconds{application=\"$application\", instance=\"$instance\"}*1000", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 14400 + } + ], + "thresholds": "", + "title": "Start time", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 11 + }, + "hiddenSeries": false, + "id": 95, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { 
+ "expr": "system_cpu_usage{instance=\"$instance\", application=\"$application\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "System CPU Usage", + "refId": "A" + }, + { + "expr": "process_cpu_usage{instance=\"$instance\", application=\"$application\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Process CPU Usage", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 11 + }, + "hiddenSeries": false, + "id": 96, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "system_load_average_1m{instance=\"$instance\", application=\"$application\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Load Average [1m]", + "refId": "A" + }, + { + "expr": 
"system_cpu_count{instance=\"$instance\", application=\"$application\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "CPU Core Size", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Load Average", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Basic Statistics", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 48, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 2 + }, + "hiddenSeries": false, + "id": 85, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "memory_pool_heap", + "repeatDirection": "h", + "scopedVars": { + "memory_pool_heap": { + "selected": false, + "text": "ZHeap", + "value": "ZHeap" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_memory_used_bytes{instance=\"$instance\", 
application=\"$application\", id=\"$memory_pool_heap\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used", + "refId": "C" + }, + { + "expr": "jvm_memory_committed_bytes{instance=\"$instance\", application=\"$application\", id=\"$memory_pool_heap\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Commited", + "refId": "A" + }, + { + "expr": "jvm_memory_max_bytes{instance=\"$instance\", application=\"$application\", id=\"$memory_pool_heap\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Max", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "$memory_pool_heap (heap)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 10 + }, + "hiddenSeries": false, + "id": 88, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "memory_pool_nonheap", + "repeatDirection": "h", + "scopedVars": { + "memory_pool_nonheap": { 
+ "selected": false, + "text": "CodeHeap 'non-nmethods'", + "value": "CodeHeap 'non-nmethods'" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_memory_used_bytes{instance=\"$instance\", application=\"$application\", id=\"$memory_pool_nonheap\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used", + "refId": "C" + }, + { + "expr": "jvm_memory_committed_bytes{instance=\"$instance\", application=\"$application\", id=\"$memory_pool_nonheap\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Commited", + "refId": "A" + }, + { + "expr": "jvm_memory_max_bytes{instance=\"$instance\", application=\"$application\", id=\"$memory_pool_nonheap\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Max", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "$memory_pool_nonheap (non-heap)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 10 + }, + "hiddenSeries": false, + "id": 97, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + 
"linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": "h", + "repeatIteration": 1592010645910, + "repeatPanelId": 88, + "scopedVars": { + "memory_pool_nonheap": { + "selected": false, + "text": "CodeHeap 'non-profiled nmethods'", + "value": "CodeHeap 'non-profiled nmethods'" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_memory_used_bytes{instance=\"$instance\", application=\"$application\", id=\"$memory_pool_nonheap\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used", + "refId": "C" + }, + { + "expr": "jvm_memory_committed_bytes{instance=\"$instance\", application=\"$application\", id=\"$memory_pool_nonheap\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Commited", + "refId": "A" + }, + { + "expr": "jvm_memory_max_bytes{instance=\"$instance\", application=\"$application\", id=\"$memory_pool_nonheap\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Max", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "$memory_pool_nonheap (non-heap)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + 
"custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 10 + }, + "hiddenSeries": false, + "id": 98, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": "h", + "repeatIteration": 1592010645910, + "repeatPanelId": 88, + "scopedVars": { + "memory_pool_nonheap": { + "selected": false, + "text": "CodeHeap 'profiled nmethods'", + "value": "CodeHeap 'profiled nmethods'" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_memory_used_bytes{instance=\"$instance\", application=\"$application\", id=\"$memory_pool_nonheap\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used", + "refId": "C" + }, + { + "expr": "jvm_memory_committed_bytes{instance=\"$instance\", application=\"$application\", id=\"$memory_pool_nonheap\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Commited", + "refId": "A" + }, + { + "expr": "jvm_memory_max_bytes{instance=\"$instance\", application=\"$application\", id=\"$memory_pool_nonheap\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Max", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "$memory_pool_nonheap (non-heap)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": 
true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 10 + }, + "hiddenSeries": false, + "id": 99, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": "h", + "repeatIteration": 1592010645910, + "repeatPanelId": 88, + "scopedVars": { + "memory_pool_nonheap": { + "selected": false, + "text": "Metaspace", + "value": "Metaspace" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_memory_used_bytes{instance=\"$instance\", application=\"$application\", id=\"$memory_pool_nonheap\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used", + "refId": "C" + }, + { + "expr": "jvm_memory_committed_bytes{instance=\"$instance\", application=\"$application\", id=\"$memory_pool_nonheap\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Commited", + "refId": "A" + }, + { + "expr": "jvm_memory_max_bytes{instance=\"$instance\", application=\"$application\", id=\"$memory_pool_nonheap\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Max", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "$memory_pool_nonheap (non-heap)", + "tooltip": 
{ + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 0, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 18 + }, + "hiddenSeries": false, + "id": 50, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_classes_loaded{instance=\"$instance\", application=\"$application\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Classes Loaded", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Classes Loaded", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "locale", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + 
"max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 18 + }, + "hiddenSeries": false, + "id": 80, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(jvm_classes_unloaded_total{instance=\"$instance\", application=\"$application\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Classes Unloaded", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Classes Unloaded", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 26 + 
}, + "hiddenSeries": false, + "id": 82, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_buffer_memory_used_bytes{instance=\"$instance\", application=\"$application\", id=\"direct\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used Bytes", + "refId": "A" + }, + { + "expr": "jvm_buffer_total_capacity_bytes{instance=\"$instance\", application=\"$application\", id=\"direct\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Capacity Bytes", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Direct Buffers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 26 + }, + "hiddenSeries": false, + "id": 83, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, 
+ "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_buffer_memory_used_bytes{instance=\"$instance\", application=\"$application\", id=\"mapped\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used Bytes", + "refId": "A" + }, + { + "expr": "jvm_buffer_total_capacity_bytes{instance=\"$instance\", application=\"$application\", id=\"mapped\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Capacity Bytes", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Mapped Buffers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 33 + }, + "hiddenSeries": false, + "id": 68, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + 
"points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_threads_daemon{instance=\"$instance\", application=\"$application\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Daemon", + "refId": "A" + }, + { + "expr": "jvm_threads_live{instance=\"$instance\", application=\"$application\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Live", + "refId": "B" + }, + { + "expr": "jvm_threads_peak{instance=\"$instance\", application=\"$application\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Peak", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Threads", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 33 + }, + "hiddenSeries": false, + "id": 78, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + 
"spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(jvm_gc_memory_allocated_bytes_total{instance=\"$instance\", application=\"$application\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "allocated", + "refId": "A" + }, + { + "expr": "irate(jvm_gc_memory_promoted_bytes_total{instance=\"$instance\", application=\"$application\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "promoted", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Allocate/Promote", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "JVM Statistics - Memory", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "id": 72, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 3 + }, + "hiddenSeries": false, + "id": 74, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + 
"pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(jvm_gc_pause_seconds_count{instance=\"$instance\", application=\"$application\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{action}} [{{cause}}]", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "GC Count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "locale", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 3 + }, + "hiddenSeries": false, + "id": 76, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(jvm_gc_pause_seconds_sum{instance=\"$instance\", application=\"$application\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": 
"{{action}} [{{cause}}]", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "GC Stop the World Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "JVM Statistics - GC", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 34, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 4 + }, + "id": 44, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "hikaricp_connections{app=\"orders\", app_kubernetes_io_instance=\"mushop\", 
app_kubernetes_io_name=\"orders\", instance=\"10.1.0.72:80\", job=\"kubernetes-pods\", kubernetes_namespace=\"mushop\", kubernetes_pod_name=\"mushop-orders-67bb8f45c8-5cr5p\", mockmode=\"false\", pod_template_hash=\"67bb8f45c8\", pool=\"HikariPool-1\", version=\"local\"}", + "targets": [ + { + "expr": "hikaricp_connections{instance=\"$instance\", application=\"$application\", pool=\"$hikaricp\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Connections Size", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 20, + "x": 4, + "y": 4 + }, + "hiddenSeries": false, + "id": 36, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": false, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "hikaricp_connections_active{instance=\"$instance\", application=\"$application\", pool=\"$hikaricp\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Active", + "refId": "B" + }, + { + "expr": "hikaricp_connections_idle{instance=\"$instance\", application=\"$application\", pool=\"$hikaricp\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Idle", + "refId": "A" + }, + { + "expr": 
"hikaricp_connections_pending{instance=\"$instance\", application=\"$application\", pool=\"$hikaricp\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Pending", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Connections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 8 + }, + "id": 46, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "hikaricp_connections_timeout_total{app=\"orders\", app_kubernetes_io_instance=\"mushop\", app_kubernetes_io_name=\"orders\", 
instance=\"10.1.0.72:80\", job=\"kubernetes-pods\", kubernetes_namespace=\"mushop\", kubernetes_pod_name=\"mushop-orders-67bb8f45c8-5cr5p\", mockmode=\"false\", pod_template_hash=\"67bb8f45c8\", pool=\"HikariPool-1\", version=\"local\"}", + "targets": [ + { + "expr": "hikaricp_connections_timeout_total{instance=\"$instance\", application=\"$application\", pool=\"$hikaricp\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Connection Timeout Count", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 12 + }, + "hiddenSeries": false, + "id": 38, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "hikaricp_connections_creation_seconds_sum{instance=\"$instance\", application=\"$application\", pool=\"$hikaricp\"} / hikaricp_connections_creation_seconds_count{instance=\"$instance\", application=\"$application\", pool=\"$hikaricp\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Creation Time", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Connection Creation Time", + "tooltip": { + "shared": true, + "sort": 0, + 
"value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 12 + }, + "hiddenSeries": false, + "id": 42, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "hikaricp_connections_usage_seconds_sum{instance=\"$instance\", application=\"$application\", pool=\"$hikaricp\"} / hikaricp_connections_usage_seconds_count{instance=\"$instance\", application=\"$application\", pool=\"$hikaricp\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Usage Time", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Connection Usage Time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, 
+ { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 12 + }, + "hiddenSeries": false, + "id": 40, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "hikaricp_connections_acquire_seconds_sum{instance=\"$instance\", application=\"$application\", pool=\"$hikaricp\"} / hikaricp_connections_acquire_seconds_count{instance=\"$instance\", application=\"$application\", pool=\"$hikaricp\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Acquire Time", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Connection Acquire Time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "HikariCP Statistics", + "type": "row" + }, + { + "collapsed": false, + 
"datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 101, + "panels": [], + "title": "Hibernate Stats", + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": 1, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "nullValueMode": "connected", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 0, + "y": 5 + }, + "id": 102, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "last" + ], + "values": false + } + }, + "pluginVersion": "7.0.1", + "targets": [ + { + "expr": "hibernate_transactions_total{application=\"$application\", instance=\"$instance\", result=\"success\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 14400 + } + ], + "title": "Total Transactions", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": 1, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "nullValueMode": "connected", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 6, + "y": 5 + }, + "id": 106, + "interval": null, + "links": [], + "maxDataPoints": 100, + 
"options": { + "colorMode": "value", + "fieldOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "last" + ], + "values": false + } + }, + "pluginVersion": "7.0.1", + "targets": [ + { + "expr": "hibernate_statements_total{application=\"$application\", instance=\"$instance\", status=\"prepared\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Statements", + "metric": "", + "refId": "A", + "step": 14400 + } + ], + "title": "Prepared Statements ", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": 1, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "nullValueMode": "connected", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 12, + "y": 5 + }, + "id": 107, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "last" + ], + "values": false + } + }, + "pluginVersion": "7.0.1", + "targets": [ + { + "expr": "hibernate_optimistic_failures_total", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 14400 + } + ], + "title": "Total optimistic locking failures", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": 1, + "mappings": [ + { + "id": 0, + "op": "=", + 
"text": "N/A", + "type": 1, + "value": "null" + } + ], + "nullValueMode": "connected", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 18, + "y": 5 + }, + "id": 108, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "last" + ], + "values": false + } + }, + "pluginVersion": "7.0.1", + "targets": [ + { + "expr": "hibernate_query_executions_total", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 14400 + } + ], + "title": "Total query executions", + "type": "stat" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "hiddenSeries": false, + "id": 104, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(hibernate_sessions_open_total{application=\"$application\", instance=\"$instance\"}[5m]) ", + "hide": false, + "interval": "", + "legendFormat": "sessions_opened", + "refId": "B" + }, + { + "expr": "rate(hibernate_sessions_closed_total{application=\"$application\", 
instance=\"$instance\"}[5m]) ", + "hide": false, + "interval": "", + "legendFormat": "sessions_closed", + "refId": "C" + }, + { + "expr": "rate(hibernate_flushes_total{application=\"$application\", instance=\"$instance\"}[5m]) ", + "interval": "", + "legendFormat": "flushes", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [ + { + "$$hashKey": "object:969", + "colorMode": "background6", + "fill": true, + "fillColor": "rgba(234, 112, 112, 0.12)", + "line": false, + "lineColor": "rgba(237, 46, 24, 0.60)", + "op": "time" + } + ], + "timeShift": null, + "title": "Entity Manager / Session Metrics", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:934", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:935", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "hiddenSeries": false, + "id": 105, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pluginVersion": "7.0.1", + 
"pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(hibernate_collections_loads_total{application=\"$application\", instance=\"$instance\"}[2m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "collections loaded", + "refId": "B" + }, + { + "expr": "rate(hibernate_collections_updates_total{application=\"$application\", instance=\"$instance\"}[2m])", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "collections updated", + "refId": "C" + }, + { + "expr": "rate(hibernate_collections_fetches_total{application=\"$application\", instance=\"$instance\"}[2m])", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "collection fetches", + "refId": "A" + }, + { + "expr": "rate(hibernate_collections_deletes_total{application=\"$application\", instance=\"$instance\"}[2m])", + "interval": "", + "legendFormat": "remove", + "refId": "D" + }, + { + "expr": "rate(hibernate_collections_recreates_total{application=\"$application\", instance=\"$instance\"}[2m])", + "interval": "", + "legendFormat": "recreates", + "refId": "E" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Entity Collection Metrics", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:602", + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:603", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + 
"dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 109, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pluginVersion": "7.0.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(hibernate_collections_loads_total{application=\"$application\", instance=\"$instance\"}[2m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "entities loaded", + "refId": "B" + }, + { + "expr": "rate(hibernate_entities_updates_total{application=\"$application\", instance=\"$instance\"}[2m])", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "entities updated", + "refId": "C" + }, + { + "expr": "rate(hibernate_entities_fetches_total{application=\"$application\", instance=\"$instance\"}[2m])", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "entities fetched", + "refId": "A" + }, + { + "expr": "rate(hibernate_entities_deletes_total{application=\"$application\", instance=\"$instance\"}[2m])", + "interval": "", + "legendFormat": "entities removed", + "refId": "D" + }, + { + "expr": "rate(hibernate_entities_recreates_total{application=\"$application\", instance=\"$instance\"}[2m])", + "interval": "", + "legendFormat": "recreates", + "refId": "E" + }, + { + 
"expr": "rate(hibernate_entities_inserts_total{application=\"$application\", instance=\"$instance\"}[2m])", + "interval": "", + "legendFormat": "entities inserted", + "refId": "F" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Entity Metrics", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:602", + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:603", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 18, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 5 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(http_server_requests_seconds_count{instance=\"$instance\", application=\"$application\", uri!~\".*actuator.*\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + 
"legendFormat": "{{method}} [{{status}}] - {{uri}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Request Count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 12 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(http_server_requests_seconds_sum{instance=\"$instance\", application=\"$application\", exception=\"None\", uri!~\".*actuator.*\"}[5m]) / irate(http_server_requests_seconds_count{instance=\"$instance\", application=\"$application\", exception=\"None\", uri!~\".*actuator.*\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{method}} [{{status}}] - {{uri}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": 
[], + "timeShift": null, + "title": "Response Time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "HTTP Statistics", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 25 + }, + "id": 22, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "locale", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 6 + }, + "id": 28, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "jetty_threads_config_max{app=\"orders\", app_kubernetes_io_instance=\"mushop\", app_kubernetes_io_name=\"orders\", instance=\"10.1.0.72:80\", job=\"kubernetes-pods\", kubernetes_namespace=\"mushop\", 
kubernetes_pod_name=\"mushop-orders-67bb8f45c8-5cr5p\", mockmode=\"false\", pod_template_hash=\"67bb8f45c8\", version=\"local\"}", + "targets": [ + { + "expr": "jetty_threads_config_max{instance=\"$instance\", application=\"$application\"} ", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Max Threads", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 0, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 20, + "x": 4, + "y": 6 + }, + "hiddenSeries": false, + "id": 24, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jetty_threads_busy{instance=\"$instance\", application=\"$application\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "busy ", + "refId": "A" + }, + { + "expr": "jetty_threads_idle{instance=\"$instance\", application=\"$application\"}", + "interval": "", + "legendFormat": "idle", + "refId": "B" + }, + { + "expr": "jetty_threads_current{instance=\"$instance\", application=\"$application\"}", + "interval": "", + "legendFormat": "current", + "refId": "C" + }, + { + "expr": "jetty_threads_jobs{instance=\"$instance\", application=\"$application\"}", + 
"interval": "", + "legendFormat": "jobs", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Jetty Threads", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "locale", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 10 + }, + "id": 32, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "jetty_threads_config_min{app=\"orders\", app_kubernetes_io_instance=\"mushop\", app_kubernetes_io_name=\"orders\", instance=\"10.1.0.72:80\", job=\"kubernetes-pods\", kubernetes_namespace=\"mushop\", 
kubernetes_pod_name=\"mushop-orders-67bb8f45c8-5cr5p\", mockmode=\"false\", pod_template_hash=\"67bb8f45c8\", version=\"local\"}", + "targets": [ + { + "expr": "jetty_threads_config_min{instance=\"$instance\", application=\"$application\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Thread Config Min", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "title": "Jetty Statistics", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 8, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 104 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "", + "expr": "irate(logback_events_total{instance=\"$instance\", application=\"$application\", level=\"info\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "info", + "rawSql": "SELECT\n $__time(time_column),\n value1\nFROM\n metric_table\nWHERE\n $__timeFilter(time_column)\n", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "INFO logs", + 
"tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 104 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "", + "expr": "irate(logback_events_total{instance=\"$instance\", application=\"$application\", level=\"error\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "error", + "rawSql": "SELECT\n $__time(time_column),\n value1\nFROM\n metric_table\nWHERE\n $__timeFilter(time_column)\n", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "ERROR logs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 111 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "", + "expr": "irate(logback_events_total{instance=\"$instance\", application=\"$application\", level=\"warn\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "warn", + "rawSql": "SELECT\n $__time(time_column),\n value1\nFROM\n metric_table\nWHERE\n $__timeFilter(time_column)\n", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "WARN logs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + 
"bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 111 + }, + "hiddenSeries": false, + "id": 16, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "", + "expr": "irate(logback_events_total{instance=\"$instance\", application=\"$application\", level=\"debug\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "debug", + "rawSql": "SELECT\n $__time(time_column),\n value1\nFROM\n metric_table\nWHERE\n $__timeFilter(time_column)\n", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "DEBUG logs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 111 + 
}, + "hiddenSeries": false, + "id": 20, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "", + "expr": "irate(logback_events_total{instance=\"$instance\", application=\"$application\", level=\"trace\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "trace", + "rawSql": "SELECT\n $__time(time_column),\n value1\nFROM\n metric_table\nWHERE\n $__timeFilter(time_column)\n", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TRACE logs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Logback Statistics", + "type": "row" + } + ], + "refresh": "5s", + "schemaVersion": 25, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(jvm_classes_loaded_classes, instance)", + "hide": 0, + "includeAll": false, + "label": "Instance", + "multi": false, + "name": "instance", + "options": [], + "query": "label_values(jvm_classes_loaded_classes, instance)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, 
+ "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Application", + "multi": false, + "name": "application", + "options": [], + "query": "label_values(jvm_classes_loaded{instance=\"$instance\"}, application)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "", + "hide": 0, + "includeAll": false, + "label": "HikariCP-Pool", + "multi": false, + "name": "hikaricp", + "options": [], + "query": "label_values(hikaricp_connections{instance=\"$instance\", application=\"$application\"}, pool)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "", + "hide": 0, + "includeAll": true, + "label": "Memory Pool (heap)", + "multi": false, + "name": "memory_pool_heap", + "options": [], + "query": "label_values(jvm_memory_used_bytes{application=\"$application\", instance=\"$instance\", area=\"heap\"},id)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "", + "hide": 0, + "includeAll": true, + "label": "Memory Pool (nonheap)", + "multi": false, + "name": "memory_pool_nonheap", + "options": [], + "query": "label_values(jvm_memory_used_bytes{application=\"$application\", instance=\"$instance\", area=\"nonheap\"},id)", + "refresh": 1, + "regex": "", + 
"skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Spring Boot Statistics", + "uid": "OS7-NUiGz", + "version": 8 +} \ No newline at end of file diff --git a/charts/common-services/dashboards/argo-cd/argocd-dashboard.json b/charts/common-services/dashboards/argo-cd/argocd-dashboard.json new file mode 100644 index 00000000..a4199592 --- /dev/null +++ b/charts/common-services/dashboards/argo-cd/argocd-dashboard.json @@ -0,0 +1,499 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Argo CD GitOps monitoring dashboard - automatically shows application data when apps are deployed", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, + "id": 100, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "id": 1, + "type": "stat", + "title": "ArgoCD Version", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 6, "x": 0, "y": 1}, + "targets": [ + { + "expr": "argocd_info{namespace=~\"$namespace\"}", + "legendFormat": "{{version}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"thresholds": {"mode": "absolute", "steps": [{"color": "blue", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "/^version$/", "values": false}, "colorMode": "value", "graphMode": "none", 
"justifyMode": "center", "textMode": "name"} + }, + { + "id": 2, + "type": "stat", + "title": "Total Applications", + "description": "Shows data when ArgoCD applications are deployed", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 4, "x": 6, "y": 1}, + "targets": [ + { + "expr": "count(argocd_app_info{namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "noValue": "0", "thresholds": {"mode": "absolute", "steps": [{"color": "blue", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 3, + "type": "stat", + "title": "Healthy Apps", + "description": "Applications with health_status=Healthy", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 3, "x": 10, "y": 1}, + "targets": [ + { + "expr": "count(argocd_app_info{namespace=~\"$namespace\", health_status=\"Healthy\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "noValue": "0", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 4, + "type": "stat", + "title": "Degraded Apps", + "description": "Applications with health_status=Degraded", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 3, "x": 13, "y": 1}, + "targets": [ + { + "expr": "count(argocd_app_info{namespace=~\"$namespace\", health_status=\"Degraded\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "noValue": "0", "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}]}}, + "overrides": [] + }, + "options": 
{"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 5, + "type": "stat", + "title": "Synced Apps", + "description": "Applications with sync_status=Synced", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 4, "x": 16, "y": 1}, + "targets": [ + { + "expr": "count(argocd_app_info{namespace=~\"$namespace\", sync_status=\"Synced\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "noValue": "0", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 6, + "type": "stat", + "title": "OutOfSync Apps", + "description": "Applications with sync_status=OutOfSync", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 4, "x": 20, "y": 1}, + "targets": [ + { + "expr": "count(argocd_app_info{namespace=~\"$namespace\", sync_status=\"OutOfSync\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "noValue": "0", "thresholds": {"mode": "absolute", "steps": [{"color": "yellow", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 5}, + "id": 101, + "panels": [], + "title": "Application Health & Sync Status", + "type": "row" + }, + { + "id": 7, + "type": "piechart", + "title": "Apps by Health Status", + "description": "Distribution of applications by health status", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 8, "x": 0, "y": 6}, + "targets": [ + { + 
"expr": "count by (health_status) (argocd_app_info{namespace=~\"$namespace\"})", + "legendFormat": "{{health_status}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short"}, + "overrides": [ + {"matcher": {"id": "byName", "options": "Healthy"}, "properties": [{"id": "color", "value": {"fixedColor": "green", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "Degraded"}, "properties": [{"id": "color", "value": {"fixedColor": "red", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "Progressing"}, "properties": [{"id": "color", "value": {"fixedColor": "yellow", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "Suspended"}, "properties": [{"id": "color", "value": {"fixedColor": "blue", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "Missing"}, "properties": [{"id": "color", "value": {"fixedColor": "orange", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "Unknown"}, "properties": [{"id": "color", "value": {"fixedColor": "gray", "mode": "fixed"}}]} + ] + }, + "options": {"legend": {"displayMode": "list", "placement": "right"}, "pieType": "pie", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}} + }, + { + "id": 8, + "type": "piechart", + "title": "Apps by Sync Status", + "description": "Distribution of applications by sync status", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 8, "x": 8, "y": 6}, + "targets": [ + { + "expr": "count by (sync_status) (argocd_app_info{namespace=~\"$namespace\"})", + "legendFormat": "{{sync_status}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short"}, + "overrides": [ + {"matcher": {"id": "byName", "options": "Synced"}, "properties": [{"id": "color", "value": {"fixedColor": "green", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "OutOfSync"}, "properties": [{"id": "color", "value": {"fixedColor": "yellow", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": 
"Unknown"}, "properties": [{"id": "color", "value": {"fixedColor": "gray", "mode": "fixed"}}]} + ] + }, + "options": {"legend": {"displayMode": "list", "placement": "right"}, "pieType": "pie", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}} + }, + { + "id": 9, + "type": "table", + "title": "Application Status", + "description": "List of all ArgoCD applications with their status", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 8, "x": 16, "y": 6}, + "targets": [ + { + "expr": "argocd_app_info{namespace=~\"$namespace\"}", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "transformations": [ + {"id": "organize", "options": {"excludeByName": {"Time": true, "Value": true, "__name__": true, "instance": true, "job": true}, "indexByName": {}, "renameByName": {"dest_namespace": "Dest NS", "dest_server": "Dest Server", "health_status": "Health", "name": "Application", "namespace": "Namespace", "project": "Project", "repo": "Repository", "sync_status": "Sync"}}} + ], + "fieldConfig": { + "defaults": {}, + "overrides": [ + {"matcher": {"id": "byName", "options": "Health"}, "properties": [{"id": "mappings", "value": [{"options": {"Degraded": {"color": "red", "index": 1}, "Healthy": {"color": "green", "index": 0}, "Missing": {"color": "orange", "index": 3}, "Progressing": {"color": "yellow", "index": 2}, "Unknown": {"color": "gray", "index": 4}}, "type": "value"}]}]}, + {"matcher": {"id": "byName", "options": "Sync"}, "properties": [{"id": "mappings", "value": [{"options": {"OutOfSync": {"color": "yellow", "index": 1}, "Synced": {"color": "green", "index": 0}, "Unknown": {"color": "gray", "index": 2}}, "type": "value"}]}]} + ] + }, + "options": {"showHeader": true, "cellHeight": "sm"} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 14}, + "id": 102, + "panels": [], + "title": "Sync Operations", + "type": "row" + }, + { + "id": 10, + "type": "timeseries", + "title": "Sync 
Operations Rate", + "description": "Rate of sync operations by application", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 15}, + "targets": [ + { + "expr": "sum by (name) (rate(argocd_app_sync_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0, "noValue": "No sync data"}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 11, + "type": "timeseries", + "title": "Sync Operations by Phase", + "description": "Sync operations grouped by phase (Succeeded, Failed, Error)", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 15}, + "targets": [ + { + "expr": "sum by (phase) (rate(argocd_app_sync_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{phase}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "ops", "min": 0, "noValue": "No sync data"}, + "overrides": [ + {"matcher": {"id": "byName", "options": "Succeeded"}, "properties": [{"id": "color", "value": {"fixedColor": "green", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "Failed"}, "properties": [{"id": "color", "value": {"fixedColor": "red", "mode": "fixed"}}]}, + {"matcher": {"id": "byName", "options": "Error"}, "properties": [{"id": "color", "value": {"fixedColor": "orange", "mode": "fixed"}}]} + ] + }, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 23}, + "id": 103, + "panels": [], + "title": "Reconciliation", + "type": "row" + }, + { + "id": 12, + "type": "timeseries", + "title": "Reconciliation Rate", + "description": "Rate of application reconciliations", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 24}, + "targets": [ + { + "expr": "sum by (name) 
(rate(argocd_app_reconcile_count{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0, "noValue": "No reconciliation data"}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 13, + "type": "timeseries", + "title": "Reconciliation Duration (p99)", + "description": "99th percentile reconciliation duration", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 24}, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le, name) (rate(argocd_app_reconcile_bucket{namespace=~\"$namespace\"}[5m])))", + "legendFormat": "{{name}} p99", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "s", "min": 0, "noValue": "No reconciliation data"}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 32}, + "id": 104, + "panels": [], + "title": "Cluster & Resources", + "type": "row" + }, + { + "id": 14, + "type": "stat", + "title": "Managed Clusters", + "description": "Number of clusters managed by ArgoCD", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 6, "x": 0, "y": 33}, + "targets": [ + { + "expr": "count(argocd_cluster_info{namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "noValue": "0", "thresholds": {"mode": "absolute", "steps": [{"color": "blue", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 15, + "type": "timeseries", + "title": "Cluster API Resources", + "description": "Number of API resources per cluster", + "datasource": "Prometheus", + 
"gridPos": {"h": 8, "w": 9, "x": 6, "y": 33}, + "targets": [ + { + "expr": "sum by (server) (argocd_cluster_api_resources{namespace=~\"$namespace\"})", + "legendFormat": "{{server}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "short", "min": 0, "noValue": "No cluster data"}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 16, + "type": "timeseries", + "title": "Cluster Resource Objects", + "description": "Number of K8s resource objects per cluster", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 9, "x": 15, "y": 33}, + "targets": [ + { + "expr": "sum by (server) (argocd_cluster_api_resource_objects{namespace=~\"$namespace\"})", + "legendFormat": "{{server}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "short", "min": 0, "noValue": "No cluster data"}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 41}, + "id": 105, + "panels": [], + "title": "Controller Performance", + "type": "row" + }, + { + "id": 17, + "type": "timeseries", + "title": "Redis Request Rate", + "description": "Rate of Redis requests from controller and server", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 42}, + "targets": [ + { + "expr": "sum by (initiator, failed) (rate(argocd_redis_request_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{initiator}} (failed={{failed}})", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 18, + "type": "timeseries", + "title": "Redis Request Duration (avg)", + "description": "Average Redis request duration", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, 
"y": 42}, + "targets": [ + { + "expr": "sum by (initiator) (rate(argocd_redis_request_duration_sum{namespace=~\"$namespace\"}[5m])) / clamp_min(sum by (initiator) (rate(argocd_redis_request_duration_count{namespace=~\"$namespace\"}[5m])), 1)", + "legendFormat": "{{initiator}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "s", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 50}, + "id": 106, + "panels": [], + "title": "Git & Repository", + "type": "row" + }, + { + "id": 19, + "type": "timeseries", + "title": "Git Request Rate", + "description": "Rate of Git requests by repository", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 51}, + "targets": [ + { + "expr": "sum by (repo) (rate(argocd_git_request_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{repo}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0, "noValue": "No git data"}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 20, + "type": "timeseries", + "title": "Git Request Duration", + "description": "Git request duration by repository", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 51}, + "targets": [ + { + "expr": "sum by (repo) (rate(argocd_git_request_duration_seconds_sum{namespace=~\"$namespace\"}[5m])) / clamp_min(sum by (repo) (rate(argocd_git_request_duration_seconds_count{namespace=~\"$namespace\"}[5m])), 1)", + "legendFormat": "{{repo}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "s", "min": 0, "noValue": "No git data"}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + } + ], + "refresh": "30s", + "schemaVersion": 39, + "style": "dark", + "tags": 
["argocd", "gitops", "kubernetes"], + "templating": { + "list": [ + { + "name": "namespace", + "type": "query", + "datasource": "Prometheus", + "query": "label_values(argocd_info, namespace)", + "refresh": 2, + "includeAll": true, + "allValue": ".*", + "current": {"text": "All", "value": "$__all"} + } + ] + }, + "time": {"from": "now-6h", "to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "ArgoCD GitOps Monitoring", + "uid": "argocd-gitops-monitoring", + "version": 1, + "weekStart": "" +} diff --git a/charts/common-services/dashboards/elasticsearch/elasticsearch-dashboard.json b/charts/common-services/dashboards/elasticsearch/elasticsearch-dashboard.json new file mode 100644 index 00000000..b36de331 --- /dev/null +++ b/charts/common-services/dashboards/elasticsearch/elasticsearch-dashboard.json @@ -0,0 +1,1607 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Elasticsearch monitoring dashboard for common-services", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 100, + "panels": [], + "title": "Cluster Health", + "type": "row" + }, + { + "id": 1, + "type": "stat", + "title": "Cluster Health Status", + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 1 + }, + "targets": [ + { + "expr": "elasticsearch_cluster_health_status{namespace=~\"$namespace\", color=\"green\"} == 1", + "legendFormat": "green", + "refId": "A" + }, + { + "expr": "elasticsearch_cluster_health_status{namespace=~\"$namespace\", color=\"yellow\"} == 1", + "legendFormat": "yellow", + "refId": "B" + }, + { + "expr": "elasticsearch_cluster_health_status{namespace=~\"$namespace\", color=\"red\"} == 1", 
+ "legendFormat": "red", + "refId": "C" + } + ], + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "green": { + "color": "green", + "index": 0, + "text": "GREEN" + } + }, + "type": "value" + }, + { + "options": { + "yellow": { + "color": "yellow", + "index": 1, + "text": "YELLOW" + } + }, + "type": "value" + }, + { + "options": { + "red": { + "color": "red", + "index": 2, + "text": "RED" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null} + ] + } + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "/.*/", + "values": false + }, + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "textMode": "name" + } + }, + { + "id": 2, + "type": "stat", + "title": "Total Nodes", + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 1 + }, + "targets": [ + { + "expr": "max(elasticsearch_cluster_health_number_of_nodes{namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "red", "value": null}, + {"color": "yellow", "value": 1}, + {"color": "green", "value": 3} + ] + } + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "textMode": "value" + } + }, + { + "id": 3, + "type": "stat", + "title": "Data Nodes", + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 1 + }, + "targets": [ + { + "expr": "max(elasticsearch_cluster_health_number_of_data_nodes{namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "red", "value": null}, + 
{"color": "yellow", "value": 1}, + {"color": "green", "value": 3} + ] + } + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "textMode": "value" + } + }, + { + "id": 4, + "type": "stat", + "title": "Active Shards", + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 1 + }, + "targets": [ + { + "expr": "max(elasticsearch_cluster_health_active_shards{namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "blue", "value": null} + ] + } + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "textMode": "value" + } + }, + { + "id": 5, + "type": "stat", + "title": "Unassigned Shards", + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 1 + }, + "targets": [ + { + "expr": "max(elasticsearch_cluster_health_unassigned_shards{namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "yellow", "value": 1}, + {"color": "red", "value": 5} + ] + } + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "textMode": "value" + } + }, + { + "id": 6, + "type": "stat", + "title": "Pending Tasks", + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 1 + }, + "targets": [ + { + "expr": 
"max(elasticsearch_cluster_health_number_of_pending_tasks{namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "yellow", "value": 5}, + {"color": "red", "value": 20} + ] + } + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "textMode": "value" + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 101, + "panels": [], + "title": "Index Statistics", + "type": "row" + }, + { + "id": 7, + "type": "stat", + "title": "Total Documents", + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 6 + }, + "targets": [ + { + "expr": "sum(elasticsearch_indices_docs{namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "blue", "value": null} + ] + } + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "textMode": "value" + } + }, + { + "id": 8, + "type": "stat", + "title": "Store Size", + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 6 + }, + "targets": [ + { + "expr": "sum(elasticsearch_indices_store_size_bytes{namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "blue", "value": null} + ] + } + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": 
false + }, + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "textMode": "value" + } + }, + { + "id": 9, + "type": "stat", + "title": "Segments Count", + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 6 + }, + "targets": [ + { + "expr": "sum(elasticsearch_indices_segments_count{namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "blue", "value": null} + ] + } + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "textMode": "value" + } + }, + { + "id": 10, + "type": "stat", + "title": "Deleted Documents", + "datasource": "Prometheus", + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 6 + }, + "targets": [ + { + "expr": "sum(elasticsearch_indices_docs_deleted{namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "yellow", "value": 10000}, + {"color": "red", "value": 100000} + ] + } + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "textMode": "value" + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 102, + "panels": [], + "title": "JVM & Memory", + "type": "row" + }, + { + "id": 11, + "type": "timeseries", + "title": "JVM Heap Used", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 11 + }, + "targets": [ + { + "expr": "sum by (name, node) 
(elasticsearch_jvm_memory_used_bytes{namespace=~\"$namespace\", area=\"heap\"})", + "legendFormat": "{{name}} {{node}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 12, + "type": "timeseries", + "title": "JVM Heap Max", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 11 + }, + "targets": [ + { + "expr": "sum by (name, node) (elasticsearch_jvm_memory_max_bytes{namespace=~\"$namespace\", area=\"heap\"})", + "legendFormat": "{{name}} {{node}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 13, + "type": "timeseries", + "title": "JVM GC Collection Rate", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 19 + }, + "targets": [ + { + "expr": "sum by (name, gc) (rate(elasticsearch_jvm_gc_collection_seconds_count{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}} {{gc}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "ops", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 14, + "type": "timeseries", + "title": "JVM GC Collection Time", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 19 + }, + "targets": [ + { + "expr": "sum by (name, gc) (rate(elasticsearch_jvm_gc_collection_seconds_sum{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}} {{gc}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "s", + "min": 0 + }, + "overrides": [] + }, + "options": { + 
"legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 103, + "panels": [], + "title": "Search & Indexing", + "type": "row" + }, + { + "id": 15, + "type": "timeseries", + "title": "Search Query Rate", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 28 + }, + "targets": [ + { + "expr": "sum by (name) (rate(elasticsearch_indices_search_query_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "ops", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 16, + "type": "timeseries", + "title": "Search Query Time", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 28 + }, + "targets": [ + { + "expr": "sum by (name) (rate(elasticsearch_indices_search_query_time_seconds{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "s", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 17, + "type": "timeseries", + "title": "Indexing Rate", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 36 + }, + "targets": [ + { + "expr": "sum by (name) (rate(elasticsearch_indices_indexing_index_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "ops", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 18, + "type": 
"timeseries", + "title": "Indexing Time", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 36 + }, + "targets": [ + { + "expr": "sum by (name) (rate(elasticsearch_indices_indexing_index_time_seconds_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "s", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 44 + }, + "id": 104, + "panels": [], + "title": "Cache", + "type": "row" + }, + { + "id": 19, + "type": "timeseries", + "title": "Query Cache Hit Ratio", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 45 + }, + "targets": [ + { + "expr": "sum(rate(elasticsearch_indices_query_cache_count{namespace=~\"$namespace\"}[5m])) / clamp_min(sum(rate(elasticsearch_indices_query_cache_total{namespace=~\"$namespace\"}[5m])), 1e-10) * 100", + "legendFormat": "Query Cache Hit %", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 20, + "type": "timeseries", + "title": "Query Cache Memory", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 45 + }, + "targets": [ + { + "expr": "sum by (name) (elasticsearch_indices_query_cache_memory_size_bytes{namespace=~\"$namespace\"})", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 21, + "type": 
"timeseries", + "title": "Cache Evictions", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 53 + }, + "targets": [ + { + "expr": "sum by (name) (rate(elasticsearch_indices_query_cache_evictions{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "query cache {{name}}", + "refId": "A" + }, + { + "expr": "sum by (name) (rate(elasticsearch_indices_fielddata_evictions{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "fielddata {{name}}", + "refId": "B" + }, + { + "expr": "sum by (name) (rate(elasticsearch_indices_request_cache_evictions{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "request cache {{name}}", + "refId": "C" + } + ], + "fieldConfig": { + "defaults": { + "unit": "ops", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 22, + "type": "timeseries", + "title": "Fielddata Memory", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 53 + }, + "targets": [ + { + "expr": "sum by (name) (elasticsearch_indices_fielddata_memory_size_bytes{namespace=~\"$namespace\"})", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 61 + }, + "id": 105, + "panels": [], + "title": "System Resources", + "type": "row" + }, + { + "id": 23, + "type": "timeseries", + "title": "OS CPU %", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 62 + }, + "targets": [ + { + "expr": "sum by (name, node) (elasticsearch_os_cpu_percent{namespace=~\"$namespace\"})", + "legendFormat": "{{name}} {{node}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + 
"unit": "percent", + "min": 0, + "max": 100 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 24, + "type": "timeseries", + "title": "OS Load Average (1m)", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 62 + }, + "targets": [ + { + "expr": "sum by (name, node) (elasticsearch_os_load1{namespace=~\"$namespace\"})", + "legendFormat": "{{name}} {{node}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 25, + "type": "timeseries", + "title": "Process CPU %", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 62 + }, + "targets": [ + { + "expr": "sum by (name, node) (elasticsearch_process_cpu_percent{namespace=~\"$namespace\"})", + "legendFormat": "{{name}} {{node}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 26, + "type": "timeseries", + "title": "OS Memory Used", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 70 + }, + "targets": [ + { + "expr": "sum by (name, node) (elasticsearch_os_mem_used_bytes{namespace=~\"$namespace\"})", + "legendFormat": "{{name}} {{node}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 27, + "type": "timeseries", + "title": "File Descriptors", + "datasource": "Prometheus", + 
"gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 70 + }, + "targets": [ + { + "expr": "sum by (name, node) (elasticsearch_process_open_files_count{namespace=~\"$namespace\"})", + "legendFormat": "open {{name}} {{node}}", + "refId": "A" + }, + { + "expr": "sum by (name, node) (elasticsearch_process_max_files_descriptors{namespace=~\"$namespace\"})", + "legendFormat": "max {{name}} {{node}}", + "refId": "B" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 78 + }, + "id": 106, + "panels": [], + "title": "Disk & Transport", + "type": "row" + }, + { + "id": 28, + "type": "timeseries", + "title": "Filesystem Available", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 79 + }, + "targets": [ + { + "expr": "sum by (name, node) (elasticsearch_filesystem_data_available_bytes{namespace=~\"$namespace\"})", + "legendFormat": "{{name}} {{node}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 29, + "type": "timeseries", + "title": "Filesystem Size", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 79 + }, + "targets": [ + { + "expr": "sum by (name, node) (elasticsearch_filesystem_data_size_bytes{namespace=~\"$namespace\"})", + "legendFormat": "{{name}} {{node}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 30, + "type": "timeseries", + 
"title": "Transport TX Rate", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 87 + }, + "targets": [ + { + "expr": "sum by (name, node) (rate(elasticsearch_transport_tx_size_bytes_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}} {{node}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "Bps", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 31, + "type": "timeseries", + "title": "Transport RX Rate", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 87 + }, + "targets": [ + { + "expr": "sum by (name, node) (rate(elasticsearch_transport_rx_size_bytes_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}} {{node}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "Bps", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 95 + }, + "id": 107, + "panels": [], + "title": "Thread Pools", + "type": "row" + }, + { + "id": 32, + "type": "timeseries", + "title": "Thread Pool Active", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 96 + }, + "targets": [ + { + "expr": "sum by (name, type) (elasticsearch_thread_pool_active_count{namespace=~\"$namespace\"})", + "legendFormat": "{{name}} {{type}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 33, + "type": "timeseries", + "title": "Thread Pool Queue", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 
12, + "y": 96 + }, + "targets": [ + { + "expr": "sum by (name, type) (elasticsearch_thread_pool_queue_count{namespace=~\"$namespace\"})", + "legendFormat": "{{name}} {{type}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "short", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 34, + "type": "timeseries", + "title": "Thread Pool Rejected", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 104 + }, + "targets": [ + { + "expr": "sum by (name, type) (rate(elasticsearch_thread_pool_rejected_count{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}} {{type}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "ops", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 112 + }, + "id": 108, + "panels": [], + "title": "Circuit Breakers", + "type": "row" + }, + { + "id": 35, + "type": "timeseries", + "title": "Circuit Breaker Estimated Size", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 113 + }, + "targets": [ + { + "expr": "sum by (name, breaker) (elasticsearch_breakers_estimated_size_bytes{namespace=~\"$namespace\"})", + "legendFormat": "{{name}} {{breaker}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 36, + "type": "timeseries", + "title": "Circuit Breaker Limit", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 113 + }, + "targets": [ + { + "expr": "sum by (name, breaker) 
(elasticsearch_breakers_limit_size_bytes{namespace=~\"$namespace\"})", + "legendFormat": "{{name}} {{breaker}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 37, + "type": "timeseries", + "title": "Circuit Breaker Tripped", + "datasource": "Prometheus", + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 121 + }, + "targets": [ + { + "expr": "sum by (name, breaker) (rate(elasticsearch_breakers_tripped{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}} {{breaker}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "ops", + "min": 0 + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + } + ], + "refresh": "30s", + "schemaVersion": 39, + "style": "dark", + "tags": [ + "elasticsearch" + ], + "templating": { + "list": [ + { + "name": "namespace", + "type": "query", + "datasource": "Prometheus", + "query": "label_values(elasticsearch_cluster_health_status, namespace)", + "refresh": 2, + "includeAll": true, + "allValue": ".*", + "current": { + "text": "All", + "value": "$__all" + } + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Elasticsearch Monitoring", + "uid": "elasticsearch-monitoring", + "version": 1, + "weekStart": "" +} diff --git a/charts/common-services/dashboards/flink/flink-dashboard.json b/charts/common-services/dashboards/flink/flink-dashboard.json new file mode 100644 index 00000000..d9004c0f --- /dev/null +++ b/charts/common-services/dashboards/flink/flink-dashboard.json @@ -0,0 +1,7614 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + 
"pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "8.1.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Dashboard for Job Manager and Task Manager, prepared for versions: 1.12.X, 1.13.X", + "editable": true, + "gnetId": 14911, + "graphTooltip": 0, + "id": null, + "iteration": 1629751362277, + "links": [], + "panels": [ + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 113, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 
1 + }, + "id": 114, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_jobmanager_Status_JVM_CPU_Load{instance=~\"$jm_instance\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Load", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "id": 115, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "delta(flink_jobmanager_Status_JVM_CPU_Time{instance=~\"$jm_instance\"}[1m])", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Time", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 116, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_jobmanager_Status_JVM_Threads_Count{instance=~\"$jm_instance\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Threads", + "type": "timeseries" + } + ], + "title": "Job Manager (JVM - CPU)", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 122, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 2 + }, + "id": 123, + 
"options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_jobmanager_Status_JVM_Memory_Heap_Used{instance=~\"$jm_instance\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Heap", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 2 + }, + "id": 124, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_jobmanager_Status_JVM_Memory_NonHeap_Used{instance=~\"$jm_instance\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Heap", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 10 + }, + "id": 125, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_jobmanager_Status_JVM_Memory_Direct_MemoryUsed{instance=~\"$jm_instance\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Direct", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + 
}, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 10 + }, + "id": 126, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_jobmanager_Status_JVM_Memory_Mapped_MemoryUsed{instance=~\"$jm_instance\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Mapped", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 10 + }, + "id": 127, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_jobmanager_Status_JVM_Memory_Metaspace_Used{instance=~\"$jm_instance\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Metaspace", + "type": "timeseries" + } + ], + "title": "Job Manager (JVM - Memory Usage)", + "type": "row" + }, + { + 
"collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "id": 118, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 3 + }, + "id": 119, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_jobmanager_Status_JVM_GarbageCollector_PS_MarkSweep_Time", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Mark Sweep Time", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": 
[], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 3 + }, + "id": 120, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "delta(flink_jobmanager_Status_JVM_GarbageCollector_PS_Scavenge_Time[1m])", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Scavenge Time", + "type": "timeseries" + } + ], + "title": "Job Manager (JVM - Garbage Collector)", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 10, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 3, + "x": 0, + "y": 4 + }, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": null, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.0.6", + "targets": [ + { + 
"exemplar": true, + "expr": "flink_jobmanager_numRegisteredTaskManagers{instance=~\"$jm_instance\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Task Managers", + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 7, + "x": 3, + "y": 4 + }, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": null, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.0.6", + "targets": [ + { + "exemplar": true, + "expr": "flink_jobmanager_taskSlotsTotal{instance=~\"$jm_instance\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Task Slots", + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + 
"custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 7, + "x": 10, + "y": 4 + }, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": null, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.0.6", + "targets": [ + { + "exemplar": true, + "expr": "flink_jobmanager_taskSlotsAvailable{instance=~\"$jm_instance\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Available Task Slots", + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + 
"mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 7, + "x": 17, + "y": 4 + }, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": null, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.0.6", + "targets": [ + { + "exemplar": true, + "expr": "flink_jobmanager_numRunningJobs{instance=~\"$jm_instance\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Runnings Jobs", + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 12 + }, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": null, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.0.6", + "targets": [ + { + "exemplar": true, 
+ "expr": "flink_jobmanager_job_numRestarts{job_name=~\"$job_name\", instance=~\"$jm_instance\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job_name}} ({{job_id}}) on {{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Job Restarts", + "transformations": [], + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 12 + }, + "id": 42, + "interval": null, + "links": [], + "maxDataPoints": null, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.0.6", + "targets": [ + { + "exemplar": true, + "expr": "flink_jobmanager_job_fullRestarts{job_name=~\"$job_name\", instance=~\"$jm_instance\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job_name}} ({{job_id}}) on {{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Job Restarts", + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": 
"${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 12 + }, + "id": 43, + "interval": null, + "links": [], + "maxDataPoints": null, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.0.6", + "targets": [ + { + "exemplar": true, + "expr": "flink_jobmanager_job_restartingTime{job_name=~\"$job_name\", instance=~\"$jm_instance\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job_name}} ({{job_id}}) on {{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Job Restartine Time", + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "dark-yellow", + "value": 3600000 + }, + { + "color": "dark-orange", + "value": 21600000 + }, + { + "color": "dark-purple", + "value": 86400000 + }, + { + "color": "dark-red", + "value": 
259200000 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 21, + "x": 0, + "y": 20 + }, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.0", + "targets": [ + { + "exemplar": true, + "expr": "flink_jobmanager_job_uptime{job_name=~\"$job_name\", instance=~\"$jm_instance\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job_id}} ({{instance}})", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Job Uptime", + "type": "stat" + }, + { + "datasource": null, + "description": "", + "gridPos": { + "h": 10, + "w": 3, + "x": 21, + "y": 20 + }, + "id": 46, + "options": { + "content": "# Uptime Description\n\n| Color | Status|\n|---|---|\n| Green | < 1 Hour |\n| Yellow | >= 1 Hour |\n| Orange | >= 6 Hours |\n| Purple | >= 1 Day |\n| Red | >= 3 days |\n", + "mode": "markdown" + }, + "pluginVersion": "8.1.0", + "targets": [ + { + "refId": "A", + "target": "" + } + ], + "title": "Uptime Description", + "type": "text" + } + ], + "title": "Job Manager (Slots & Jobs)", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 106, + "panels": [ + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, +
"showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 0, + "y": 5 + }, + "id": 108, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "delta(flink_jobmanager_job_totalNumberOfCheckpoints{job_name=~\"$job_name\", instance=~\"$jm_instance\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}}) on {{instance}}", + "refId": "A" + } + ], + "title": "Checkpoints", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 6, + "y": 5 + }, + "id": 109, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "delta(flink_jobmanager_job_numberOfCompletedCheckpoints{job_name=~\"$job_name\", instance=~\"$jm_instance\"}[1m])", + "interval": "", + "legendFormat": 
"{{job_name}} ({{job_id}}) on {{instance}}", + "refId": "A" + } + ], + "title": "Completed Checkpoints", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 12, + "y": 5 + }, + "id": 111, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "delta(flink_jobmanager_job_numberOfInProgressCheckpoints{job_name=~\"$job_name\", instance=~\"$jm_instance\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}}) on {{instance}}", + "refId": "A" + } + ], + "title": "In Progress Checkpoints", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 18, + "y": 5 + }, + "id": 110, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "delta(flink_jobmanager_job_numberOfFailedCheckpoints{job_name=~\"$job_name\", instance=~\"$jm_instance\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}}) on {{instance}}", + "refId": "A" + } + ], + "title": "Failed Checkpoints", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 16 + }, + "id": 129, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_jobmanager_job_lastCheckpointSize{job_name=~\"$job_name\", instance=~\"$jm_instance\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}}) on {{instance}}", + "refId": "A" + } + ], + "title": "Last 
Checkpoint Size", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 16 + }, + "id": 130, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_jobmanager_job_lastCheckpointDuration{job_name=~\"$job_name\", instance=~\"$jm_instance\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}}) on {{instance}}", + "refId": "A" + } + ], + "title": "Last Checkpoint Duration", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 16 + }, + "id": 131, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "xx{job_name=~\"$job_name\", instance=~\"$jm_instance\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}}) on {{instance}}", + "refId": "A" + } + ], + "type": "timeseries" + } + ], + "title": "Job Manager (Checkpoints)", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 91, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 6 + }, + "id": 93, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_Status_JVM_CPU_Load{tm_id=~\"${tm_id}\"}", + "interval": "", + "legendFormat": "{{tm_id}}", + "refId": "A" + } + ], + "title": "Load", + "type": "timeseries" + }, + { + 
"datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 6 + }, + "id": 94, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "delta(flink_taskmanager_Status_JVM_CPU_Time{tm_id=~\"$tm_id\"}[1m])", + "interval": "", + "legendFormat": "{{tm_id}}", + "refId": "A" + } + ], + "title": "Time", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": 
[] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 6 + }, + "id": 95, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_Status_JVM_Threads_Count{tm_id=~\"${tm_id}\"}", + "interval": "", + "legendFormat": "{{tm_id}}", + "refId": "A" + } + ], + "title": "Threads", + "type": "timeseries" + } + ], + "title": "Task Manager (JVM - CPU)", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 16, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 27, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_Status_JVM_Memory_Heap_Used{tm_id=~\"$tm_id\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{tm_id}}", + "refId": "A" + } + ], + "title": "Heap", + "type": "timeseries" + }, 
+ { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 28, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_Status_JVM_Memory_NonHeap_Used{tm_id=~\"$tm_id\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{tm_id}}", + "refId": "A" + } + ], + "title": "Non Heap", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + 
}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 16 + }, + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_Status_JVM_Memory_Direct_MemoryUsed{tm_id=~\"$tm_id\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{tm_id}}", + "refId": "A" + } + ], + "title": "Direct", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 16 + }, + "id": 89, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_Status_JVM_Memory_Mapped_MemoryUsed{tm_id=~\"$tm_id\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{tm_id}}", + "refId": "A" + 
} + ], + "title": "Mapped", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 16 + }, + "id": 20, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_Status_JVM_Memory_Metaspace_Used{tm_id=~\"$tm_id\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{tm_id}}", + "refId": "A" + } + ], + "title": "Metaspace", + "type": "timeseries" + } + ], + "title": "Task Manager (JVM - Memory Usage)", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 48, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + 
"lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 44, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_Status_JVM_GarbageCollector_G1_Young_Generation_Time{tm_id=~\"$tm_id\"}", + "interval": "", + "legendFormat": "{{tm_id}}", + "refId": "A" + } + ], + "title": "Young Generation Time", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 41, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": 
"flink_taskmanager_Status_JVM_GarbageCollector_G1_Old_Generation_Time{tm_id=~\"$tm_id\"}", + "interval": "", + "legendFormat": "{{tm_id}}", + "refId": "A" + } + ], + "title": "Old Generation Time", + "type": "timeseries" + } + ], + "title": "Task Manager (JVM - Garbage Collector)", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 135, + "panels": [ + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 137, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_Status_Flink_Memory_Managed_Used{tm_id=~\"$tm_id\"}", + "interval": "", + "legendFormat": "{{tm_id}}", + "refId": "A" + } + ], + "title": "Managed", + "type": "timeseries" + } + ], + "title": "Task Manager (Memory - Flink)", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 55, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 132, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_Status_Shuffle_Netty_TotalMemory{tm_id=~\"$tm_id\"}", + "interval": "", + "legendFormat": "{{tm_id}}", + "refId": "A" + } + ], + "title": "Total Memory", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 10 + }, + 
"id": 133, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_Status_Shuffle_Netty_TotalMemorySegments{tm_id=~\"$tm_id\"}", + "interval": "", + "legendFormat": "{{tm_id}}", + "refId": "A" + } + ], + "title": "Total Memory Segments", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 0, + "y": 20 + }, + "id": 57, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_Status_Shuffle_Netty_UsedMemory{tm_id=~\"$tm_id\"}", + "interval": "", + "legendFormat": "{{tm_id}}", + "refId": "A" + } + ], + "title": "Used Memory", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": 
false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 6, + "y": 20 + }, + "id": 76, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_Status_Shuffle_Netty_AvailableMemory{tm_id=~\"$tm_id\"}", + "interval": "", + "legendFormat": "{{tm_id}}", + "refId": "A" + } + ], + "title": "Available Memory", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 12, + "y": 20 + }, + "id": 77, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": 
"flink_taskmanager_Status_Shuffle_Netty_UsedMemorySegments{tm_id=~\"$tm_id\"}", + "interval": "", + "legendFormat": "{{tm_id}}", + "refId": "A" + } + ], + "title": "Used Memory Segments", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 18, + "y": 20 + }, + "id": 78, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_Status_Shuffle_Netty_AvailableMemorySegments{tm_id=~\"$tm_id\"}", + "interval": "", + "legendFormat": "{{tm_id}}", + "refId": "A" + } + ], + "title": "Available Memory Segments", + "type": "timeseries" + } + ], + "title": "Task Manager (Memory - Shuffle Netty)", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 53, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": 
false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 0, + "y": 11 + }, + "id": 50, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_job_task_isBackPressured{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Is Back Pressured", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 6, + "y": 11 + }, + "id": 51, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": 
"single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(flink_taskmanager_job_task_idleTimeMsPerSecond{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Idle Time Ms (per Second)", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 12, + "y": 11 + }, + "id": 80, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_job_task_checkpointStartDelayNanos{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Checkpoint Start Delay", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, +
"gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 18, + "y": 11 + }, + "id": 79, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_job_task_checkpointAlignmentTime{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Checkpoint Alignment Time", + "type": "timeseries" + } + ], + "title": "Task Manager (Job Task - Diag)", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 139, + "panels": [ + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null 
+ }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 12 + }, + "id": 141, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_job_task_numBuffersInLocal{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Number Buffers in Local", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 12 + }, + "id": 142, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(flink_taskmanager_job_task_numBuffersInLocalPerSecond{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Number Buffers in Local 
(per Second)", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 12, + "y": 12 + }, + "id": 143, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_job_task_numBuffersInRemote{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Number Buffers in Remote", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + 
"mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 18, + "y": 12 + }, + "id": 144, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(flink_taskmanager_job_task_numBuffersInRemotePerSecond{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Number Buffers in Remote (per Second)", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 145, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "delta(flink_taskmanager_job_task_numBuffersOut{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": 
"", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Number Buffers Out", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 146, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(flink_taskmanager_job_task_numBuffersOutPerSecond{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Number Buffers Out (per Second)", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + 
}, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 4, + "x": 0, + "y": 29 + }, + "id": 147, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "delta(flink_taskmanager_job_task_numBytesIn{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Bytes In", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 4, + "x": 4, + "y": 29 + }, + "id": 148, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": 
"rate(flink_taskmanager_job_task_numBytesInPerSecond{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Bytes In (per Second)", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 4, + "x": 8, + "y": 29 + }, + "id": 149, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "delta(flink_taskmanager_job_task_numBytesInLocal{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Bytes In Local", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": 
false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 4, + "x": 12, + "y": 29 + }, + "id": 150, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(flink_taskmanager_job_task_numBytesInLocalPerSecond{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Bytes In Local (per Second)", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 4, + "x": 16, + "y": 29 + }, + "id": 151, + "options": { + "legend": { + "calcs": [], 
+ "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "delta(flink_taskmanager_job_task_numBytesInRemote{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Bytes In Remote", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 4, + "x": 20, + "y": 29 + }, + "id": 152, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(flink_taskmanager_job_task_numBytesInRemotePerSecond{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Bytes In Remote (per Second)", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": 
"auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 38 + }, + "id": 153, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "delta(flink_taskmanager_job_task_numBytesOut{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Bytes Out", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 38 + }, + "id": 154, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(flink_taskmanager_job_task_numBytesOutPerSecond{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Bytes Out (per Second)", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 47 + }, + "id": 155, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "delta(flink_taskmanager_job_task_numRecordsIn{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Records In", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": 
{ + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 47 + }, + "id": 156, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(flink_taskmanager_job_task_numRecordsInPerSecond{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Records In (per Second)", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", +
"steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 12, + "y": 47 + }, + "id": 157, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "delta(flink_taskmanager_job_task_numRecordsOut{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Records Out", + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 18, + "y": 47 + }, + "id": 158, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(flink_taskmanager_job_task_numRecordsOutPerSecond{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", 
+ "refId": "A" + } + ], + "title": "Records Out (per Second)", + "type": "timeseries" + } + ], + "title": "Task Manager (Job Task - General)", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 97, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 13 + }, + "id": 99, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_job_task_buffers_inPoolUsage{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "In Pool Usage", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", +
"lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 13 + }, + "id": 103, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_job_task_buffers_inputQueueLength{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Input Queue Length", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 13 + }, + "id": 100, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": 
"flink_taskmanager_job_task_buffers_inputExclusiveBuffersUsage{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Input Exclusive Buffers Usage", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 23 + }, + "id": 102, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_job_task_buffers_outPoolUsage{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Out Pool Usage", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 23 + }, + "id": 104, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_job_task_buffers_outputQueueLength{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Output Queue Length", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 23 + }, + "id": 101, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, 
+ "expr": "flink_taskmanager_job_task_buffers_inputFloatingBuffersUsage{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Input Floating Buffers Usage", + "type": "timeseries" + } + ], + "title": "Task Manager (Job Task - Buffers)", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 82, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 4, + "x": 0, + "y": 14 + }, + "id": 84, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_job_task_operator_numLateRecordsDropped", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Number Late Records Dropped", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 5, + "x": 4, + "y": 14 + }, + "id": 85, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "delta(flink_taskmanager_job_task_operator_numRecordsIn{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Number Records In", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 5, + "x": 9, + 
"y": 14 + }, + "id": 87, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(flink_taskmanager_job_task_operator_numRecordsInPerSecond{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Number Records In (per Second)", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 5, + "x": 14, + "y": 14 + }, + "id": 86, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "delta(flink_taskmanager_job_task_operator_numRecordsOut{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Number Records Out", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + },
+ "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 5, + "x": 19, + "y": 14 + }, + "id": 88, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(flink_taskmanager_job_task_operator_numRecordsOutPerSecond{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Num Records Out (per Second)", + "type": "timeseries" + } + ], + "title": "Task Manager (Job Task - Operator)", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 60, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" 
+ }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 58, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_job_task_Shuffle_Netty_Input_Buffers_inputQueueLength{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Input Queue Length", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 62, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_job_task_Shuffle_Netty_Output_Buffers_outputQueueLength{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + 
"interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Output Queue Length", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 24 + }, + "id": 63, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_job_task_Shuffle_Netty_Input_Buffers_inPoolUsage{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "In Pool Usage", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + 
"showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 24 + }, + "id": 67, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_job_task_Shuffle_Netty_Output_Buffers_outPoolUsage{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Out Pool Usage", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 12, + "y": 24 + }, + "id": 65, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": 
"flink_taskmanager_job_task_Shuffle_Netty_Input_Buffers_inputFloatingBuffersUsage{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Input Floating Buffers Usage", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 18, + "y": 24 + }, + "id": 66, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_job_task_Shuffle_Netty_Input_Buffers_inputExclusiveBuffersUsage{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Input Exclusive Buffers Usage", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + 
"hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 33 + }, + "id": 68, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_job_task_Shuffle_Netty_Input_numBuffersInLocal{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Input Number Buffers In Local", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 33 + }, + "id": 72, + "options": { + "legend": { + "calcs": [], + 
"displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(flink_taskmanager_job_task_Shuffle_Netty_Input_numBuffersInLocalPerSecond{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Input Number Buffers In Local (per Second)", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 12, + "y": 33 + }, + "id": 70, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "flink_taskmanager_job_task_Shuffle_Netty_Input_numBuffersInRemote{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Input Number Buffers In Remote", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + 
"custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 18, + "y": 33 + }, + "id": 71, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(flink_taskmanager_job_task_Shuffle_Netty_Input_numBuffersInRemotePerSecond{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Input Num Buffers In Remote (per Second)", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null 
+ } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 42 + }, + "id": 69, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "delta(flink_taskmanager_job_task_Shuffle_Netty_Input_numBytesInLocal{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Input Num Bytes In Local", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 42 + }, + "id": 73, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(flink_taskmanager_job_task_Shuffle_Netty_Input_numBytesInLocalPerSecond{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Input Number Bytes In 
Local (per Second)", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 12, + "y": 42 + }, + "id": 74, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "delta(flink_taskmanager_job_task_Shuffle_Netty_Input_numBytesInRemote{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Input Number Bytes In Remote", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 6, + "x": 18, + "y": 42 + }, + "id": 75, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(flink_taskmanager_job_task_Shuffle_Netty_Input_numBytesInRemotePerSecond{tm_id=~\"$tm_id\", job_name=~\"$job_name\", task_name=~\"$task_name\"}[1m])", + "interval": "", + "legendFormat": "{{job_name}} ({{job_id}} / {{task_id}}) {{tm_id}}", + "refId": "A" + } + ], + "title": "Input Number Bytes In Remote (per Second)", + "type": "timeseries" + } + ], + "title": "Task Manager (Job Task - Shuffle Netty)", + "type": "row" + } + ], + "refresh": false, + "schemaVersion": 30, + "style": "dark", + "tags": [ + "flink", + "task manager", + "job manager", + "apache" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": [ + "Prometheus" + ], + "value": [ + "Prometheus" + ] + }, + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": true, + "name": "Source", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(tm_id)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Task Manager", + "multi": true, + "name": "tm_id", + "options": [], + "query": { + "query": "label_values(tm_id)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + 
"definition": "label_values(flink_jobmanager_numRegisteredTaskManagers,instance)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Job Manager", + "multi": true, + "name": "jm_instance", + "options": [], + "query": { + "query": "label_values(flink_jobmanager_numRegisteredTaskManagers,instance)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(flink_taskmanager_job_task_isBackPressured{tm_id=~\"$tm_id\"},job_name)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Job Name", + "multi": true, + "name": "job_name", + "options": [], + "query": { + "query": "label_values(flink_taskmanager_job_task_isBackPressured{tm_id=~\"$tm_id\"},job_name)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(flink_taskmanager_job_task_isBackPressured{job_name=~\"$job_name\"},task_name)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Task Name", + "multi": true, + "name": "task_name", + "options": [], + "query": { + "query": "label_values(flink_taskmanager_job_task_isBackPressured{job_name=~\"$job_name\"},task_name)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "datasource": null, + "description": null, + "error": null, + "filters": [], + "hide": 0, + "label": "Ad Hoc", + "name": "ad_hoc", + "skipUrlSync": false, + "type": "adhoc" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Apache Flink (2021) Dashboard for Job / Task Manager", + "uid": 
"wKbnD5Gnk", + "version": 1 +} \ No newline at end of file diff --git a/charts/common-services/dashboards/fluent-bit/fluent-bit-dashboard.json b/charts/common-services/dashboards/fluent-bit/fluent-bit-dashboard.json new file mode 100644 index 00000000..0706cf3d --- /dev/null +++ b/charts/common-services/dashboards/fluent-bit/fluent-bit-dashboard.json @@ -0,0 +1,310 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Fluent Bit log forwarder monitoring dashboard", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, + "id": 100, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "id": 1, + "type": "stat", + "title": "Uptime", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 6, "x": 0, "y": 1}, + "targets": [ + { + "expr": "max(fluentbit_uptime{namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "area", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 2, + "type": "stat", + "title": "Input Records/s", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 6, "x": 6, "y": 1}, + "targets": [ + { + "expr": "sum(rate(fluentbit_input_records_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "ops", "thresholds": {"mode": "absolute", "steps": [{"color": "blue", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": 
{"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "area", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 3, + "type": "stat", + "title": "Output Records/s", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 6, "x": 12, "y": 1}, + "targets": [ + { + "expr": "sum(rate(fluentbit_output_proc_records_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "ops", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "area", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 4, + "type": "stat", + "title": "Errors/s", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 6, "x": 18, "y": 1}, + "targets": [ + { + "expr": "sum(rate(fluentbit_output_errors_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "ops", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 0.1}, {"color": "red", "value": 1}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "area", "justifyMode": "center", "textMode": "value"} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 5}, + "id": 101, + "panels": [], + "title": "Input Plugins", + "type": "row" + }, + { + "id": 5, + "type": "timeseries", + "title": "Input Records Rate", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 6}, + "targets": [ + { + "expr": "sum by (name) (rate(fluentbit_input_records_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", 
"min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 6, + "type": "timeseries", + "title": "Input Bytes Rate", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 6}, + "targets": [ + { + "expr": "sum by (name) (rate(fluentbit_input_bytes_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "Bps", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 7, + "type": "timeseries", + "title": "Files Opened/Closed", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 14}, + "targets": [ + { + "expr": "sum by (name) (rate(fluentbit_input_files_opened_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}} opened", + "refId": "A" + }, + { + "expr": "sum by (name) (rate(fluentbit_input_files_closed_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}} closed", + "refId": "B" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 8, + "type": "timeseries", + "title": "Files Rotated", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 14}, + "targets": [ + { + "expr": "sum by (name) (rate(fluentbit_input_files_rotated_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 22}, + "id": 102, + "panels": [], + "title": "Output Plugins", + "type": "row" + }, + { + "id": 9, + "type": 
"timeseries", + "title": "Output Records Rate", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 23}, + "targets": [ + { + "expr": "sum by (name) (rate(fluentbit_output_proc_records_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 10, + "type": "timeseries", + "title": "Output Bytes Rate", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 23}, + "targets": [ + { + "expr": "sum by (name) (rate(fluentbit_output_proc_bytes_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "Bps", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 31}, + "id": 103, + "panels": [], + "title": "Errors & Retries", + "type": "row" + }, + { + "id": 11, + "type": "timeseries", + "title": "Output Errors", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 8, "x": 0, "y": 32}, + "targets": [ + { + "expr": "sum by (name) (rate(fluentbit_output_errors_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 12, + "type": "timeseries", + "title": "Retries", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 8, "x": 8, "y": 32}, + "targets": [ + { + "expr": "sum by (name) (rate(fluentbit_output_retries_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}} retries", + "refId": "A" + }, + { + "expr": "sum by (name) 
(rate(fluentbit_output_retries_failed_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}} failed", + "refId": "B" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 13, + "type": "timeseries", + "title": "Dropped Records", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 8, "x": 16, "y": 32}, + "targets": [ + { + "expr": "sum by (name) (rate(fluentbit_output_dropped_records_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + } + ], + "refresh": "30s", + "schemaVersion": 39, + "style": "dark", + "tags": ["fluent-bit", "logging"], + "templating": { + "list": [ + { + "name": "namespace", + "type": "query", + "datasource": "Prometheus", + "query": "label_values(fluentbit_uptime, namespace)", + "refresh": 2, + "includeAll": true, + "allValue": ".*", + "current": {"text": "All", "value": "$__all"} + } + ] + }, + "time": {"from": "now-6h", "to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "Fluent Bit Monitoring", + "uid": "fluentbit-monitoring", + "version": 1, + "weekStart": "" +} diff --git a/charts/common-services/dashboards/grafana/grafana-internals.json b/charts/common-services/dashboards/grafana/grafana-internals.json new file mode 100644 index 00000000..fe8dd491 --- /dev/null +++ b/charts/common-services/dashboards/grafana/grafana-internals.json @@ -0,0 +1,278 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { "type": "grafana", "uid": "-- Grafana --" }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + 
"fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 1, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "none" + } + }, + "gridPos": { "h": 4, "w": 6, "x": 0, "y": 1 }, + "id": 2, + "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(grafana_alerting_active_alerts{namespace=~\"$namespace\"})", "refId": "A" }], + "title": "Active Alerts", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "none" + } + }, + "gridPos": { "h": 4, "w": 6, "x": 6, "y": 1 }, + "id": 3, + "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(grafana_alerting_active_configurations{namespace=~\"$namespace\"})", "refId": "A" }], + "title": "Active Configurations", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", 
"value": null }] }, + "unit": "short" + } + }, + "gridPos": { "h": 4, "w": 6, "x": 12, "y": 1 }, + "id": 4, + "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(grafana_access_evaluation_count{namespace=~\"$namespace\"}[5m]))", "refId": "A" }], + "title": "Access Evaluations/s", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "none" + } + }, + "gridPos": { "h": 4, "w": 6, "x": 18, "y": 1 }, + "id": 5, + "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(grafana_alerting_discovered_configurations{namespace=~\"$namespace\"})", "refId": "A" }], + "title": "Discovered Configurations", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 }, + "id": 6, + "panels": [], + "title": "Alerting", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, 
"scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "short" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 }, + "id": 7, + "options": { "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(grafana_alerting_active_alerts{namespace=~\"$namespace\"})", "legendFormat": "Active Alerts", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(grafana_alerting_alerts{namespace=~\"$namespace\"})", "legendFormat": "Total Alerts", "refId": "B" } + ], + "title": "Alerts Over Time", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "short" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 }, + "id": 8, + "options": { "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, 
"tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(grafana_alerting_alerts_received_total{namespace=~\"$namespace\"}[5m]))", "legendFormat": "Alerts Received/s", "refId": "A" }], + "title": "Alerts Received Rate", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 }, + "id": 9, + "panels": [], + "title": "Access Control", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "short" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 15 }, + "id": 10, + "options": { "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(grafana_access_evaluation_count{namespace=~\"$namespace\"}[5m]))", "legendFormat": "Evaluations/s", "refId": "A" }], + "title": "Access Evaluation Rate", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { 
"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "s" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 15 }, + "id": 11, + "options": { "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(grafana_access_evaluation_duration_sum{namespace=~\"$namespace\"}[5m])) / clamp_min(sum(rate(grafana_access_evaluation_duration_count{namespace=~\"$namespace\"}[5m])), 1e-10)", "legendFormat": "Avg Duration", "refId": "A" }], + "title": "Access Evaluation Duration", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 }, + "id": 12, + "panels": [], + "title": "Cache & Permissions", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, 
"showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "short" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 }, + "id": 13, + "options": { "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum by (status) (grafana_access_permissions_cache_usage{namespace=~\"$namespace\"})", "legendFormat": "{{status}}", "refId": "A" }], + "title": "Permissions Cache Usage", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "s" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 }, + "id": 14, + "options": { "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": 
"sum(rate(grafana_access_permissions_duration_sum{namespace=~\"$namespace\"}[5m])) / clamp_min(sum(rate(grafana_access_permissions_duration_count{namespace=~\"$namespace\"}[5m])), 1e-10)", "legendFormat": "Avg Permission Duration", "refId": "A" }], + "title": "Permissions Duration", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": ["grafana", "observability"], + "templating": { + "list": [ + { + "current": {}, + "hide": 0, + "includeAll": false, + "label": "Datasource", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": {}, + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "definition": "label_values(grafana_alerting_active_alerts, namespace)", + "hide": 0, + "includeAll": true, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { "query": "label_values(grafana_alerting_active_alerts, namespace)", "refId": "StandardVariableQuery" }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { "from": "now-1h", "to": "now" }, + "timepicker": {}, + "timezone": "browser", + "title": "Grafana Metrics Overview", + "uid": "grafana-custom-001", + "version": 1 +} diff --git a/charts/common-services/dashboards/grafana/grafana-metrics-dashboard.json b/charts/common-services/dashboards/grafana/grafana-metrics-dashboard.json new file mode 100644 index 00000000..51a39fc0 --- /dev/null +++ b/charts/common-services/dashboards/grafana/grafana-metrics-dashboard.json @@ -0,0 +1,1694 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.6.0" + }, + { + "type": 
"panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Grafana Internal Metics Dashboards", + "editable": true, + "gnetId": 3590, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": false, + "rows": [ + { + "collapse": false, + "height": 250, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "description": "start time of the process", + "format": "dateTimeFromNow", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 16, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "process_start_time_seconds{kubernetes_pod_name=\"$pod\"} * 1000", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "Start time", + "type": "singlestat", + "valueFontSize": "80%", 
+ "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "description": "total amount of orgs", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "grafana_stat_total_orgs{kubernetes_pod_name=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "Organisation Count", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "description": "total amount of users", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 3, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + 
"value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "grafana_stat_total_users{kubernetes_pod_name=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "User Count", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "description": "total amount of dashboards", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "grafana_stat_totals_dashboard{kubernetes_pod_name=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "Dashboard Count", + "type": 
"singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "General Counters", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "Average user and system CPU time spent in seconds.", + "fill": 1, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(rate(process_cpu_seconds_total{kubernetes_pod_name=\"$pod\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "CPU Time", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "Virtual and Resident memory size in bytes, averages over 5 min interval", + "fill": 1, + "id": 15, + "legend": { + "alignAsTable": true, + "avg": 
true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(rate(process_resident_memory_bytes{kubernetes_pod_name=\"$pod\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Resident Mem", + "refId": "A" + }, + { + "expr": "avg(rate(process_virtual_memory_bytes{kubernetes_pod_name=\"$pod\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Virtual Mem", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average Memory Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of open file descriptors", + "fill": 1, + "id": 17, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"process_open_fds{kubernetes_pod_name=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Open FD", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Open File Descriptors", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU, Memory and File Descriptor Stats", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "http response status", + "fill": 1, + "id": 18, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "grafana_api_response_status_total{kubernetes_pod_name=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "API {{ code }}", + "refId": "A" + }, + { + "expr": "grafana_page_response_status_total{kubernetes_pod_name=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Page {{ code }}", + "refId": "B" + }, + { + "expr": 
"grafana_proxy_response_status_total{kubernetes_pod_name=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Proxy {{ code }}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total Response Statuses", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "http request counter", + "fill": 1, + "id": 20, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "http_request_total{kubernetes_pod_name=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{handler}} - {{method}} - {{statuscode}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "HTTP Req/Resp Details", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, 
+ "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "api login counters", + "fill": 1, + "id": 19, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "grafana_api_login_post_total{kubernetes_pod_name=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "POST", + "refId": "A" + }, + { + "expr": "grafana_api_login_oauth_total{kubernetes_pod_name=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "OAuth", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Login Counts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Responce Codes and Logins", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "summary for dataproxy request duration", + "fill": 1, + "id": 6, + "legend": { + 
"alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "grafana_api_dataproxy_request_all_milliseconds{kubernetes_pod_name=\"$pod\", quantile=\"0.5\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "0.5 ", + "refId": "A" + }, + { + "expr": "grafana_api_dataproxy_request_all_milliseconds{kubernetes_pod_name=\"$pod\", quantile=\"0.9\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "0.9", + "refId": "B" + }, + { + "expr": "grafana_api_dataproxy_request_all_milliseconds{kubernetes_pod_name=\"$pod\", quantile=\"0.99\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "0.99", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Dataproxy Request Quantiles", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "average dataproxy request duration over 5 minutes", + "fill": 1, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": 
[], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(grafana_api_dataproxy_request_all_milliseconds_sum{kubernetes_pod_name=\"$pod\"}[5m]) / rate(grafana_api_dataproxy_request_all_milliseconds_count{kubernetes_pod_name=\"$pod\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average Requests Duration", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Dataproxy Request Avg [5 min]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Dataproxy Stats", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "summary for dashboard get duration", + "fill": 1, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"grafana_api_dashboard_get_milliseconds{kubernetes_pod_name=\"$pod\", quantile=\"0.5\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "0.5", + "refId": "A" + }, + { + "expr": "grafana_api_dashboard_get_milliseconds{kubernetes_pod_name=\"$pod\", quantile=\"0.9\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "0.9", + "refId": "B" + }, + { + "expr": "grafana_api_dashboard_get_milliseconds{kubernetes_pod_name=\"$pod\", quantile=\"0.99\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "0.99", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Dashboard Get Duration Quantiles", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "average dashboard get duration over 5 minutes", + "fill": 1, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(grafana_api_dashboard_get_milliseconds_sum{kubernetes_pod_name=\"$pod\"}[5m]) / rate(grafana_api_dashboard_get_milliseconds_count{kubernetes_pod_name=\"$pod\"}[5m])", + "format": "time_series", + 
"intervalFactor": 2, + "legendFormat": "0.5", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Dashboard Get Duration Avg [5 min]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Dashboard Get Stats", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "summary for dashboard save duration", + "fill": 1, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "grafana_api_dashboard_save_milliseconds{kubernetes_pod_name=\"$pod\", quantile=\"0.5\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "0.5", + "refId": "A" + }, + { + "expr": "grafana_api_dashboard_save_milliseconds{kubernetes_pod_name=\"$pod\", quantile=\"0.9\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "0.9", + "refId": "B" + }, + { + "expr": "grafana_api_dashboard_save_milliseconds{kubernetes_pod_name=\"$pod\", quantile=\"0.99\"}", + "format": "time_series", + 
"intervalFactor": 2, + "legendFormat": "0.99", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Dashboard Save Duration Quantiles", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "average dashboard save duration over 5 minutes", + "fill": 1, + "id": 11, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(grafana_api_dashboard_save_milliseconds_sum{kubernetes_pod_name=\"$pod\"}[5m]) / rate(grafana_api_dashboard_save_milliseconds_count{kubernetes_pod_name=\"$pod\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "0.5", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Dashboard Save Duration Avg [5 min]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": 
"short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Dashboard Save Stats", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "summary for dashboard search duration", + "fill": 1, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "grafana_api_dashboard_search_milliseconds{kubernetes_pod_name=\"$pod\", quantile=\"0.5\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "0.5", + "refId": "A" + }, + { + "expr": "grafana_api_dashboard_search_milliseconds{kubernetes_pod_name=\"$pod\", quantile=\"0.9\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "0.9", + "refId": "B" + }, + { + "expr": "grafana_api_dashboard_search_milliseconds{kubernetes_pod_name=\"$pod\", quantile=\"0.99\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "0.99", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Dashboard Search Duration Quantiles", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + 
"format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "average dashboard search duration over 5 minutes", + "fill": 1, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(grafana_api_dashboard_search_milliseconds_sum{kubernetes_pod_name=\"$pod\"}[5m]) / rate(grafana_api_dashboard_search_milliseconds_count{kubernetes_pod_name=\"$pod\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "0.5", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Dashboard Search Duration Avg [5 min]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Dashboard Search Stats", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "grafana" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Pod", + "multi": 
false, + "name": "pod", + "options": [], + "query": "label_values(grafana_info,kubernetes_pod_name)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": false, + "label": "Version", + "multi": false, + "name": "version", + "options": [], + "query": "label_values(grafana_info,version)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Grafana Internals", + "version": 2 +} \ No newline at end of file diff --git a/charts/common-services/dashboards/haproxy/haproxy-dashboard.json b/charts/common-services/dashboards/haproxy/haproxy-dashboard.json new file mode 100644 index 00000000..ff9a0304 --- /dev/null +++ b/charts/common-services/dashboards/haproxy/haproxy-dashboard.json @@ -0,0 +1,362 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "HAProxy load balancer monitoring dashboard", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, + "id": 100, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "id": 1, + "type": "stat", + "title": "Active Backend Servers", + "datasource": "Prometheus", + "gridPos": {"h": 4, 
"w": 6, "x": 0, "y": 1}, + "targets": [ + { + "expr": "sum(haproxy_backend_active_servers{namespace=~\"$namespace\", proxy=~\"$proxy\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "none", "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "yellow", "value": 1}, {"color": "green", "value": 2}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 2, + "type": "stat", + "title": "Current Sessions", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 6, "x": 6, "y": 1}, + "targets": [ + { + "expr": "sum(haproxy_backend_current_sessions{namespace=~\"$namespace\", proxy=~\"$proxy\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "none", "thresholds": {"mode": "absolute", "steps": [{"color": "blue", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "area", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 3, + "type": "stat", + "title": "Requests/s", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 6, "x": 12, "y": 1}, + "targets": [ + { + "expr": "sum(rate(haproxy_backend_http_requests_total{namespace=~\"$namespace\", proxy=~\"$proxy\"}[5m]))", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "ops", "thresholds": {"mode": "absolute", "steps": [{"color": "blue", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "area", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 4, + "type": "stat", + "title": "Connection Errors/s", + "datasource": "Prometheus", + "gridPos": {"h": 4, 
"w": 6, "x": 18, "y": 1}, + "targets": [ + { + "expr": "sum(rate(haproxy_backend_connection_errors_total{namespace=~\"$namespace\", proxy=~\"$proxy\"}[5m]))", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "ops", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 0.1}, {"color": "red", "value": 1}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "area", "justifyMode": "center", "textMode": "value"} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 5}, + "id": 101, + "panels": [], + "title": "Traffic", + "type": "row" + }, + { + "id": 5, + "type": "timeseries", + "title": "Bytes In/Out Rate", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 6}, + "targets": [ + { + "expr": "sum by (proxy) (rate(haproxy_backend_bytes_in_total{namespace=~\"$namespace\", proxy=~\"$proxy\"}[5m]))", + "legendFormat": "{{proxy}} in", + "refId": "A" + }, + { + "expr": "sum by (proxy) (rate(haproxy_backend_bytes_out_total{namespace=~\"$namespace\", proxy=~\"$proxy\"}[5m]))", + "legendFormat": "{{proxy}} out", + "refId": "B" + } + ], + "fieldConfig": {"defaults": {"unit": "Bps", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 6, + "type": "timeseries", + "title": "Sessions", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 6}, + "targets": [ + { + "expr": "sum by (proxy) (haproxy_backend_current_sessions{namespace=~\"$namespace\", proxy=~\"$proxy\"})", + "legendFormat": "{{proxy}} current", + "refId": "A" + }, + { + "expr": "sum by (proxy) (haproxy_backend_max_sessions{namespace=~\"$namespace\", proxy=~\"$proxy\"})", + "legendFormat": "{{proxy}} max", + "refId": "B" + } + ], + "fieldConfig": {"defaults": {"unit": 
"short", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 14}, + "id": 102, + "panels": [], + "title": "HTTP Responses", + "type": "row" + }, + { + "id": 7, + "type": "timeseries", + "title": "HTTP Response Codes", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 24, "x": 0, "y": 15}, + "targets": [ + { + "expr": "sum by (proxy, code) (rate(haproxy_backend_http_responses_total{namespace=~\"$namespace\", proxy=~\"$proxy\"}[5m]))", + "legendFormat": "{{proxy}} {{code}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 23}, + "id": 103, + "panels": [], + "title": "Latency", + "type": "row" + }, + { + "id": 8, + "type": "timeseries", + "title": "Connect Time", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 24}, + "targets": [ + { + "expr": "haproxy_backend_connect_time_average_seconds{namespace=~\"$namespace\", proxy=~\"$proxy\"}", + "legendFormat": "{{proxy}} avg", + "refId": "A" + }, + { + "expr": "haproxy_backend_max_connect_time_seconds{namespace=~\"$namespace\", proxy=~\"$proxy\"}", + "legendFormat": "{{proxy}} max", + "refId": "B" + } + ], + "fieldConfig": {"defaults": {"unit": "s", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 9, + "type": "timeseries", + "title": "Response Time", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 24}, + "targets": [ + { + "expr": "haproxy_backend_response_time_average_seconds{namespace=~\"$namespace\", proxy=~\"$proxy\"}", + "legendFormat": "{{proxy}} avg", + "refId": "A" + }, + { + "expr": 
"haproxy_backend_max_response_time_seconds{namespace=~\"$namespace\", proxy=~\"$proxy\"}", + "legendFormat": "{{proxy}} max", + "refId": "B" + } + ], + "fieldConfig": {"defaults": {"unit": "s", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 32}, + "id": 104, + "panels": [], + "title": "Errors", + "type": "row" + }, + { + "id": 10, + "type": "timeseries", + "title": "Connection Errors", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 8, "x": 0, "y": 33}, + "targets": [ + { + "expr": "sum by (proxy) (rate(haproxy_backend_connection_errors_total{namespace=~\"$namespace\", proxy=~\"$proxy\"}[5m]))", + "legendFormat": "{{proxy}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 11, + "type": "timeseries", + "title": "Response Errors", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 8, "x": 8, "y": 33}, + "targets": [ + { + "expr": "sum by (proxy) (rate(haproxy_backend_response_errors_total{namespace=~\"$namespace\", proxy=~\"$proxy\"}[5m]))", + "legendFormat": "{{proxy}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 12, + "type": "timeseries", + "title": "Internal Errors", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 8, "x": 16, "y": 33}, + "targets": [ + { + "expr": "sum by (proxy) (rate(haproxy_backend_internal_errors_total{namespace=~\"$namespace\", proxy=~\"$proxy\"}[5m]))", + "legendFormat": "{{proxy}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": 
"list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 41}, + "id": 105, + "panels": [], + "title": "Frontend", + "type": "row" + }, + { + "id": 13, + "type": "timeseries", + "title": "Frontend Connections", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 42}, + "targets": [ + { + "expr": "sum by (proxy) (rate(haproxy_frontend_connections_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{proxy}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 14, + "type": "timeseries", + "title": "Frontend HTTP Requests", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 42}, + "targets": [ + { + "expr": "sum by (proxy) (rate(haproxy_frontend_http_requests_total{namespace=~\"$namespace\"}[5m]))", + "legendFormat": "{{proxy}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + } + ], + "refresh": "30s", + "schemaVersion": 39, + "style": "dark", + "tags": ["haproxy", "loadbalancer"], + "templating": { + "list": [ + { + "name": "namespace", + "type": "query", + "datasource": "Prometheus", + "query": "label_values(haproxy_backend_active_servers, namespace)", + "refresh": 2, + "includeAll": true, + "allValue": ".*", + "current": {"text": "All", "value": "$__all"} + }, + { + "name": "proxy", + "type": "query", + "datasource": "Prometheus", + "query": "label_values(haproxy_backend_active_servers{namespace=~\"$namespace\"}, proxy)", + "refresh": 2, + "includeAll": true, + "allValue": ".*", + "current": {"text": "All", "value": "$__all"} + } + ] + }, + "time": {"from": "now-6h", "to": "now"}, + "timepicker": {}, + "timezone": 
"browser", + "title": "HAProxy Monitoring", + "uid": "haproxy-monitoring", + "version": 1, + "weekStart": "" +} diff --git a/charts/common-services/dashboards/ido/graph-pipeline-sinks.json b/charts/common-services/dashboards/ido/graph-pipeline-sinks.json new file mode 100644 index 00000000..2019e799 --- /dev/null +++ b/charts/common-services/dashboards/ido/graph-pipeline-sinks.json @@ -0,0 +1,857 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "12.1.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 41, + "panels": [], + "title": "Events for entities $entity_var", + "type": "row" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, 
+ "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 42, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(source, entity) (flink_taskmanager_job_task_operator_source_operation_entity_rt_graph_events_counter_output{entity=~\"$entity_var\", source=~\"$source_var\", operation=~\"$operation_var\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{source}} to {{entity}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Events Count", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 1 + }, + "id": 45, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "avg by(source, entity) (flink_taskmanager_job_task_operator_source_operation_entity_rt_events_output_batch_size{entity=~\"$entity_var\", source=~\"$source_var\", operation=~\"$operation_var\"})", + "hide": false, + "instant": false, + "legendFormat": "{{source}} to {{entity}}", + "range": true, + "refId": "A" + } + ], + "title": "Batch Size", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Batch errors counted per entity/operation combinations, not including source", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", 
+ "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 133, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(entity, operation) (flink_taskmanager_job_task_operator_operation_entity_rt_events_batch_error_counter_output{entity=~\"$entity_var\", operation=~\"$operation_var\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{entity}} {{operation}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Batch Errors", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 9 + }, + "id": 131, + "options": { + "legend": { + "calcs": [], + 
"displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(source, entity) (flink_taskmanager_job_task_operator_source_operation_entity_rt_graph_events_error_counter_output{entity=~\"$entity_var\", source=~\"$source_var\", operation=~\"$operation_var\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{source}} to {{entity}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Event Errors", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 9 + }, + "id": 132, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + 
"targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(source, entity) (flink_taskmanager_job_task_operator_source_operation_entity_rt_graph_events_retry_counter_output{entity=~\"$entity_var\", source=~\"$source_var\", operation=~\"$operation_var\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{source}} to {{entity}} ", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Retries Count", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Batch retries counted per entity/operation combinations, not including source", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 9 + }, + "id": 134, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "disableTextWrap": false, + 
"editorMode": "code", + "expr": "sum by(entity, operation) (flink_taskmanager_job_task_operator_operation_entity_rt_events_batch_retry_counter_output{entity=~\"$entity_var\", operation=~\"$operation_var\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{entity}} {{operation}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Batch Retries", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 17 + }, + "id": 44, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by(source, entity) (flink_taskmanager_job_task_operator_source_operation_entity_rt_events_output_proc_time{entity=~\"$entity_var\", source=~\"$source_var\", 
operation=~\"$operation_var\"})", + "hide": false, + "instant": false, + "legendFormat": "{{source}} to {{entity}}", + "range": true, + "refId": "A" + } + ], + "title": "Sink Latency", + "type": "timeseries" + } + ], + "preload": false, + "schemaVersion": 42, + "tags": [ + "monitoring", + "identity-observability", + "graph-pipeline" + ], + "templating": { + "list": [ + { + "current": { + "text": [ + "ad_groups" + ], + "value": [ + "ad_groups" + ] + }, + "definition": "label_values(source)", + "description": "", + "label": "Source", + "multi": true, + "name": "source_var", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(source)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": [ + "insert", + "delete", + "update" + ], + "value": [ + "insert", + "delete", + "update" + ] + }, + "definition": "label_values(flink_taskmanager_job_task_operator_source_operation_rt_events_counter,operation)", + "label": "Operation", + "multi": true, + "name": "operation_var", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(flink_taskmanager_job_task_operator_source_operation_rt_events_counter,operation)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": [ + "is_in_group_rt_account_rt_group", + "is_in_group_rt_group_rt_group" + ], + "value": [ + "is_in_group_rt_account_rt_group", + "is_in_group_rt_group_rt_group" + ] + }, + "definition": "label_values(flink_taskmanager_job_task_operator_source_operation_entity_rt_graph_events_counter_output,entity)", + "description": "", + "includeAll": true, + "label": "Entity", + "multi": true, + "name": "entity_var", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(flink_taskmanager_job_task_operator_source_operation_entity_rt_graph_events_counter_output,entity)", + "refId": 
"PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "allowCustomValue": false, + "current": { + "text": "prometheus", + "value": "bf4o8pmsrn474f" + }, + "label": "Data Source", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "IDO Graph Pipeline Sinks", + "uid": "ido-metrics-graph-pipeline-sinks", + "version": 3 +} \ No newline at end of file diff --git a/charts/common-services/dashboards/ido/graph-pipeline-sources.json b/charts/common-services/dashboards/ido/graph-pipeline-sources.json new file mode 100644 index 00000000..76c45124 --- /dev/null +++ b/charts/common-services/dashboards/ido/graph-pipeline-sources.json @@ -0,0 +1,938 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "12.1.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Only source graphs", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + 
"panels": [], + "title": "$source_var", + "type": "row" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(source, operation) (flink_taskmanager_job_task_operator_source_operation_rt_events_counter{source=~\"$source_var\", operation=~\"$operation_var\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{source}} {{operation}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Events count", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 1 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(source, operation) (flink_taskmanager_job_task_operator_source_operation_rt_events_vertex_counter{source=~\"$source_var\", operation=~\"$operation_var\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{source}} {{operation}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Vertices count", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(source, operation) (flink_taskmanager_job_task_operator_source_operation_rt_events_edge_counter{source=~\"$source_var\", operation=~\"$operation_var\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{source}} {{operation}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Edges count", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": 
false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 9 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(source, operation) (rate(flink_taskmanager_job_task_operator_source_operation_rt_events_counter{source=~\"$source_var\", operation=~\"$operation_var\"}[5m]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{source}} {{operation}} events rate/s ", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Events rate/s", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": 
"green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 9 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "editorMode": "code", + "expr": "sum(rate(flink_taskmanager_job_task_operator_source_operation_rt_events_vertex_counter{source=~\"$source_var\", operation=~\"$operation_var\"}[5m])) / clamp_min(sum(rate(flink_taskmanager_job_task_operator_source_operation_rt_events_counter{source=~\"$source_var\", operation=~\"$operation_var\"}[5m])), 1)", + "legendFormat": "Vertices output per event", + "range": true, + "refId": "A" + } + ], + "title": "Vertices output per event", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 9 + }, + "id": 10, + 
"options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "editorMode": "code", + "expr": "sum(rate(flink_taskmanager_job_task_operator_source_operation_rt_events_edge_counter{source=~\"$source_var\", operation=~\"$operation_var\"}[5m])) / clamp_min(sum(rate(flink_taskmanager_job_task_operator_source_operation_rt_events_counter{source=~\"$source_var\", operation=~\"$operation_var\"}[5m])), 1)", + "legendFormat": "Edges output per event", + "range": true, + "refId": "A" + } + ], + "title": "Edges output per event", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 16, + "x": 0, + "y": 17 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" 
+ } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "disableTextWrap": false, + "editorMode": "code", + "expr": "avg by(source, operation) (flink_taskmanager_job_task_operator_source_operation_rt_events_proc_time{source=~\"$source_var\", operation=~\"$operation_var\", quantile=\"0.5\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{source}} {{operation}} quantile_0.5 ", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": "${DS_PROMETHEUS}", + "disableTextWrap": false, + "editorMode": "code", + "expr": "avg by(source, operation) (flink_taskmanager_job_task_operator_source_operation_rt_events_proc_time{source=~\"$source_var\", operation=~\"$operation_var\", quantile=\"0.75\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{source}} {{operation}} quantile_0.75 ", + "range": true, + "refId": "B", + "useBackend": false + }, + { + "datasource": "${DS_PROMETHEUS}", + "disableTextWrap": false, + "editorMode": "code", + "expr": "avg by(source, operation) (flink_taskmanager_job_task_operator_source_operation_rt_events_proc_time{source=~\"$source_var\", operation=~\"$operation_var\", quantile=\"0.95\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{source}} {{operation}} quantile_0.95 ", + "range": true, + "refId": "C", + "useBackend": false + }, + { + "datasource": "${DS_PROMETHEUS}", + "disableTextWrap": false, + "editorMode": "code", + "expr": "avg by(source, operation) (flink_taskmanager_job_task_operator_source_operation_rt_events_proc_time{source=~\"$source_var\", operation=~\"$operation_var\", quantile=\"0.99\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "{{source}} {{operation}} quantile_0.99 ", + "range": true, + "refId": "D", + "useBackend": false + } + ], + "title": "Transformation time ms", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", 
+ "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 3, + "x": 16, + "y": 17 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "disableTextWrap": false, + "editorMode": "code", + "expr": "flink_taskmanager_job_task_operator_source_operation_rt_events_error_counter{source=~\"$source_var\", operation=~\"$operation_var\"}", + "format": "table", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "refId": "Errors", + "useBackend": false + }, + { + "datasource": "${DS_PROMETHEUS}", + "disableTextWrap": false, + "editorMode": "code", + "expr": "flink_taskmanager_job_task_operator_source_operation_rt_events_uuid_missing_rejected_counter{source=~\"$source_var\", operation=~\"$operation_var\"}", + "format": "table", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "refId": "No UUID", + "useBackend": false + } + ], + "title": "", + "type": "stat" + } + ], + "preload": false, + "schemaVersion": 42, + "tags": [ + "monitoring", + "identity-observability", + "graph-pipeline" + ], + "templating": { + "list": [ + { + "current": { + "text": [ + "ad_groups" + ], + "value": [ + "ad_groups" + ] + }, + "definition": "label_values(source)", + "description": "", + "label": "Source", + "multi": true, + "name": "source_var", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(source)", + "refId": 
"PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": [ + "delete", + "insert", + "update" + ], + "value": [ + "delete", + "insert", + "update" + ] + }, + "definition": "label_values(flink_taskmanager_job_task_operator_source_operation_rt_events_counter,operation)", + "label": "Operation", + "multi": true, + "name": "operation_var", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(flink_taskmanager_job_task_operator_source_operation_rt_events_counter,operation)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "allowCustomValue": false, + "current": { + "text": "prometheus", + "value": "cf1nq4mz353i8a" + }, + "label": "Data Source", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "IDO Graph Pipeline Sources", + "uid": "ido-metrics-graph-pipeline-sources", + "version": 4 +} \ No newline at end of file diff --git a/charts/common-services/dashboards/kibana/kibana-dashboard.json b/charts/common-services/dashboards/kibana/kibana-dashboard.json new file mode 100644 index 00000000..87a941e4 --- /dev/null +++ b/charts/common-services/dashboards/kibana/kibana-dashboard.json @@ -0,0 +1,309 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 100, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "mappings": [ + { "options": { "0": { "color": "red", "index": 1, "text": "DOWN" }, "1": { "color": 
"green", "index": 0, "text": "UP" } }, "type": "value" } + ], + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } + } + }, + "gridPos": { "h": 4, "w": 4, "x": 0, "y": 1 }, + "id": 1, + "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" }, + "targets": [{ "expr": "kibana_up{namespace=~\"$namespace\"}", "refId": "A" }], + "title": "Kibana Status", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "mappings": [ + { "options": { "-1": { "color": "dark-purple", "index": 3, "text": "UNKNOWN" }, "0": { "color": "red", "index": 2, "text": "RED" }, "0.5": { "color": "yellow", "index": 1, "text": "YELLOW" }, "1": { "color": "green", "index": 0, "text": "GREEN" } }, "type": "value" } + ], + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "yellow", "value": 0.5 }, { "color": "green", "value": 1 }] } + } + }, + "gridPos": { "h": 4, "w": 4, "x": 4, "y": 1 }, + "id": 2, + "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" }, + "targets": [{ "expr": "kibana_status_overall{namespace=~\"$namespace\"}", "refId": "A" }], + "title": "Overall Status", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { "defaults": { "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, "unit": "s" } }, + "gridPos": { "h": 4, "w": 4, "x": 8, "y": 1 }, + "id": 3, + "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", 
"values": false }, "textMode": "auto" }, + "targets": [{ "expr": "kibana_process_uptime_seconds{namespace=~\"$namespace\"}", "refId": "A" }], + "title": "Uptime", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { "defaults": { "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 100 }, { "color": "red", "value": 500 }] }, "unit": "short" } }, + "gridPos": { "h": 4, "w": 4, "x": 12, "y": 1 }, + "id": 4, + "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" }, + "targets": [{ "expr": "kibana_concurrent_connections_total{namespace=~\"$namespace\"}", "refId": "A" }], + "title": "Concurrent Connections", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { "defaults": { "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 0.5 }, { "color": "red", "value": 1 }] }, "unit": "s" } }, + "gridPos": { "h": 4, "w": 4, "x": 16, "y": 1 }, + "id": 5, + "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" }, + "targets": [{ "expr": "kibana_response_time_seconds{namespace=~\"$namespace\", quantile=\"avg\"}", "refId": "A" }], + "title": "Avg Response Time", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { "defaults": { "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, "unit": "reqps" } }, + "gridPos": { "h": 4, "w": 4, "x": 20, "y": 1 }, + "id": 6, + "options": { "colorMode": "value", "graphMode": "area", 
"justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" }, + "targets": [{ "expr": "rate(kibana_requests_total{namespace=~\"$namespace\", status=\"total\"}[5m])", "refId": "A" }], + "title": "Request Rate", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 }, + "id": 101, + "panels": [], + "title": "Memory", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, "unit": "bytes" } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 }, + "id": 10, + "options": { "legend": { "calcs": ["lastNotNull", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [ + { "expr": "kibana_heap_total_bytes{namespace=~\"$namespace\"}", "legendFormat": "Heap Total", "refId": "A" }, + { "expr": "kibana_heap_used_bytes{namespace=~\"$namespace\"}", "legendFormat": "Heap Used", "refId": "B" }, + { "expr": "kibana_heap_size_limit_bytes{namespace=~\"$namespace\"}", "legendFormat": "Heap Limit", "refId": "C" } + ], + "title": "Heap Memory", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + 
"fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, "unit": "percentunit" } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 }, + "id": 11, + "options": { "legend": { "calcs": ["lastNotNull", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [ + { "expr": "kibana_heap_used_bytes{namespace=~\"$namespace\"} / kibana_heap_size_limit_bytes{namespace=~\"$namespace\"}", "legendFormat": "Heap Usage %", "refId": "A" } + ], + "title": "Heap Usage Percentage", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 }, + "id": 102, + "panels": [], + "title": "Performance", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { 
"group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, "unit": "s" } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 15 }, + "id": 20, + "options": { "legend": { "calcs": ["lastNotNull", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [ + { "expr": "kibana_response_time_seconds{namespace=~\"$namespace\", quantile=\"avg\"}", "legendFormat": "Avg Response Time", "refId": "A" }, + { "expr": "kibana_response_time_seconds{namespace=~\"$namespace\", quantile=\"max\"}", "legendFormat": "Max Response Time", "refId": "B" } + ], + "title": "Response Time", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, "unit": "s" } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 15 }, + "id": 21, + "options": { "legend": { "calcs": ["lastNotNull", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [ + { "expr": "kibana_event_loop_delay_seconds{namespace=~\"$namespace\"}", "legendFormat": "Event Loop Delay", "refId": "A" } + ], + "title": 
"Event Loop Delay", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 }, + "id": 103, + "panels": [], + "title": "Requests", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, "unit": "reqps" } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 }, + "id": 30, + "options": { "legend": { "calcs": ["lastNotNull", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [ + { "expr": "rate(kibana_requests_total{namespace=~\"$namespace\", status=\"total\"}[5m])", "legendFormat": "Total Requests/s", "refId": "A" }, + { "expr": "rate(kibana_requests_total{namespace=~\"$namespace\", status=\"disconnects\"}[5m])", "legendFormat": "Disconnects/s", "refId": "B" } + ], + "title": "Request Rate", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": 
false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, "unit": "reqps" } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 }, + "id": 31, + "options": { "legend": { "calcs": ["lastNotNull", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [ + { "expr": "rate(kibana_requests_total{namespace=~\"$namespace\", status=\"200\"}[5m])", "legendFormat": "200 OK", "refId": "A" }, + { "expr": "rate(kibana_requests_total{namespace=~\"$namespace\", status=\"302\"}[5m])", "legendFormat": "302 Redirect", "refId": "B" }, + { "expr": "rate(kibana_requests_total{namespace=~\"$namespace\", status=\"400\"}[5m])", "legendFormat": "400 Bad Request", "refId": "C" }, + { "expr": "rate(kibana_requests_total{namespace=~\"$namespace\", status=\"404\"}[5m])", "legendFormat": "404 Not Found", "refId": "D" }, + { "expr": "rate(kibana_requests_total{namespace=~\"$namespace\", status=\"500\"}[5m])", "legendFormat": "500 Error", "refId": "E" } + ], + "title": "Requests by Status Code", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 32 }, + "id": 104, + "panels": [], + "title": "System", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": 
false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, "unit": "percent" } + }, + "gridPos": { "h": 8, "w": 8, "x": 0, "y": 33 }, + "id": 40, + "options": { "legend": { "calcs": ["lastNotNull", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [ + { "expr": "kibana_os_cpu_percent{namespace=~\"$namespace\"}", "legendFormat": "CPU %", "refId": "A" } + ], + "title": "OS CPU Usage", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, "unit": "short" } + }, + "gridPos": { "h": 8, "w": 8, "x": 8, "y": 33 }, + "id": 41, + "options": { "legend": { "calcs": ["lastNotNull", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [ + { "expr": "kibana_os_load_average_1m{namespace=~\"$namespace\"}", "legendFormat": "Load 1m", "refId": "A" }, + { "expr": 
"kibana_os_load_average_5m{namespace=~\"$namespace\"}", "legendFormat": "Load 5m", "refId": "B" }, + { "expr": "kibana_os_load_average_15m{namespace=~\"$namespace\"}", "legendFormat": "Load 15m", "refId": "C" } + ], + "title": "OS Load Average", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, "unit": "bytes" } + }, + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 33 }, + "id": 42, + "options": { "legend": { "calcs": ["lastNotNull", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [ + { "expr": "kibana_os_memory_total_bytes{namespace=~\"$namespace\"}", "legendFormat": "Total", "refId": "A" }, + { "expr": "kibana_os_memory_used_bytes{namespace=~\"$namespace\"}", "legendFormat": "Used", "refId": "B" }, + { "expr": "kibana_os_memory_free_bytes{namespace=~\"$namespace\"}", "legendFormat": "Free", "refId": "C" } + ], + "title": "OS Memory", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["kibana", "elastic", "monitoring"], + "templating": { + "list": [ + { + "current": { "selected": false, "text": "Prometheus", "value": "prometheus" }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": 
"datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": ".*", + "current": { "selected": true, "text": "All", "value": "$__all" }, + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "definition": "label_values(kibana_up, namespace)", + "hide": 0, + "includeAll": true, + "multi": false, + "name": "namespace", + "options": [], + "query": { "query": "label_values(kibana_up, namespace)", "refId": "StandardVariableQuery" }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { "from": "now-1h", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "Kibana Metrics", + "uid": "kibana-exporter-metrics", + "version": 1, + "weekStart": "" +} diff --git a/charts/common-services/dashboards/loki/loki-dashboard.json b/charts/common-services/dashboards/loki/loki-dashboard.json new file mode 100644 index 00000000..67f1c31a --- /dev/null +++ b/charts/common-services/dashboards/loki/loki-dashboard.json @@ -0,0 +1,284 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { "type": "grafana", "uid": "-- Grafana --" }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 1, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }, + "unit": "none" + } + }, + "gridPos": { "h": 4, "w": 6, "x": 0, "y": 1 }, + "id": 2, + "options": 
{ "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(loki_panic_total{namespace=~\"$namespace\"})", "refId": "A" }], + "title": "Total Panics", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "percentunit" + } + }, + "gridPos": { "h": 4, "w": 6, "x": 6, "y": 1 }, + "id": 3, + "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(loki_querier_index_cache_hits_total{namespace=~\"$namespace\"}[5m])) / clamp_min(sum(rate(loki_querier_index_cache_gets_total{namespace=~\"$namespace\"}[5m])), 1e-10)", "refId": "A" }], + "title": "Cache Hit Ratio", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "short" + } + }, + "gridPos": { "h": 4, "w": 6, "x": 12, "y": 1 }, + "id": 4, + "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(loki_logql_querystats_ingester_sent_lines_total{namespace=~\"$namespace\"}[5m]))", 
"refId": "A" }], + "title": "Ingester Sent Lines/s", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "short" + } + }, + "gridPos": { "h": 4, "w": 6, "x": 18, "y": 1 }, + "id": 5, + "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(loki_experimental_features_in_use_total{namespace=~\"$namespace\"})", "refId": "A" }], + "title": "Experimental Features", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 }, + "id": 6, + "panels": [], + "title": "Query Statistics", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "short" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 }, + "id": 7, + "options": { "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", 
"showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(loki_logql_querystats_ingester_sent_lines_total{namespace=~\"$namespace\"}[5m]))", "legendFormat": "Sent Lines/s", "refId": "A" }], + "title": "Ingester Sent Lines Rate", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "short" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 }, + "id": 8, + "options": { "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(loki_logql_querystats_duplicates_total{namespace=~\"$namespace\"}[5m]))", "legendFormat": "Duplicates/s", "refId": "A" }], + "title": "Query Duplicates Rate", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 }, + "id": 9, + "panels": [], + "title": "Index Cache", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": 
"palette-classic" }, + "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "short" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 15 }, + "id": 10, + "options": { "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(loki_querier_index_cache_gets_total{namespace=~\"$namespace\"}[5m]))", "legendFormat": "Gets/s", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(loki_querier_index_cache_hits_total{namespace=~\"$namespace\"}[5m]))", "legendFormat": "Hits/s", "refId": "B" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(loki_querier_index_cache_puts_total{namespace=~\"$namespace\"}[5m]))", "legendFormat": "Puts/s", "refId": "C" } + ], + "title": "Cache Operations Rate", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": 
false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "percentunit" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 15 }, + "id": 11, + "options": { "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(loki_querier_index_cache_hits_total{namespace=~\"$namespace\"}[5m])) / clamp_min(sum(rate(loki_querier_index_cache_gets_total{namespace=~\"$namespace\"}[5m])), 1e-10)", "legendFormat": "Hit Ratio", "refId": "A" }], + "title": "Cache Hit Ratio Over Time", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 }, + "id": 12, + "panels": [], + "title": "Errors", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + 
"unit": "short" + }, + "overrides": [{ "matcher": { "id": "byRegexp", "options": ".*" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 }, + "id": 13, + "options": { "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(loki_querier_index_cache_corruptions_total{namespace=~\"$namespace\"}[5m]))", "legendFormat": "Corruptions/s", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(loki_querier_index_cache_encode_errors_total{namespace=~\"$namespace\"}[5m]))", "legendFormat": "Encode Errors/s", "refId": "B" } + ], + "title": "Cache Errors Rate", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "short" + }, + "overrides": [{ "matcher": { "id": "byRegexp", "options": ".*" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 }, + "id": 14, + "options": { "legend": { 
"calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "sum(rate(loki_panic_total{namespace=~\"$namespace\"}[5m]))", "legendFormat": "Panics/s", "refId": "A" }], + "title": "Panic Rate", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": ["loki", "logging"], + "templating": { + "list": [ + { + "current": {}, + "hide": 0, + "includeAll": false, + "label": "Datasource", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": {}, + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "definition": "label_values(loki_panic_total, namespace)", + "hide": 0, + "includeAll": true, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { "query": "label_values(loki_panic_total, namespace)", "refId": "StandardVariableQuery" }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { "from": "now-1h", "to": "now" }, + "timepicker": {}, + "timezone": "browser", + "title": "Loki Metrics Overview", + "uid": "loki-custom-001", + "version": 1 +} diff --git a/charts/common-services/dashboards/loki/loki-operational-dashboard.json b/charts/common-services/dashboards/loki/loki-operational-dashboard.json new file mode 100644 index 00000000..c6071c33 --- /dev/null +++ b/charts/common-services/dashboards/loki/loki-operational-dashboard.json @@ -0,0 +1,7281 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + }, + { + "datasource": "$loki_datasource", + "enable": 
true, + "expr": "{cluster=\"$cluster\", container=\"kube-diff-logger\"} | json | namespace_extracted=\"$namespace\"", + "hide": true, + "iconColor": "rgba(255, 96, 96, 1)", + "name": "deployments", + "showIn": 0, + "target": {} + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "iteration": 1588704280892, + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 17, + "panels": [], + "title": "Main", + "type": "row" + }, + { + "aliasColors": { + "5xx": "red" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 1 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"$namespace/cortex-gw(-internal)?\", route=~\"api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\")\n)", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Queries/Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + 
}, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 10, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "5xx": "red" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 1 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=\"$cluster\", job=~\"$namespace/cortex-gw(-internal)?\", route=~\"api_prom_push|loki_api_v1_push|otlp_v1_logs\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Pushes/Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 10, + 
"max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {}, + "unit": "ops" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 1 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(5, sum by (name,level) (rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\"}[$__interval])) - \nsum by (name,level) (rate(promtail_custom_bad_words_total{cluster=\"$cluster\", exported_namespace=\"$namespace\"}[$__interval] offset 1h)))", + "legendFormat": "{{name}}-{{level}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Bad Words", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + 
{ + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 1 + }, + "hiddenSeries": false, + "id": 2, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (tenant))", + "legendFormat": "{{tenant}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Lines Per Tenant (top 10)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {}, + "unit": "MBs" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 1 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "hideEmpty": 
true, + "hideZero": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (tenant)) / 1024 / 1024", + "legendFormat": "{{tenant}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "MBs Per Tenant (Top 10)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 1 + }, + "hiddenSeries": false, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { 
+ "expr": "increase(kube_pod_container_status_restarts_total{cluster=\"$cluster\", namespace=\"$namespace\"}[10m]) > 0", + "hide": false, + "interval": "", + "legendFormat": "{{container}}-{{pod}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Container Restarts", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {}, + "unit": "ms" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 6 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"$namespace/cortex-gw(-internal)?\", route=~\"api_prom_push|loki_api_v1_push|otlp_v1_logs\", cluster=~\"$cluster\"})) * 1e3", + "legendFormat": ".99", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.75, sum by (le) 
(cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"$namespace/cortex-gw(-internal)?\", route=~\"api_prom_push|loki_api_v1_push|otlp_v1_logs\", cluster=~\"$cluster\"})) * 1e3", + "legendFormat": ".75", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.5, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"$namespace/cortex-gw(-internal)?\", route=~\"api_prom_push|loki_api_v1_push|otlp_v1_logs\", cluster=~\"$cluster\"})) * 1e3", + "legendFormat": ".5", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Push Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {}, + "unit": "ms" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 6 + }, + "hiddenSeries": false, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum
by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3", + "legendFormat": ".99", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.9, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3", + "legendFormat": ".9", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.5, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/distributor\", cluster=~\"$cluster\"})) * 1e3", + "legendFormat": ".5", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Distributor Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 6 + }, + "hiddenSeries": false, + "id": 71, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + 
"steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/distributor\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/distributor\"}[$__rate_interval])) by (route) > 0", + "interval": "", + "legendFormat": "{{route}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Distributor Success Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {}, + "unit": "ms" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 11 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) 
(cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", + "legendFormat": ".99", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.9, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", + "hide": false, + "legendFormat": ".9", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.5, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/ingester\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", + "hide": false, + "legendFormat": ".5", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Ingester Latency Write", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 11 + }, + "hiddenSeries": false, + "id": 72, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + 
"percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\", status_code!~\"5[0-9]{2}\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (route) > 0", + "interval": "", + "legendFormat": "{{route}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Ingester Success Rate Write", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {}, + "unit": "ms" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + 
"percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=\"$namespace/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))", + "legendFormat": "{{route}}-.99", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=\"$namespace/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))", + "legendFormat": "{{route}}-.9", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=\"$namespace/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))", + "legendFormat": "{{route}}-.5", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Query Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + 
"bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {}, + "unit": "ms" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 16 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=\"$namespace/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3", + "legendFormat": ".99-{{route}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=\"$namespace/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3", + "legendFormat": ".9-{{route}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=\"$namespace/querier\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3", + "legendFormat": ".5-{{route}}", + "refId": "C" + } + ], + "thresholds": [], + 
"timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Querier Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 16 + }, + "hiddenSeries": false, + "id": 73, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\"}[$__rate_interval])) by (route) > 0", + "interval": "", + "legendFormat": "{{route}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Querier Success Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": 
"individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "unit": "ms" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 21 + }, + "hiddenSeries": false, + "id": 15, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=\"$namespace/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3", + "legendFormat": ".99-{{route}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=\"$namespace/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 
1e3", + "legendFormat": ".9-{{route}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=\"$namespace/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3", + "legendFormat": ".5-{{route}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Ingester Latency Read", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 21 + }, + "hiddenSeries": false, + "id": 74, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\", 
status_code!~\"5[0-9]{2}\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (route) > 0", + "interval": "", + "legendFormat": "{{route}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Ingester Success Rate Read", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 110, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 27 + }, + "hiddenSeries": false, + "id": 112, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, 
+ "steppedLine": false, + "targets": [ + { + "expr": "topk(10,sum by (tenant, reason) (rate(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])))", + "interval": "", + "legendFormat": "{{ tenant }} - {{ reason }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Discarded Lines", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "right", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "decimals": 2, + "displayName": "", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tenant" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "reason" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 27 
+ }, + "id": 113, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "panels": [], + "pluginVersion": "10.4.0", + "targets": [ + { + "expr": "topk(10, sum by (tenant, reason) (sum_over_time(increase(loki_discarded_samples_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])[$__range:$__rate_interval])))", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "{{ tenant }} - {{ reason }}", + "refId": "A" + } + ], + "title": "Discarded Lines Per Interval", + "transformations": [ + { + "id": "merge", + "options": { + "reducers": [] + } + } + ], + "type": "table" + } + ], + "title": "Limits", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 23, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 28 + }, + "hiddenSeries": false, + "id": 26, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\".*distributor.*\"}[$__rate_interval]))", + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": 
"individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "binBps" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 28 + }, + "hiddenSeries": false, + "id": 27, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container=~\".*distributor.*\"}", + "instant": false, + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + 
"align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$loki_datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 4, + "w": 12, + "x": 12, + "y": 28 + }, + "hiddenSeries": false, + "id": 31, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "{}", + "color": "#C4162A" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/distributor\"} | logfmt | level=\"error\"[$__auto]))", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Error Log Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "$loki_datasource", + "gridPos": { + "h": 18, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 29, + "options": { + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "targets": [ + { + "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/distributor\"} |= \"level=error\"", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": 
"Logs", + "type": "logs" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 35 + }, + "hiddenSeries": false, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/distributor\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/distributor\"}[$__rate_interval])) by (route) > 0", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{route}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Success Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "binBps" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + 
"h": 7, + "w": 6, + "x": 6, + "y": 35 + }, + "hiddenSeries": false, + "id": 32, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_distributor_ingester_append_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Append Failures By Ingester", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "binBps" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 42 + }, + "hiddenSeries": false, + "id": 34, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_distributor_bytes_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Bytes Received/Second", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "ops" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 42 + }, + "hiddenSeries": false, + "id": 35, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_distributor_lines_received_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Lines 
Received/Second", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Distributor", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 19, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 29 + }, + "hiddenSeries": false, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\".*ingester.*\"}[$__rate_interval]))", + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, 
+ "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "binBps" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 36 + }, + "hiddenSeries": false, + "id": 37, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container=~\".*ingester.*\"}", + "instant": false, + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$loki_datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 
3, + "w": 18, + "x": 12, + "y": 29 + }, + "hiddenSeries": false, + "id": 38, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "{}", + "color": "#F2495C" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\"} | logfmt | level=\"error\"[$__auto]))", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Error Log Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "$loki_datasource", + "gridPos": { + "h": 18, + "w": 18, + "x": 12, + "y": 32 + }, + "id": 39, + "options": { + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "targets": [ + { + "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\"} |= \"level=error\"", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Logs", + "type": "logs" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 
41 + }, + "hiddenSeries": false, + "id": 67, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/ingester\"}[$__rate_interval])) by (route) > 0", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{route}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Success Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Ingester", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 104, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 30 + }, + "hiddenSeries": false, + "id": 106, + "legend": { + "avg": false, + 
"current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,sum by (tenant) (loki_ingester_memory_streams{cluster=\"$cluster\",job=\"$namespace/ingester\"}))", + "interval": "", + "legendFormat": "{{ tenant }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Active Streams", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 30 + }, + "hiddenSeries": false, + "id": 108, + "legend": { + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, sum by 
(tenant) (rate(loki_ingester_streams_created_total{cluster=\"$cluster\",job=\"$namespace/ingester\"}[$__rate_interval]) > 0))", + "interval": "", + "legendFormat": "{{ tenant }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Streams Created/Sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Streams", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 94, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 31 + }, + "hiddenSeries": false, + "id": 102, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "De-Dupe Ratio", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=\"$cluster\",job=\"$namespace/ingester\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Chunks", + "refId": "A" + }, + { + "expr": 
"sum(increase(loki_chunk_store_deduped_chunks_total{cluster=\"$cluster\", job=\"$namespace/ingester\"}[$__rate_interval]))/sum(increase(loki_ingester_chunks_flushed_total{cluster=\"$cluster\", job=\"$namespace/ingester\"}[$__rate_interval])) < 1", + "interval": "", + "legendFormat": "De-Dupe Ratio", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Chunks Flushed/Sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "$datasource", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 31 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 100, + "legend": { + "show": true + }, + "reverseYBuckets": false, + "targets": [ + { + "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=\"$cluster\",job=\"$namespace/ingester\"}[$__rate_interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "legendFormat": "{{ le }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Chunk Size Bytes", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "bytes", + "logBase": 1, + 
"max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 7, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 39 + }, + "hiddenSeries": false, + "id": 96, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(reason) (rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", namespace=~\"$namespace\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"($namespace)/ingester\", namespace=~\"$namespace\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "{{ reason }}" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Chunk Flush Reason %", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": 
"interpolateSpectral", + "exponent": 0.5, + "max": null, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": "$datasource", + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 39 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 98, + "legend": { + "show": true + }, + "reverseYBuckets": false, + "targets": [ + { + "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=\"$cluster\", job=\"$namespace/ingester\"}[$__rate_interval]))", + "format": "heatmap", + "instant": false, + "interval": "", + "legendFormat": "{{ le }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Chunk Utilization", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "percentunit", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + } + ], + "title": "Chunks", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 64, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 32 + }, + "hiddenSeries": false, + "id": 68, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) 
(rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\".*querier.*\"}[$__rate_interval]))", + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "binBps" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 39 + }, + "hiddenSeries": false, + "id": 69, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container=~\".*querier.*\"}", + "instant": false, + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" 
+ }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$loki_datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 3, + "w": 18, + "x": 12, + "y": 32 + }, + "hiddenSeries": false, + "id": 65, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "{}", + "color": "#F2495C" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\"} | logfmt | level=\"error\"[$__auto]))", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Error Log Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "$loki_datasource", + "gridPos": { + "h": 18, + 
"w": 18, + "x": 12, + "y": 35 + }, + "id": 66, + "options": { + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "targets": [ + { + "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\"} |= \"level=error\"", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Logs", + "type": "logs" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 46 + }, + "hiddenSeries": false, + "id": 70, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/querier\"}[$__rate_interval])) by (route) > 0", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{route}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Success Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, 
+ "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Querier", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 64, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 32 + }, + "hiddenSeries": false, + "id": 68, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\".*querier.*\"}[$__rate_interval]))", + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "binBps" + 
} + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 39 + }, + "hiddenSeries": false, + "id": 69, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container=~\".*backend.*\"}", + "instant": false, + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$loki_datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 3, + "w": 18, + "x": 12, + "y": 32 + }, + "hiddenSeries": false, + "id": 65, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + 
"renderer": "flot", + "seriesOverrides": [ + { + "alias": "{}", + "color": "#F2495C" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/backend\"} | logfmt | level=\"error\"[$__auto]))", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Error Log Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "$loki_datasource", + "gridPos": { + "h": 18, + "w": 18, + "x": 12, + "y": 35 + }, + "id": 66, + "options": { + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "targets": [ + { + "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/backend\"} |= \"level=error\"", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Logs", + "type": "logs" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 46 + }, + "hiddenSeries": false, + "id": 70, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/backend\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/backend\"}[$__rate_interval])) by (route) > 0", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{route}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Success Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Backend Path", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 52, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 30 + }, + "hiddenSeries": false, + "id": 53, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + 
"points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (method, name, le, container))", + "intervalFactor": 1, + "legendFormat": "{{container}}: .99-{{method}}-{{name}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (method, name, le, container))", + "hide": false, + "legendFormat": "{{container}}: .9-{{method}}-{{name}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (method, name, le, container))", + "hide": false, + "legendFormat": "{{container}}: .5-{{method}}-{{name}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Latency By Method", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 38 + }, + "hiddenSeries": false, + "id": 54, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, 
+ "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_memcache_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, method, name, container)", + "intervalFactor": 1, + "legendFormat": "{{container}}: {{status_code}}-{{method}}-{{name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Status By Method", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Memcached", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 57, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 31 + }, + "hiddenSeries": false, + "id": 55, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + 
"linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".99-{{operation}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".9-{{operation}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".5-{{operation}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Latency By Operation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "ops" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 39 + }, + "hiddenSeries": false, + "id": 58, + "interval": "", + "legend": 
{ + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_consul_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, status_code, method)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}-{{operation}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Status By Operation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Consul", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 43, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 9 + }, + "hiddenSeries": false, + "id": 41, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": 
true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".9", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (operation, le))", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (operation, le))", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "MutateRows Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 9 + }, + "hiddenSeries": 
false, + "id": 46, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (operation, le))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (operation, le))", + "interval": "", + "legendFormat": "90%", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (operation, le))", + "interval": "", + "legendFormat": "50%", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "ReadRows Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + 
}, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 9 + }, + "hiddenSeries": false, + "id": 44, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (operation, le))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (operation, le))", + "interval": "", + "legendFormat": "90%", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (operation, le))", + "interval": "", + "legendFormat": "50%", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "GetTable Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": 
null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 9 + }, + "hiddenSeries": false, + "id": 45, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".9", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (operation, le))", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (operation, le))", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + 
"timeRegions": [], + "timeShift": null, + "title": "ListTables Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "ops" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 47, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[$__rate_interval])) by (status_code)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "MutateRows Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "ops" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 16 + }, + "hiddenSeries": false, + "id": 50, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[$__rate_interval])) by (status_code)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "ReadRows Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": 
"$datasource", + "fieldConfig": { + "defaults": { + "unit": "ops" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 16 + }, + "hiddenSeries": false, + "id": 48, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[$__rate_interval])) by (status_code)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "GetTable Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "ops" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 16 + }, + "hiddenSeries": false, + "id": 49, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + 
"total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[$__rate_interval])) by (status_code)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "ListTables Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Big Table", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 60, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 33 + }, + "hiddenSeries": false, + "id": 61, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", 
+ "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".99-{{operation}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".9-{{operation}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".5-{{operation}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Latency By Operation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 41 + }, + "hiddenSeries": false, + "id": 62, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": 
true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}-{{operation}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Status By Method", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "GCS", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 76, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 9 + }, + "id": 82, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + 
"steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Failure Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 9 + }, + "id": 83, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Consumed Capacity Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 
null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 9 + }, + "id": 84, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Throttled Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 9 + }, + "id": 85, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": 
"null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Dropped Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 15 + }, + "id": 86, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))", + "legendFormat": ".99", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))", + "legendFormat": ".9", + 
"refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))", + "legendFormat": ".5", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Query Pages", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 9, + "x": 6, + "y": 15 + }, + "id": 87, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".99-{{operation}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", 
namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".9-{{operation}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".5-{{operation}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Latency By Operation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 9, + "x": 15, + "y": 15 + }, + "id": 88, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}-{{operation}}", + "refId": "A" + } + ], + 
"thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Status By Method", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Dynamo", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 37 + }, + "id": 78, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 79, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".99-{{operation}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + 
"hide": false, + "legendFormat": ".9-{{operation}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".5-{{operation}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Latency By Operation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 80, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_s3_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}-{{operation}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + 
"title": "Status By Method", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "S3", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 37 + }, + "id": 78, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 79, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".99-{{operation}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".9-{{operation}}", + "refId": "B" + }, 
+ { + "expr": "histogram_quantile(.5, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".5-{{operation}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Latency By Operation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 80, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_azure_blob_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}-{{operation}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Status By Method", + "tooltip": { + "shared": 
true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Azure Blob", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 90, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 91, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".99-{{operation}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".9-{{operation}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, 
sum(rate(loki_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".5-{{operation}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Latency By Operation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 44 + }, + "id": 92, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_cassandra_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}-{{operation}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Status By Method", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": 
"individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Cassandra", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 37 + }, + "id": 114, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 115, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "intervalFactor": 1, + "legendFormat": ".99-{{operation}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(.9, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".9-{{operation}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(.5, 
sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (operation, le))", + "hide": false, + "legendFormat": ".5-{{operation}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Latency By Operation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 116, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (status_code, operation)", + "intervalFactor": 1, + "legendFormat": "{{status_code}}-{{operation}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Status By Method", + "tooltip": { + "shared": true, + "sort": 2, + 
"value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "BoltDB Shipper", + "type": "row" + } + ], + "refresh": "30s", + "schemaVersion": 25, + "style": "dark", + "tags": [], + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "$__rate_interval", + "$__rate_interval", + "1$__rate_interval", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Loki Operational", + "uid": "f6fe30815b172c9da7e813c15ddfe607", + "version": 1 + } + \ No newline at end of file diff --git a/charts/common-services/dashboards/opensearch/opensearch-dashboard.json b/charts/common-services/dashboards/opensearch/opensearch-dashboard.json new file mode 100644 index 00000000..c3f2d89d --- /dev/null +++ b/charts/common-services/dashboards/opensearch/opensearch-dashboard.json @@ -0,0 +1,2631 @@ +{ + "__inputs": [], + "__requires": [], + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": 20827, + "graphTooltip": 1, + "hideControls": false, + "id": null, + "links": [], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "height": "100", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(255, 166, 0, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": {}, + "id": 2, + "interval": null, + "links": [], + 
"mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(opensearch_cluster_status{cluster=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "1,2", + "title": "Cluster status", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "GREEN", + "value": "0" + }, + { + "op": "=", + "text": "YELLOW", + "value": "1" + }, + { + "op": "=", + "text": "RED", + "value": "2" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "fillGradient": 0, + "gridPos": {}, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": true, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "GREEN", + "color": "rgba(50, 172, 45, 0.97)" + }, + { + "alias": "YELLOW", + "color": "rgba(255, 166, 0, 0.89)" + }, + { + "alias": "RED", + "color": "rgba(245, 54, 54, 0.9)" + } + ], + "spaceLength": 10, + "span": 4, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(opensearch_cluster_status{cluster=\"$cluster\"} == 0) + 1", + "format": 
"time_series", + "intervalFactor": 10, + "legendFormat": "GREEN", + "refId": "A" + }, + { + "expr": "(opensearch_cluster_status{cluster=\"$cluster\"} == 1)", + "format": "time_series", + "intervalFactor": 10, + "legendFormat": "YELLOW", + "refId": "B" + }, + { + "expr": "(opensearch_cluster_status{cluster=\"$cluster\"} == 2) - 1", + "format": "time_series", + "intervalFactor": 10, + "legendFormat": "RED", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": null, + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": false + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": {}, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(opensearch_cluster_nodes_number{cluster=\"$cluster\"})", + "format": 
"time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Nodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": {}, + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(opensearch_cluster_datanodes_number{cluster=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Data nodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": {}, + "id": 6, + "interval": null, + 
"links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(opensearch_cluster_pending_tasks_number{cluster=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Pending tasks", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cluster", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "height": "200", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "shard_type", + "repeatDirection": "h", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2.4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"max(opensearch_cluster_shards_number{cluster=\"$cluster\",type=\"$shard_type\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$shard_type shards", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Shards", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "height": "200", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "pool_name", + "repeatDirection": "h", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2.4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(opensearch_threadpool_tasks_number{cluster=\"$cluster\",name=\"$pool_name\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$pool_name tasks", + "tooltip": { + "shared": true, + "sort": 0, + 
"value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Threadpools", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "height": "400", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "opensearch_os_cpu_percent{cluster=\"$cluster\", node=~\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": 100, + "min": 0, + "show": true + }, + { + "format": "percent", + "label": null, + "logBase": 1, + 
"max": 100, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "opensearch_os_mem_used_bytes{cluster=\"$cluster\", node=~\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 11, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + 
"pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - opensearch_fs_path_available_bytes{cluster=\"$cluster\",node=~\"$node\"} / opensearch_fs_path_total_bytes{cluster=\"$cluster\",node=~\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}} - {{path}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "custom", + "fill": true, + "fillColor": "rgba(216, 200, 27, 0.27)", + "op": "gt", + "value": 0.8 + }, + { + "colorMode": "custom", + "fill": true, + "fillColor": "rgba(234, 112, 112, 0.22)", + "op": "gt", + "value": 0.9 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "System", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "height": "400", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, 
+ "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(opensearch_indices_indexing_index_count{cluster=\"$cluster\", node=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Documents indexing rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(opensearch_indices_indexing_index_time_seconds{cluster=\"$cluster\", node=~\"$node\"}[$interval]) / rate(opensearch_indices_indexing_index_count{cluster=\"$cluster\", node=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + 
"thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Indexing latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(opensearch_indices_search_query_count{cluster=\"$cluster\", node=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Search rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + 
"aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 15, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(opensearch_indices_search_query_time_seconds{cluster=\"$cluster\", node=~\"$node\"}[$interval]) / rate(opensearch_indices_search_query_count{cluster=\"$cluster\", node=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Search latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 3, + "fillGradient": 0, + "gridPos": {}, + "id": 16, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + 
"linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "opensearch_indices_doc_number{cluster=\"$cluster\", node=~\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Documents count (with replicas)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 17, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(opensearch_indices_doc_deleted_number{cluster=\"$cluster\", node=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": 
null, + "title": "Documents deleting rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 18, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(opensearch_indices_merges_total_docs_count{cluster=\"$cluster\",node=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Documents merging rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, 
+ "repeatRowId": null, + "showTitle": true, + "title": "Documents and Latencies", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "height": "400", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 19, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "opensearch_indices_fielddata_memory_size_bytes{cluster=\"$cluster\", node=~\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Field data memory size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 20, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": 
false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(opensearch_indices_fielddata_evictions_count{cluster=\"$cluster\", node=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Field data evictions", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 21, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "opensearch_indices_querycache_cache_size_bytes{cluster=\"$cluster\", node=~\"$node\"}", + "format": "time_series", + "intervalFactor": 
2, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Query cache size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 22, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(opensearch_indices_querycache_evictions_count{cluster=\"$cluster\", node=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Query cache evictions", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 
1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 23, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(opensearch_indices_querycache_hit_count{cluster=\"$cluster\", node=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Query cache hits", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 24, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + 
"nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(opensearch_indices_querycache_miss_number{cluster=\"$cluster\", node=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Query cache misses", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Caches", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "height": "400", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 25, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"rate(opensearch_indices_indexing_throttle_time_seconds{cluster=\"$cluster\", node=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Indexing throttling", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 26, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(opensearch_indices_merges_total_throttled_time_seconds{cluster=\"$cluster\", node=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Merging throttling", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ 
+ { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Throttling", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "height": "400", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 27, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "opensearch_jvm_mem_heap_used_bytes{cluster=\"$cluster\", node=~\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}} - heap used", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Heap used", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + 
"datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 28, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(opensearch_jvm_gc_collection_count{cluster=\"$cluster\",node=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}} - {{gc}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "GC count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": {}, + "id": 29, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + 
"spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(opensearch_jvm_gc_collection_time_seconds{cluster=\"$cluster\", node=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}} - {{gc}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "GC time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "JVM", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "tags": [], + "text": "1m", + "value": "1m" + }, + "datasource": "prometheus", + "hide": 0, + "includeAll": false, + "label": "Interval", + "multi": false, + "name": "interval", + "options": [ + { + "selected": false, + "text": "15s", + "value": "15s" + }, + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + } + ], + "query": "15s, 
30s, 1m, 5m, 1h, 6h, 1d", + "refresh": 0, + "type": "custom" + }, + { + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Cluster", + "name": "cluster", + "query": "label_values(opensearch_cluster_status, cluster)", + "refresh": 1, + "regex": "", + "sort": 1, + "type": "query" + }, + { + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "Node", + "name": "node", + "query": "label_values(opensearch_jvm_uptime_seconds{cluster=\"$cluster\"}, node)", + "refresh": 1, + "regex": "", + "sort": 1, + "type": "query" + }, + { + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": "Shard", + "name": "shard_type", + "query": "label_values(opensearch_cluster_shards_number, type)", + "refresh": 1, + "regex": "", + "sort": 1, + "type": "query" + }, + { + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": "Threadpool Type name", + "name": "pool_name", + "query": "label_values(opensearch_threadpool_tasks_number, name)", + "refresh": 1, + "regex": "", + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "OpenSearch Prometheus Exporter (Aiven Plugin)", + "version": 0, + "description": "This dashboard is the result of the build of the mixin dashboards published by the OpenSearch Prometheus Exporter plugin by Aiven (https://github.com/Aiven-Open/prometheus-exporter-plugin-for-opensearch/tree/mixin)" +} \ No newline at end of file diff --git a/charts/common-services/dashboards/postgresql/postgresql-dashboard.json b/charts/common-services/dashboards/postgresql/postgresql-dashboard.json new file mode 100644 index 00000000..f78f9d37 --- /dev/null +++ 
b/charts/common-services/dashboards/postgresql/postgresql-dashboard.json @@ -0,0 +1,494 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "PostgreSQL database monitoring dashboard", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, + "id": 100, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "id": 1, + "type": "stat", + "title": "Status", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 3, "x": 0, "y": 1}, + "targets": [ + { + "expr": "pg_up{namespace=~\"$namespace\"}", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"mappings": [{"options": {"0": {"text": "DOWN"}, "1": {"text": "UP"}}, "type": "value"}], "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "background", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 2, + "type": "stat", + "title": "Active Connections", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 4, "x": 3, "y": 1}, + "targets": [ + { + "expr": "sum(pg_stat_database_numbackends{namespace=~\"$namespace\", datname=~\"$database\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 50}, {"color": "red", "value": 100}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "area", 
"justifyMode": "center", "textMode": "value"} + }, + { + "id": 3, + "type": "stat", + "title": "Database Size", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 4, "x": 7, "y": 1}, + "targets": [ + { + "expr": "sum(pg_database_size_bytes{namespace=~\"$namespace\", datname=~\"$database\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "bytes", "thresholds": {"mode": "absolute", "steps": [{"color": "blue", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 4, + "type": "stat", + "title": "Transactions/s", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 4, "x": 11, "y": 1}, + "targets": [ + { + "expr": "sum(rate(pg_stat_database_xact_commit{namespace=~\"$namespace\", datname=~\"$database\"}[5m])) + sum(rate(pg_stat_database_xact_rollback{namespace=~\"$namespace\", datname=~\"$database\"}[5m]))", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "ops", "thresholds": {"mode": "absolute", "steps": [{"color": "blue", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "area", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 5, + "type": "stat", + "title": "Deadlocks", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 3, "x": 15, "y": 1}, + "targets": [ + { + "expr": "sum(increase(pg_stat_database_deadlocks{namespace=~\"$namespace\", datname=~\"$database\"}[1h]))", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 10}]}}, + "overrides": [] + }, + "options": {"reduceOptions": 
{"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 6, + "type": "stat", + "title": "Locks", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 3, "x": 18, "y": 1}, + "targets": [ + { + "expr": "sum(pg_locks_count{namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 100}, {"color": "red", "value": 500}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "area", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 7, + "type": "stat", + "title": "WAL Size", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 3, "x": 21, "y": 1}, + "targets": [ + { + "expr": "pg_wal_size_bytes{namespace=~\"$namespace\"}", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "bytes", "thresholds": {"mode": "absolute", "steps": [{"color": "blue", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 5}, + "id": 101, + "panels": [], + "title": "Transactions", + "type": "row" + }, + { + "id": 8, + "type": "timeseries", + "title": "Transactions Rate", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 6}, + "targets": [ + { + "expr": "sum by (datname) (rate(pg_stat_database_xact_commit{namespace=~\"$namespace\", datname=~\"$database\"}[5m]))", + "legendFormat": "{{datname}} commits", + "refId": "A" + }, + { + "expr": "sum by (datname) 
(rate(pg_stat_database_xact_rollback{namespace=~\"$namespace\", datname=~\"$database\"}[5m]))", + "legendFormat": "{{datname}} rollbacks", + "refId": "B" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 9, + "type": "timeseries", + "title": "Tuple Operations", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 6}, + "targets": [ + { + "expr": "sum by (datname) (rate(pg_stat_database_tup_inserted{namespace=~\"$namespace\", datname=~\"$database\"}[5m]))", + "legendFormat": "{{datname}} inserted", + "refId": "A" + }, + { + "expr": "sum by (datname) (rate(pg_stat_database_tup_updated{namespace=~\"$namespace\", datname=~\"$database\"}[5m]))", + "legendFormat": "{{datname}} updated", + "refId": "B" + }, + { + "expr": "sum by (datname) (rate(pg_stat_database_tup_deleted{namespace=~\"$namespace\", datname=~\"$database\"}[5m]))", + "legendFormat": "{{datname}} deleted", + "refId": "C" + }, + { + "expr": "sum by (datname) (rate(pg_stat_database_tup_fetched{namespace=~\"$namespace\", datname=~\"$database\"}[5m]))", + "legendFormat": "{{datname}} fetched", + "refId": "D" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 14}, + "id": 102, + "panels": [], + "title": "Connections & Sessions", + "type": "row" + }, + { + "id": 10, + "type": "timeseries", + "title": "Connections by Database", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 15}, + "targets": [ + { + "expr": "pg_stat_database_numbackends{namespace=~\"$namespace\", datname=~\"$database\"}", + "legendFormat": "{{datname}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "short", "min": 0}, 
"overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 11, + "type": "timeseries", + "title": "Active Sessions by State", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 15}, + "targets": [ + { + "expr": "sum by (state) (pg_stat_activity_count{namespace=~\"$namespace\", datname=~\"$database\"})", + "legendFormat": "{{state}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "short", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 23}, + "id": 103, + "panels": [], + "title": "Buffer & Cache", + "type": "row" + }, + { + "id": 12, + "type": "timeseries", + "title": "Cache Hit Ratio", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 24}, + "targets": [ + { + "expr": "sum(pg_stat_database_blks_hit{namespace=~\"$namespace\", datname=~\"$database\"}) / clamp_min(sum(pg_stat_database_blks_hit{namespace=~\"$namespace\", datname=~\"$database\"}) + sum(pg_stat_database_blks_read{namespace=~\"$namespace\", datname=~\"$database\"}), 1) * 100", + "legendFormat": "cache hit %", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "percent", "min": 0, "max": 100, "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "yellow", "value": 90}, {"color": "green", "value": 99}]}}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 13, + "type": "timeseries", + "title": "Block I/O", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 24}, + "targets": [ + { + "expr": "sum by (datname) (rate(pg_stat_database_blks_read{namespace=~\"$namespace\", datname=~\"$database\"}[5m]))", + "legendFormat": "{{datname}} reads", + "refId": "A" + }, + { 
+ "expr": "sum by (datname) (rate(pg_stat_database_blks_hit{namespace=~\"$namespace\", datname=~\"$database\"}[5m]))", + "legendFormat": "{{datname}} hits", + "refId": "B" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 32}, + "id": 104, + "panels": [], + "title": "Checkpoints & BGWriter", + "type": "row" + }, + { + "id": 14, + "type": "timeseries", + "title": "Checkpoints", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 33}, + "targets": [ + { + "expr": "rate(pg_stat_bgwriter_checkpoints_timed_total{namespace=~\"$namespace\"}[5m])", + "legendFormat": "timed", + "refId": "A" + }, + { + "expr": "rate(pg_stat_bgwriter_checkpoints_req_total{namespace=~\"$namespace\"}[5m])", + "legendFormat": "requested", + "refId": "B" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 15, + "type": "timeseries", + "title": "Buffers Written", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 33}, + "targets": [ + { + "expr": "rate(pg_stat_bgwriter_buffers_checkpoint_total{namespace=~\"$namespace\"}[5m])", + "legendFormat": "checkpoint", + "refId": "A" + }, + { + "expr": "rate(pg_stat_bgwriter_buffers_clean_total{namespace=~\"$namespace\"}[5m])", + "legendFormat": "bgwriter", + "refId": "B" + }, + { + "expr": "rate(pg_stat_bgwriter_buffers_backend_total{namespace=~\"$namespace\"}[5m])", + "legendFormat": "backend", + "refId": "C" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, 
"x": 0, "y": 41}, + "id": 105, + "panels": [], + "title": "Locks & Conflicts", + "type": "row" + }, + { + "id": 16, + "type": "timeseries", + "title": "Locks by Mode", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 42}, + "targets": [ + { + "expr": "sum by (mode) (pg_locks_count{namespace=~\"$namespace\"})", + "legendFormat": "{{mode}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "short", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 17, + "type": "timeseries", + "title": "Deadlocks & Conflicts", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 42}, + "targets": [ + { + "expr": "rate(pg_stat_database_deadlocks{namespace=~\"$namespace\", datname=~\"$database\"}[5m])", + "legendFormat": "{{datname}} deadlocks", + "refId": "A" + }, + { + "expr": "rate(pg_stat_database_conflicts{namespace=~\"$namespace\", datname=~\"$database\"}[5m])", + "legendFormat": "{{datname}} conflicts", + "refId": "B" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 50}, + "id": 106, + "panels": [], + "title": "Temp Files & WAL", + "type": "row" + }, + { + "id": 18, + "type": "timeseries", + "title": "Temporary Files", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 51}, + "targets": [ + { + "expr": "rate(pg_stat_database_temp_files{namespace=~\"$namespace\", datname=~\"$database\"}[5m])", + "legendFormat": "{{datname}} files/s", + "refId": "A" + }, + { + "expr": "rate(pg_stat_database_temp_bytes{namespace=~\"$namespace\", datname=~\"$database\"}[5m])", + "legendFormat": "{{datname}} bytes/s", + "refId": "B" + } + ], + "fieldConfig": {"defaults": {"unit": "short", "min": 0}, "overrides": 
[]}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 19, + "type": "timeseries", + "title": "WAL Size Over Time", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 51}, + "targets": [ + { + "expr": "pg_wal_size_bytes{namespace=~\"$namespace\"}", + "legendFormat": "WAL size", + "refId": "A" + }, + { + "expr": "pg_wal_segments{namespace=~\"$namespace\"}", + "legendFormat": "WAL segments", + "refId": "B" + } + ], + "fieldConfig": {"defaults": {"unit": "bytes", "min": 0}, "overrides": [{"matcher": {"id": "byName", "options": "WAL segments"}, "properties": [{"id": "unit", "value": "short"}]}]}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + } + ], + "refresh": "30s", + "schemaVersion": 39, + "style": "dark", + "tags": ["postgresql", "database"], + "templating": { + "list": [ + { + "name": "namespace", + "type": "query", + "datasource": "Prometheus", + "query": "label_values(pg_up, namespace)", + "refresh": 2, + "includeAll": true, + "allValue": ".*", + "current": {"text": "All", "value": "$__all"} + }, + { + "name": "database", + "type": "query", + "datasource": "Prometheus", + "query": "label_values(pg_stat_database_numbackends{namespace=~\"$namespace\"}, datname)", + "refresh": 2, + "includeAll": true, + "allValue": ".*", + "current": {"text": "All", "value": "$__all"} + } + ] + }, + "time": {"from": "now-6h", "to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "PostgreSQL Database Monitoring", + "uid": "postgresql-monitoring", + "version": 1, + "weekStart": "" +} diff --git a/charts/common-services/dashboards/prometheus/prometheus-dashboard.json b/charts/common-services/dashboards/prometheus/prometheus-dashboard.json new file mode 100644 index 00000000..7150f1be --- /dev/null +++ b/charts/common-services/dashboards/prometheus/prometheus-dashboard.json @@ -0,0 +1,429 @@ +{ + "annotations": { + 
"list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Prometheus server self-monitoring dashboard", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, + "id": 100, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "id": 1, + "type": "stat", + "title": "Prometheus Version", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 6, "x": 0, "y": 1}, + "targets": [ + { + "expr": "prometheus_build_info{}", + "legendFormat": "{{version}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"thresholds": {"mode": "absolute", "steps": [{"color": "blue", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "/^version$/", "values": false}, "colorMode": "value", "graphMode": "none", "justifyMode": "center", "textMode": "name"} + }, + { + "id": 2, + "type": "stat", + "title": "Ready", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 3, "x": 6, "y": 1}, + "targets": [ + { + "expr": "prometheus_ready{}", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"mappings": [{"options": {"0": {"text": "Not Ready"}, "1": {"text": "Ready"}}, "type": "value"}], "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "background", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 3, + "type": "stat", + "title": "Config Reload", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 3, "x": 9, "y": 1}, + "targets": [ + { + "expr": 
"prometheus_config_last_reload_successful{}", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"mappings": [{"options": {"0": {"text": "Failed"}, "1": {"text": "Success"}}, "type": "value"}], "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "background", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 4, + "type": "stat", + "title": "Active Targets", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 4, "x": 12, "y": 1}, + "targets": [ + { + "expr": "count(up{})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "blue", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 5, + "type": "stat", + "title": "Targets Up", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 4, "x": 16, "y": 1}, + "targets": [ + { + "expr": "count(up{} == 1)", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 6, + "type": "stat", + "title": "Targets Down", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 4, "x": 20, "y": 1}, + "targets": [ + { + "expr": "count(up{} == 0)", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "thresholds": {"mode": 
"absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 1}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 5}, + "id": 101, + "panels": [], + "title": "Query Performance", + "type": "row" + }, + { + "id": 7, + "type": "stat", + "title": "Active Queries", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 6, "x": 0, "y": 6}, + "targets": [ + { + "expr": "prometheus_engine_queries{}", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "blue", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "area", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 8, + "type": "stat", + "title": "Max Concurrent Queries", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 6, "x": 6, "y": 6}, + "targets": [ + { + "expr": "prometheus_engine_queries_concurrent_max{}", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "orange", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 9, + "type": "timeseries", + "title": "Query Duration", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 6}, + "targets": [ + { + "expr": "rate(prometheus_engine_query_duration_seconds_sum{}[5m]) / clamp_min(rate(prometheus_engine_query_duration_seconds_count{}[5m]), 1)", + "legendFormat": "{{slice}} avg", + 
"refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "s", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 10, + "type": "timeseries", + "title": "Active Queries Over Time", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 12, "x": 0, "y": 10}, + "targets": [ + { + "expr": "prometheus_engine_queries{}", + "legendFormat": "active queries", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "short", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 14}, + "id": 102, + "panels": [], + "title": "HTTP API", + "type": "row" + }, + { + "id": 11, + "type": "timeseries", + "title": "HTTP Request Rate", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 15}, + "targets": [ + { + "expr": "sum by (handler) (rate(prometheus_http_requests_total{}[5m]))", + "legendFormat": "{{handler}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "reqps", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "table", "placement": "right", "sortBy": "Last", "sortDesc": true}, "tooltip": {"mode": "multi"}} + }, + { + "id": 12, + "type": "timeseries", + "title": "HTTP Request Latency (avg)", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 15}, + "targets": [ + { + "expr": "sum by (handler) (rate(prometheus_http_request_duration_seconds_sum{}[5m])) / clamp_min(sum by (handler) (rate(prometheus_http_request_duration_seconds_count{}[5m])), 1)", + "legendFormat": "{{handler}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "s", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "table", "placement": "right", "sortBy": "Last", "sortDesc": true}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + 
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 23}, + "id": 103, + "panels": [], + "title": "Scraping", + "type": "row" + }, + { + "id": 13, + "type": "timeseries", + "title": "Scrape Duration by Job", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 24}, + "targets": [ + { + "expr": "topk(10, avg by (job) (scrape_duration_seconds{}))", + "legendFormat": "{{job}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "s", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "table", "placement": "right", "sortBy": "Last", "sortDesc": true}, "tooltip": {"mode": "multi"}} + }, + { + "id": 14, + "type": "timeseries", + "title": "Samples Scraped by Job", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 24}, + "targets": [ + { + "expr": "topk(10, sum by (job) (scrape_samples_scraped{}))", + "legendFormat": "{{job}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "short", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "table", "placement": "right", "sortBy": "Last", "sortDesc": true}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 32}, + "id": 104, + "panels": [], + "title": "Storage", + "type": "row" + }, + { + "id": 15, + "type": "timeseries", + "title": "TSDB Head Series", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 33}, + "targets": [ + { + "expr": "prometheus_tsdb_head_series{}", + "legendFormat": "head series", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "short", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 16, + "type": "timeseries", + "title": "TSDB Compactions", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 33}, + "targets": [ + { + "expr": "rate(prometheus_tsdb_compactions_total{}[5m])", + "legendFormat": 
"compactions/s", + "refId": "A" + }, + { + "expr": "rate(prometheus_tsdb_compactions_failed_total{}[5m])", + "legendFormat": "failed/s", + "refId": "B" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 41}, + "id": 105, + "panels": [], + "title": "Alerting", + "type": "row" + }, + { + "id": 17, + "type": "stat", + "title": "Alertmanagers Discovered", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 6, "x": 0, "y": 42}, + "targets": [ + { + "expr": "prometheus_notifications_alertmanagers_discovered{}", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 18, + "type": "stat", + "title": "Notification Queue", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 6, "x": 6, "y": 42}, + "targets": [ + { + "expr": "prometheus_notifications_queue_length{}", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 100}, {"color": "red", "value": 1000}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "area", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 19, + "type": "timeseries", + "title": "Notifications Sent", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 42}, + "targets": [ + { + "expr": 
"rate(prometheus_notifications_sent_total{}[5m])", + "legendFormat": "sent/s", + "refId": "A" + }, + { + "expr": "rate(prometheus_notifications_dropped_total{}[5m])", + "legendFormat": "dropped/s", + "refId": "B" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + } + ], + "refresh": "30s", + "schemaVersion": 39, + "style": "dark", + "tags": ["prometheus", "monitoring"], + "templating": { + "list": [] + }, + "time": {"from": "now-6h", "to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "Prometheus Self-Monitoring", + "uid": "prometheus-self-monitoring", + "version": 1, + "weekStart": "" +} diff --git a/charts/common-services/dashboards/velero/velero-dashboard.json b/charts/common-services/dashboards/velero/velero-dashboard.json new file mode 100644 index 00000000..c505cb6f --- /dev/null +++ b/charts/common-services/dashboards/velero/velero-dashboard.json @@ -0,0 +1,2499 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Velero Stats maintained by Velero team", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 16829, + "graphTooltip": 1, + "id": 1, + "iteration": 1694687653016, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 15, + "panels": [], + "title": "Backup", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "The sum of one-off backup and schedule backup success total ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + 
"mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.95 + }, + { + "color": "#299c46", + "value": 0.99 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 3, + "x": 0, + "y": 1 + }, + "id": 23, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "velero_backup_total", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Backup Total", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "The last status of the backup. 
A value of 1 indicates success, while a value of 0 indicates failure.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.95 + }, + { + "color": "#299c46", + "value": 0.99 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 3, + "x": 3, + "y": 1 + }, + "id": 48, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "velero_backup_last_status", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Backup Last Status", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.95 + }, + { + "color": "#299c46", + "value": 0.99 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 3, + "x": 6, + "y": 1 + }, + "id": 22, + "links": [], + "maxDataPoints": 100, + "options": { + "orientation": "horizontal", + 
"reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "sum(velero_backup_success_total{schedule=~\"$schedule\"}) / sum(velero_backup_attempt_total{schedule=~\"$schedule\"})", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Backup Success Rate", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.95 + }, + { + "color": "#299c46", + "value": 0.99 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 3, + "x": 9, + "y": 1 + }, + "id": 25, + "links": [], + "maxDataPoints": 100, + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "sum(velero_volume_snapshot_success_total{schedule=~\"$schedule\"}) / sum(velero_volume_snapshot_attempt_total{schedule=~\"$schedule\"})\n", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Volume snapshot Success Rate", + "type": "gauge" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.95 + }, + { + "color": "#299c46", + "value": 0.99 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 3, + "x": 12, + "y": 1 + }, + "id": 26, + "links": [], + "maxDataPoints": 100, + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "sum(velero_backup_deletion_success_total{schedule=~\"$schedule\"}) / sum(velero_backup_deletion_attempt_total{schedule=~\"$schedule\"})", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Backup Deletion Success Rate", + "type": "gauge" + }, + { + "columns": [ + { + "$$hashKey": "object:462", + "text": "Current", + "value": "current" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "for schedule backup use only", + "fontSize": "100%", + "gridPos": { + "h": 9, + "w": 9, + "x": 15, + "y": 1 + }, + "id": 13, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "styles": [ + { + "alias": "Time", + "align": "auto", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Hours since last backup", + "align": "auto", + "colorMode": "row", + 
"colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "decimals": 2, + "pattern": "Current", + "thresholds": [ + "24", + "48" + ], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "(time() - velero_backup_last_successful_timestamp{schedule!=\"\"}) / 60 / 60", + "instant": true, + "interval": "", + "legendFormat": "{{schedule}}", + "refId": "A" + } + ], + "title": "Hours since last Backup", + "transform": "timeseries_aggregations", + "type": "table-old" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "decimals": 0, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 10 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.4", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": true, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(velero_backup_success_total{schedule=~\"$schedule\"}[1h])))", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "Backup success", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": 
"round(sum(increase(velero_backup_failure_total{schedule=~\"$schedule\"}[1h])))", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Backup failure", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(velero_backup_partial_failure_total{schedule=~\"$schedule\"}[1h])))", + "instant": false, + "interval": "", + "legendFormat": "Backup partial failure", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(velero_backup_deletion_success_total{schedule=~\"$schedule\"}[1h])))", + "instant": false, + "interval": "", + "legendFormat": "Backup deletion success", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(velero_backup_deletion_failure_total{schedule=~\"$schedule\"}[1h])))", + "instant": false, + "interval": "", + "legendFormat": "Backup deletion failure", + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "sum(avg_over_time(velero_backup_items_total{schedule=~\"$schedule\"}[1h]))", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Backup items total", + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "sum(avg_over_time(velero_backup_items_errors{schedule=~\"$schedule\"}[1h]))", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Backup items errors_total", + "refId": "G" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(velero_backup_validation_failure_total{schedule=~\"$schedule\"}[1h])))", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Backup validation 
failure", + "refId": "H" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(velero_volume_snapshot_success_total{schedule=~\"$schedule\"}[1h])))", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Backup volume snapshot success", + "refId": "I" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(velero_volume_snapshot_failure_total{schedule=~\"$schedule\"}[1h])))", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Backup volume snapshot failure", + "refId": "J" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "round(sum(increase(velero_backup_warning_total{schedule=~\"$schedule\"}[1h])))", + "hide": false, + "interval": "", + "legendFormat": "Backup warning", + "refId": "K" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Backup per hour", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:124", + "decimals": 0, + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:125", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "cards": {}, + "color": { + "cardColor": "#1F60C4", + "colorScale": "linear", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 19 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 34, + "legend": { + "show": true + }, + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource}" + }, + "exemplar": false, + "expr": "sum(increase(velero_backup_duration_seconds_bucket{schedule=~\"$schedule\",le!=\"+Inf\"}[1h])) by (le)", + "format": "heatmap", + "hide": false, + "interval": "", + "legendFormat": "{{le}}", + "refId": "B" + } + ], + "title": "Backup time heatmap", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 27 + }, + "hiddenSeries": false, + "id": 18, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.4", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "sum(avg_over_time(velero_backup_tarball_size_bytes{schedule=~\"$schedule\"}[15m]))", + "interval": "", + "legendFormat": "{{schedule}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Backup Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:561", + "decimals": 0, + "format": "decbytes", + "label": "", + 
"logBase": 1, + "show": true + }, + { + "$$hashKey": "object:562", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 17, + "panels": [], + "title": "Restore", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.95 + }, + { + "color": "#299c46", + "value": 0.99 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 4, + "x": 0, + "y": 34 + }, + "id": 27, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "velero_restore_total", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Restore Total", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + 
"value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.95 + }, + { + "color": "#299c46", + "value": 0.99 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 4, + "x": 4, + "y": 34 + }, + "id": 24, + "links": [], + "maxDataPoints": 100, + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "sum(velero_restore_success_total{schedule=~\"$schedule\"}) / sum(velero_restore_attempt_total{schedule=~\"$schedule\"})", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Restore Success Rate", + "type": "gauge" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "decimals": 0, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 16, + "x": 8, + "y": 34 + }, + "hiddenSeries": false, + "id": 28, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.4", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": true, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + 
"exemplar": false, + "expr": "round(sum(increase(velero_restore_success_total{schedule=~\"$schedule\"}[1h])))", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "Restore success", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(velero_restore_failed_total{schedule=~\"$schedule\"}[1h])))", + "hide": false, + "interval": "", + "legendFormat": "Restore failure", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(velero_restore_validation_failed_total{schedule=~\"$schedule\"}[1h])))", + "hide": false, + "interval": "", + "legendFormat": "Restore validation failure", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(velero_restore_partial_failure_total{schedule=~\"$schedule\"}[1h])))", + "hide": false, + "interval": "", + "legendFormat": "Restore partial failure", + "refId": "D" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Restore per hour", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:124", + "decimals": 0, + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:125", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 43 + }, + "id": 30, + "panels": [], + "title": "CSI", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "decimals": 0, + "fieldConfig": { + "defaults": { + "links": [] + }, + 
"overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 44 + }, + "hiddenSeries": false, + "id": 32, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.4", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": true, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(velero_csi_snapshot_attempt_total{schedule=~\"$schedule\", backupName=~\"$csi_backup_name\"}[1h])))", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "CSI Snapshot attempt", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(velero_csi_snapshot_success_total{schedule=~\"$schedule\", backupName=~\"$csi_backup_name\"}[1h])))", + "hide": false, + "interval": "", + "legendFormat": "CSI Snapshot success", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(velero_csi_snapshot_failure_total{schedule=~\"$schedule\", backupName=~\"$csi_backup_name\"}[1h])))", + "hide": false, + "interval": "", + "legendFormat": "CSI Snapshot failure", + "refId": "C" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "CSI per hour", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": 
"object:124", + "decimals": 0, + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:125", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 53 + }, + "id": 36, + "panels": [], + "title": "Restic", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.95 + }, + { + "color": "#299c46", + "value": 0.99 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 4, + "x": 0, + "y": 54 + }, + "id": 37, + "links": [], + "maxDataPoints": 100, + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "sum(restic_pod_volume_backup_dequeue_count{node=~\"$restic_node\"}) / sum(restic_pod_volume_backup_enqueue_count{node=~\"$restic_node\"})", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Restic Success Rate", + "type": "gauge" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "decimals": 0, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + 
"fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 20, + "x": 4, + "y": 54 + }, + "hiddenSeries": false, + "id": 38, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.4", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": true, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(restic_pod_volume_backup_enqueue_count{node=~\"$restic_node\"}[1h])))", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "Restic enqueue", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(restic_pod_volume_backup_dequeue_count{node=~\"$restic_node\"}[1h])))", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Restic dequeue", + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Restic per hour", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:124", + "decimals": 0, + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:125", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + 
"defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 63 + }, + "hiddenSeries": false, + "id": 39, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.4", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "sum(avg_over_time(restic_restic_operation_latency_seconds_gauge{backupName=~\"$restic_backup_name\", node=~\"$restic_node\", operation=~\"$restic_operation\", pod_volume_backup=~\"$restic_pvb_name\"}[15m]))", + "interval": "", + "legendFormat": "Avg over time", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Restic time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:561", + "decimals": 0, + "format": "s", + "label": "", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:562", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 69 + }, + "id": 43, + "panels": [], + "title": "File System Backup(for v1.10 and later)", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + 
"result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.95 + }, + { + "color": "#299c46", + "value": 0.99 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 4, + "x": 0, + "y": 70 + }, + "id": 41, + "links": [], + "maxDataPoints": 100, + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "sum(podVolume_pod_volume_backup_dequeue_count{node=~\"$fsb_node\"}) / sum(podVolume_pod_volume_backup_enqueue_count{node=~\"$fsb_node\"})", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "FSB Success Rate", + "type": "gauge" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "decimals": 0, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 20, + "x": 4, + "y": 70 + }, + "hiddenSeries": false, + "id": 45, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.4", + "pointradius": 2, + "points": false, + "renderer": 
"flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": true, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(podVolume_pod_volume_backup_enqueue_count{node=~\"$fsb_node\"}[1h])))", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "FSB enqueue", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(podVolume_pod_volume_backup_dequeue_count{node=~\"$fsb_node\"}[1h])))", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "FSB dequeue", + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "FSB per hour", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:124", + "decimals": 0, + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:125", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 79 + }, + "hiddenSeries": false, + "id": 47, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.4", + "pointradius": 2, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "sum(avg_over_time(podVolume_pod_volume_operation_latency_seconds_gauge{backupName=~\"$fsb_backup_name\", node=~\"$fsb_node\", operation=~\"$fsb_operation\", pod_volume_backup=~\"$fsb_pvb_name\"}[15m]))", + "interval": "", + "legendFormat": "Avg over time", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "FSB time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:561", + "decimals": 0, + "format": "s", + "label": "", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:562", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 85 + }, + "id": 52, + "panels": [], + "title": "Data Mover", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "decimals": 0, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 86 + }, + "hiddenSeries": false, + "id": 53, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.4", + "pointradius": 2, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": true, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(podVolume_data_upload_success_total{node=~\"$fsb_node\"}[1h])))", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "Data upload success", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(podVolume_data_upload_failure_total{node=~\"$fsb_node\"}[1h])))", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Data upload failure", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(podVolume_data_upload_cancel_total{node=~\"$fsb_node\"}[1h])))", + "instant": false, + "interval": "", + "legendFormat": "Data upload cancel", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(podVolume_data_download_success_total{node=~\"$fsb_node\"}[1h])))", + "instant": false, + "interval": "", + "legendFormat": "Data download success", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": false, + "expr": "round(sum(increase(podVolume_data_download_failure_total{node=~\"$fsb_node\"}[1h])))", + "instant": false, + "interval": "", + "legendFormat": "Data download failure", + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "exemplar": true, + "expr": "round(sum(increase(podVolume_data_upload_cancel_total{node=~\"$fsb_node\"}[1h])))", + "hide": false, + "interval": "", + "legendFormat": "Data download cancel", + "refId": "F" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "CSI Snapshot Data Mover per hour", + 
"tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:124", + "decimals": 0, + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:125", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + } + ], + "refresh": false, + "schemaVersion": 35, + "style": "dark", + "tags": [ + "velero" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "includeAll": false, + "label": "Datasource", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "definition": "label_values(velero_backup_attempt_total, schedule)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "schedule", + "options": [], + "query": { + "query": "label_values(velero_backup_attempt_total, schedule)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "definition": "label_values(velero_csi_snapshot_attempt_total, backupName)", + "hide": 0, + "includeAll": true, + "multi": false, + "name": "csi_backup_name", + "options": [], + "query": { + "query": "label_values(velero_csi_snapshot_attempt_total, backupName)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + 
"type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "definition": "label_values(restic_pod_volume_backup_enqueue_count, node)", + "hide": 0, + "includeAll": true, + "multi": false, + "name": "restic_node", + "options": [], + "query": { + "query": "label_values(restic_pod_volume_backup_enqueue_count, node)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "definition": "label_values(restic_restic_operation_latency_seconds_gauge, backupName)", + "hide": 0, + "includeAll": true, + "multi": false, + "name": "restic_backup_name", + "options": [], + "query": { + "query": "label_values(restic_restic_operation_latency_seconds_gauge, backupName)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "definition": "label_values(restic_restic_operation_latency_seconds_gauge, operation)", + "hide": 0, + "includeAll": true, + "multi": false, + "name": "restic_operation", + "options": [], + "query": { + "query": "label_values(restic_restic_operation_latency_seconds_gauge, operation)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "definition": "label_values(restic_restic_operation_latency_seconds_gauge, pod_volume_backup)", + "hide": 0, + "includeAll": true, + 
"multi": false, + "name": "restic_pvb_name", + "options": [], + "query": { + "query": "label_values(restic_restic_operation_latency_seconds_gauge, pod_volume_backup)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "definition": "label_values(podVolume_pod_volume_backup_enqueue_count, node)", + "hide": 0, + "includeAll": true, + "multi": false, + "name": "fsb_node", + "options": [], + "query": { + "query": "label_values(podVolume_pod_volume_backup_enqueue_count, node)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "definition": "label_values(podVolume_pod_volume_operation_latency_seconds_gauge, backupName)", + "hide": 0, + "includeAll": true, + "multi": false, + "name": "fsb_backup_name", + "options": [], + "query": { + "query": "label_values(podVolume_pod_volume_operation_latency_seconds_gauge, backupName)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "definition": "label_values(podVolume_pod_volume_operation_latency_seconds_gauge, operation)", + "hide": 0, + "includeAll": true, + "multi": false, + "name": "fsb_operation", + "options": [], + "query": { + "query": "label_values(podVolume_pod_volume_operation_latency_seconds_gauge, operation)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "definition": "label_values(podVolume_pod_volume_operation_latency_seconds_gauge, pod_volume_backup)", + "hide": 
0, + "includeAll": true, + "multi": false, + "name": "fsb_pvb_name", + "options": [], + "query": { + "query": "label_values(podVolume_pod_volume_operation_latency_seconds_gauge, pod_volume_backup)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-2d", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Kubernetes/Tanzu/Velero", + "uid": "EbXSjT24k", + "version": 15, + "weekStart": "" +} \ No newline at end of file diff --git a/charts/common-services/dashboards/zookeeper/zookeeper-dashboard.json b/charts/common-services/dashboards/zookeeper/zookeeper-dashboard.json new file mode 100644 index 00000000..23c3609f --- /dev/null +++ b/charts/common-services/dashboards/zookeeper/zookeeper-dashboard.json @@ -0,0 +1,1383 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "6.5.2" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": 11442, + "graphTooltip": 0, + "id": null, + "iteration": 1576741902842, + "links": [], + "panels": [ 
+ { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "zk_up{job=~\"$job\", instance=~\"$instance\"}", + "refId": "A" + } + ], + "thresholds": "0.99,0.10", + "timeFrom": null, + "timeShift": null, + "title": "Status", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "UP", + "value": "1" + }, + { + "op": "=", + "text": "DOWN", + "value": "0" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + 
"value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "zk_server_leader{job=~\"$job\", instance=~\"$instance\"}", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Is Leader", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "No", + "value": "0" + }, + { + "op": "=", + "text": "Yes", + "value": "1" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "zk_ephemerals_count{job=~\"$job\", instance=~\"$instance\"}", + 
"refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Ephemerals Count", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "zk_znode_count{job=~\"$job\", instance=~\"$instance\"}", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Znode Count", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + 
"gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "zk_watch_count{job=~\"$job\", instance=~\"$instance\"}", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Watch Count", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "content": "$version\n\n\n", + "datasource": "${DS_PROMETHEUS}", + "gridPos": { + "h": 2, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 4, + "mode": "html", + "options": {}, + "timeFrom": null, + "timeShift": null, + "title": "ZK Version", + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 4 + }, + "hiddenSeries": false, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + 
"expr": "rate(zk_packets_sent{job=~\"$job\", instance=~\"$instance\"}[5m])", + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Pakages Sent", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 4 + }, + "hiddenSeries": false, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(zk_packets_received{job=~\"$job\", instance=~\"$instance\"}[5m])", + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Pakages Recieved", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + 
{ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 10 + }, + "hiddenSeries": false, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "zk_num_alive_connections{job=~\"$job\", instance=~\"$instance\"}", + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Alive Connections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 10 + }, + "hiddenSeries": false, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": 
true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "zk_open_file_descriptor_count{job=~\"$job\", instance=~\"$instance\"}", + "legendFormat": "open", + "refId": "A" + }, + { + "expr": "zk_max_file_descriptor_count{job=~\"$job\", instance=~\"$instance\"}", + "legendFormat": "max", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "File Descriptors", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 25, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "zk_avg_latency{job=~\"$job\", instance=~\"$instance\"}", + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, 
+ "timeRegions": [], + "timeShift": null, + "title": "Avg Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "ms", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 16 + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "zk_min_latency{job=~\"$job\", instance=~\"$instance\"}", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Min Latency", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": 
false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "ms", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 16, + "y": 16 + }, + "id": 24, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "zk_max_latency{job=~\"$job\", instance=~\"$instance\"}", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Max Latency", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 22 + }, + "hiddenSeries": false, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + 
"targets": [ + { + "expr": "zk_outstanding_requests{job=~\"$job\", instance=~\"$instance\"}", + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Outstanding Requests", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 22 + }, + "hiddenSeries": false, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "zk_approximate_data_size{job=~\"$job\", instance=~\"$instance\"}", + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Approx Data Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, 
+ "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "schemaVersion": 21, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(zk_up,job)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "job", + "options": [], + "query": "label_values(zk_up,job)", + "refresh": 2, + "regex": "/(.*zk.*|.*zoo.*)/", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(zk_up{job=~\"$job\"},instance)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "instance", + "options": [], + "query": "label_values(zk_up{job=~\"$job\"},instance)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(zk_version{job=~\"$job\", instance=~\"$instance\"}, version)", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "version", + "options": [], + "query": "label_values(zk_version{job=~\"$job\", instance=~\"$instance\"}, version)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Zookeeper Exporter (dabealu)", + 
"uid": "4HhbN1BZk", + "version": 6, + "description": "Dashboard for https://github.com/dabealu/zookeeper-exporter\r\n" +} \ No newline at end of file diff --git a/charts/common-services/dashboards/zookeeper/zookeeper-metrics-dashboard.json b/charts/common-services/dashboards/zookeeper/zookeeper-metrics-dashboard.json new file mode 100644 index 00000000..daec04f9 --- /dev/null +++ b/charts/common-services/dashboards/zookeeper/zookeeper-metrics-dashboard.json @@ -0,0 +1,366 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "ZooKeeper cluster monitoring dashboard with JMX metrics", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, + "id": 100, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "id": 1, + "type": "stat", + "title": "Uptime", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 4, "x": 0, "y": 1}, + "targets": [ + { + "expr": "max(uptime{service=~\".*zookeeper.*\", namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "ms", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 2, + "type": "stat", + "title": "ZNode Count", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 4, "x": 4, "y": 1}, + "targets": [ + { + "expr": "sum(znode_count{service=~\".*zookeeper.*\", namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", 
"thresholds": {"mode": "absolute", "steps": [{"color": "blue", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "none", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 3, + "type": "stat", + "title": "Watch Count", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 4, "x": 8, "y": 1}, + "targets": [ + { + "expr": "sum(watch_count{service=~\".*zookeeper.*\", namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "purple", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "area", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 4, + "type": "stat", + "title": "Global Sessions", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 4, "x": 12, "y": 1}, + "targets": [ + { + "expr": "sum(global_sessions{service=~\".*zookeeper.*\", namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "orange", "value": null}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "area", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 5, + "type": "stat", + "title": "Outstanding Requests", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 4, "x": 16, "y": 1}, + "targets": [ + { + "expr": "sum(outstanding_requests{service=~\".*zookeeper.*\", namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": 
"yellow", "value": 10}, {"color": "red", "value": 100}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "area", "justifyMode": "center", "textMode": "value"} + }, + { + "id": 6, + "type": "stat", + "title": "Avg Latency (ms)", + "datasource": "Prometheus", + "gridPos": {"h": 4, "w": 4, "x": 20, "y": 1}, + "targets": [ + { + "expr": "avg(avg_latency{service=~\".*zookeeper.*\", namespace=~\"$namespace\"})", + "legendFormat": "", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": {"unit": "ms", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 10}, {"color": "red", "value": 100}]}}, + "overrides": [] + }, + "options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "colorMode": "value", "graphMode": "area", "justifyMode": "center", "textMode": "value"} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 5}, + "id": 101, + "panels": [], + "title": "Latency", + "type": "row" + }, + { + "id": 7, + "type": "timeseries", + "title": "Request Latency", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 6}, + "targets": [ + { + "expr": "avg_latency{service=~\".*zookeeper.*\", namespace=~\"$namespace\"}", + "legendFormat": "{{pod}} avg", + "refId": "A" + }, + { + "expr": "max_latency{service=~\".*zookeeper.*\", namespace=~\"$namespace\"}", + "legendFormat": "{{pod}} max", + "refId": "B" + }, + { + "expr": "min_latency{service=~\".*zookeeper.*\", namespace=~\"$namespace\"}", + "legendFormat": "{{pod}} min", + "refId": "C" + } + ], + "fieldConfig": {"defaults": {"unit": "ms", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 8, + "type": "timeseries", + "title": "Quorum Ack Latency", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, 
"x": 12, "y": 6}, + "targets": [ + { + "expr": "rate(quorum_ack_latency_sum{service=~\".*zookeeper.*\", namespace=~\"$namespace\"}[5m]) / clamp_min(rate(quorum_ack_latency_count{service=~\".*zookeeper.*\", namespace=~\"$namespace\"}[5m]), 1)", + "legendFormat": "{{pod}} avg", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ms", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 14}, + "id": 102, + "panels": [], + "title": "Throughput", + "type": "row" + }, + { + "id": 9, + "type": "timeseries", + "title": "Packets Received/Sent", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 15}, + "targets": [ + { + "expr": "rate(packets_received{service=~\".*zookeeper.*\", namespace=~\"$namespace\"}[5m])", + "legendFormat": "{{pod}} received", + "refId": "A" + }, + { + "expr": "rate(packets_sent{service=~\".*zookeeper.*\", namespace=~\"$namespace\"}[5m])", + "legendFormat": "{{pod}} sent", + "refId": "B" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 10, + "type": "timeseries", + "title": "Commits", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 15}, + "targets": [ + { + "expr": "rate(commit_count{service=~\".*zookeeper.*\", namespace=~\"$namespace\"}[5m])", + "legendFormat": "{{pod}} commits/s", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ops", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 23}, + "id": 103, + "panels": [], + "title": "Disk I/O", + "type": "row" + }, + { + "id": 11, + "type": "timeseries", + "title": "Fsync 
Time", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 24}, + "targets": [ + { + "expr": "rate(fsynctime_sum{service=~\".*zookeeper.*\", namespace=~\"$namespace\"}[5m]) / clamp_min(rate(fsynctime_count{service=~\".*zookeeper.*\", namespace=~\"$namespace\"}[5m]), 1)", + "legendFormat": "{{pod}} avg fsync", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ms", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 12, + "type": "timeseries", + "title": "Snapshot Time", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 24}, + "targets": [ + { + "expr": "rate(snapshottime_sum{service=~\".*zookeeper.*\", namespace=~\"$namespace\"}[5m]) / clamp_min(rate(snapshottime_count{service=~\".*zookeeper.*\", namespace=~\"$namespace\"}[5m]), 1)", + "legendFormat": "{{pod}} avg snapshot", + "refId": "A" + }, + { + "expr": "snap_count{service=~\".*zookeeper.*\", namespace=~\"$namespace\"}", + "legendFormat": "{{pod}} snapshot count", + "refId": "B" + } + ], + "fieldConfig": {"defaults": {"unit": "ms", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 32}, + "id": 104, + "panels": [], + "title": "Cluster Health", + "type": "row" + }, + { + "id": 13, + "type": "timeseries", + "title": "Outstanding Requests Over Time", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 33}, + "targets": [ + { + "expr": "outstanding_requests{service=~\".*zookeeper.*\", namespace=~\"$namespace\"}", + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "short", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 14, + "type": "timeseries", + 
"title": "Election Time", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 33}, + "targets": [ + { + "expr": "rate(election_time_sum{service=~\".*zookeeper.*\", namespace=~\"$namespace\"}[5m]) / clamp_min(rate(election_time_count{service=~\".*zookeeper.*\", namespace=~\"$namespace\"}[5m]), 1)", + "legendFormat": "{{pod}} election time", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ms", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + }, + { + "id": 15, + "type": "timeseries", + "title": "Follower Sync Time", + "datasource": "Prometheus", + "gridPos": {"h": 8, "w": 24, "x": 0, "y": 41}, + "targets": [ + { + "expr": "rate(follower_sync_time_sum{service=~\".*zookeeper.*\", namespace=~\"$namespace\"}[5m]) / clamp_min(rate(follower_sync_time_count{service=~\".*zookeeper.*\", namespace=~\"$namespace\"}[5m]), 1)", + "legendFormat": "{{pod}} sync time", + "refId": "A" + } + ], + "fieldConfig": {"defaults": {"unit": "ms", "min": 0}, "overrides": []}, + "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}} + } + ], + "refresh": "30s", + "schemaVersion": 39, + "style": "dark", + "tags": ["zookeeper", "coordination"], + "templating": { + "list": [ + { + "name": "namespace", + "type": "query", + "datasource": "Prometheus", + "query": "label_values(uptime{service=~\".*zookeeper.*\"}, namespace)", + "refresh": 2, + "includeAll": true, + "allValue": ".*", + "current": {"text": "All", "value": "$__all"} + } + ] + }, + "time": {"from": "now-6h", "to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "ZooKeeper Cluster Monitoring", + "uid": "zookeeper-cluster-monitoring", + "version": 1, + "weekStart": "" +} diff --git a/charts/common-services/override-values.yaml b/charts/common-services/override-values.yaml new file mode 100644 index 00000000..f6b216c4 --- /dev/null +++ 
b/charts/common-services/override-values.yaml @@ -0,0 +1,498 @@ +# Override values for duploservices-qaibtest namespace +# This file only enables services with metrics and dashboards configured + +# Global nodeSelector - for EKS cluster (tenantname label) +nodeSelector: + tenantname: duploservices-qaibtest + +# ========================================== +# ENABLED SERVICES (with metrics/dashboards) +# ========================================== + +# 1. Argo CD - Metrics: 8082, 8083, 8084, Dashboard: ID 14584 +argo-cd: + enabled: true + fullnameOverride: argocd + controller: + nodeSelector: + tenantname: duploservices-qaibtest + metrics: + enabled: true + serviceMonitor: + enabled: true + additionalLabels: + release: prometheus + server: + nodeSelector: + tenantname: duploservices-qaibtest + metrics: + enabled: true + serviceMonitor: + enabled: true + additionalLabels: + release: prometheus + repoServer: + nodeSelector: + tenantname: duploservices-qaibtest + metrics: + enabled: true + serviceMonitor: + enabled: true + additionalLabels: + release: prometheus + +# 2. PostgreSQL - Metrics: 9187, Dashboard: ID 9628 +postgresql: + enabled: true + fullnameOverride: postgresql + auth: + postgresPassword: "testpassword123" + database: "postgres" + primary: + nodeSelector: + tenantname: duploservices-qaibtest + metrics: + enabled: true + image: + registry: docker.io + repository: bitnami/postgres-exporter + tag: latest + serviceMonitor: + enabled: true + labels: + release: prometheus + +# 3. HAProxy - Metrics: 8404, Dashboard: ID 12030 +haproxy: + enabled: true + fullnameOverride: haproxy + nodeSelector: + tenantname: duploservices-qaibtest + serviceMonitor: + enabled: true + +# 4. ZooKeeper - Metrics: 9141, Dashboard: ID 11442 +zookeeper: + enabled: true + fullnameOverride: zookeeper + nodeSelector: + tenantname: duploservices-qaibtest + metrics: + enabled: true + serviceMonitor: + enabled: true + additionalLabels: + release: prometheus + +# 5. 
Fluent Bit - Metrics: 2020, Dashboard: ID 18855 +fluent-bit: + enabled: true + fullnameOverride: fluent-bit + nodeSelector: + tenantname: duploservices-qaibtest + serviceMonitor: + enabled: true + dashboards: + enabled: true + +# 6. Velero - Metrics: 8085, Dashboard: ID 16829 +velero: + enabled: true + nodeSelector: + tenantname: duploservices-qaibtest + metrics: + enabled: true + serviceMonitor: + enabled: true + +# 7. Loki - Metrics: 3100, Dashboard: ID 13639 +# DISABLED - requires object storage (S3/GCS/Minio) +loki: + enabled: false + +# 8. CloudNative PG - Dashboard: ID 20417 +cloudnative-pg: + enabled: true + nodeSelector: + tenantname: duploservices-qaibtest + +# 9. OpenSearch - Metrics: 9200, Dashboard: ID 20827 +opensearch: + enabled: true + nodeSelector: + tenantname: duploservices-qaibtest + metrics: + enabled: true + serviceMonitor: + enabled: true + +# OpenSearch Dashboards +opensearch-dashboards: + enabled: true + nodeSelector: + tenantname: duploservices-qaibtest + service: + type: ClusterIP + +# 10. Flink Kubernetes Operator - Metrics: 9999, Dashboard: ID 14911 +flink-kubernetes-operator: + enabled: true + nodeSelector: + tenantname: duploservices-qaibtest + metrics: + enabled: true + +# 11. Alloy - Metrics: 12345 +alloy: + enabled: true + nodeSelector: + tenantname: duploservices-qaibtest + metrics: + enabled: true + monitoring: + serviceMonitor: + enabled: true + +# 12. Elasticsearch - Metrics: via exporter 9108, Dashboard: ID 266 +elasticsearch: + enabled: true + nodeSelector: + tenantname: duploservices-qaibtest + metrics: + enabled: true + +# 13. 
Kibana - Metrics: via sidecar 9684, Dashboard: ID 21420 +kibana: + enabled: true + nodeSelector: + tenantname: duploservices-qaibtest + metrics: + enabled: true + service: + type: ClusterIP + # Custom exporter: https://github.com/gnanirahulnutakki/kibana-prometheus-exporter + extraContainers: + - name: kibana-exporter + image: rahulnutakki/kibana-prometheus-exporter:latest + args: + - --kibana-url=http://localhost:5601 + - --log-level=info + ports: + - containerPort: 9684 + name: metrics + protocol: TCP + resources: + limits: + cpu: 100m + memory: 64Mi + requests: + cpu: 10m + memory: 32Mi + livenessProbe: + httpGet: + path: /health + port: metrics + initialDelaySeconds: 10 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /ready + port: metrics + initialDelaySeconds: 10 + periodSeconds: 10 + +# ========================================== +# MONITORING STACK (enabled) AND DISABLED SERVICES (no metrics or UI only) +# ========================================== + +# 14. Prometheus - Deploy in qaibtest namespace for cross-tenant scraping +prometheus: + enabled: true + nodeSelector: + tenantname: duploservices-qaibtest + server: + nodeSelector: + tenantname: duploservices-qaibtest + service: + type: ClusterIP + alertmanager: + nodeSelector: + tenantname: duploservices-qaibtest + pushgateway: + nodeSelector: + tenantname: duploservices-qaibtest + kubeStateMetrics: + enabled: false # Already in monitoring namespace + nodeExporter: + enabled: false # Already in monitoring namespace + +grafana: + enabled: true + fullnameOverride: grafana + nodeSelector: + tenantname: duploservices-qaibtest + service: + type: ClusterIP + persistence: + enabled: false # Disable for testing + +pgadmin4: + enabled: false + +slamd: + enabled: false + +shellinabox: + enabled: false + +smtp: + enabled: false + +curator: + enabled: false + +zoonavigator: + enabled: false + +nebula-operator: + enabled: false + +backupManager: + enabled: false + +velero-ui: + enabled: false + +# ========================================== +#
PROMETHEUS EXPORTERS +# ========================================== + +# Prometheus Elasticsearch Exporter - for Elasticsearch metrics +prometheus-elasticsearch-exporter: + enabled: true + nodeSelector: + tenantname: duploservices-qaibtest + +# ========================================== +# GRAFANA CONFIGURATION +# ========================================== + +grafana: + enabled: true + fullnameOverride: grafana + nodeSelector: + tenantname: duploservices-qaibtest + service: + type: ClusterIP + persistence: + enabled: false + + datasources: + datasources.yaml: + apiVersion: 1 + datasources: + - name: Prometheus + type: prometheus + url: http://prometheus-server.duploservices-qaibtest.svc.cluster.local:9090 + access: proxy + isDefault: true + + dashboardProviders: + dashboardproviders.yaml: + apiVersion: 1 + providers: + - name: 'argo-cd' + orgId: 1 + folder: 'Infrastructure' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + editable: true + options: + path: /var/lib/grafana/dashboards/argo-cd + - name: 'postgresql' + orgId: 1 + folder: 'Infrastructure' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + editable: true + options: + path: /var/lib/grafana/dashboards/postgresql + - name: 'haproxy' + orgId: 1 + folder: 'Infrastructure' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + editable: true + options: + path: /var/lib/grafana/dashboards/haproxy + - name: 'zookeeper' + orgId: 1 + folder: 'Infrastructure' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + editable: true + options: + path: /var/lib/grafana/dashboards/zookeeper + - name: 'fluent-bit' + orgId: 1 + folder: 'Logging' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + editable: true + options: + path: /var/lib/grafana/dashboards/fluent-bit + - name: 'velero' + orgId: 1 + folder: 'Infrastructure' + type: 
file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + editable: true + options: + path: /var/lib/grafana/dashboards/velero + + # - name: 'cloudnative-pg' + # orgId: 1 + # folder: 'Infrastructure' + # type: file + # disableDeletion: false + # updateIntervalSeconds: 10 + # allowUiUpdates: true + # editable: true + # options: + # path: /var/lib/grafana/dashboards/cloudnative-pg + - name: 'opensearch' + orgId: 1 + folder: 'Infrastructure' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + editable: true + options: + path: /var/lib/grafana/dashboards/opensearch + - name: 'opensearch-dashboards' + orgId: 1 + folder: 'Infrastructure' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + editable: true + options: + path: /var/lib/grafana/dashboards/opensearch-dashboards + - name: 'flink' + orgId: 1 + folder: 'Infrastructure' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + editable: true + options: + path: /var/lib/grafana/dashboards/flink + - name: 'elasticsearch' + orgId: 1 + folder: 'Infrastructure' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + editable: true + options: + path: /var/lib/grafana/dashboards/elasticsearch + - name: 'kibana' + orgId: 1 + folder: 'Infrastructure' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + editable: true + options: + path: /var/lib/grafana/dashboards/kibana + + dashboardsConfigMaps: + argo-cd: "argo-cd-dashboard" + postgresql: "postgresql-dashboard" + haproxy: "haproxy-dashboard" + zookeeper: "zookeeper-metrics-dashboard" + fluent-bit: "fluent-bit-dashboard" + # Dashboards for all enabled services + flink: "flink-dashboard" + kibana: "kibana-dashboard" + opensearch: "opensearch-dashboard" + opensearch-dashboards: "opensearch-dashboard" + # cloudnative-pg: "cloudnative-pg-dashboard" + elasticsearch-metrics:
"elasticsearch-dashboard" + velero: "velero-dashboard" + # Disable dashboards for unavailable services + loki: null + alloy: null +argo-cd: + server: + service: + type: ClusterIP + +haproxy: + service: + type: ClusterIP + +# HAProxy routes - only route to services enabled in this file (kibana stays routed because kibana is enabled above) +haproxy: + enabled: true + fullnameOverride: haproxy + nodeSelector: + tenantname: duploservices-qaibtest + service: + type: ClusterIP + serviceMonitor: + enabled: true + route: + argocd: true + grafana: true + prometheus: false + pushgateway: false + elasticsearch: true + kibana: true + pgadmin4: false + slamd: false + shellinabox: false + eocui: false + eocapi: false + sdcapi: false + opensearch: true + opensearchdashboards: true + velero: false + +# Fix PostgreSQL metrics image +postgresql: + enabled: true + fullnameOverride: postgresql + auth: + postgresPassword: "testpassword123" + database: "postgres" + primary: + nodeSelector: + tenantname: duploservices-qaibtest + metrics: + enabled: true + image: + registry: docker.io + repository: bitnami/postgres-exporter + tag: 0.15.0-debian-11-r2 + serviceMonitor: + enabled: true + labels: + release: prometheus diff --git a/charts/common-services/templates/NOTES.txt b/charts/common-services/templates/NOTES.txt index 6e9bf23f..c4ff544f 100644 --- a/charts/common-services/templates/NOTES.txt +++ b/charts/common-services/templates/NOTES.txt @@ -7,21 +7,29 @@ Changes in this upgrade: - Chart version updated to {{ .Chart.Version }} - Application version is now {{ .Chart.AppVersion }} -{{- if ne .Values.elasticsearch.volumeClaimTemplate.resources.requests.storage "30Gi" }} +{{- if and .Values.elasticsearch.enabled ((.Values.elasticsearch.volumeClaimTemplate).resources) }} +{{- if ne ((.Values.elasticsearch.volumeClaimTemplate.resources.requests).storage | default "30Gi") "30Gi" }} - Elasticsearch storage size has been changed to {{ .Values.elasticsearch.volumeClaimTemplate.resources.requests.storage }} {{- end }} +{{- end }} -{{- if ne
.Values.postgresql.primary.persistence.size "10Gi" }} +{{- if and .Values.postgresql.enabled ((.Values.postgresql.primary).persistence) }} +{{- if ne ((.Values.postgresql.primary.persistence).size | default "10Gi") "10Gi" }} - PostgreSQL storage size has been changed to {{ .Values.postgresql.primary.persistence.size }} {{- end }} +{{- end }} -{{- if ne .Values.grafana.persistence.size "8Gi" }} +{{- if and .Values.grafana.enabled ((.Values.grafana.persistence).size) }} +{{- if ne (.Values.grafana.persistence.size | default "8Gi") "8Gi" }} - Grafana storage size has been changed to {{ .Values.grafana.persistence.size }} {{- end }} +{{- end }} -{{- if ne .Values.prometheus.server.persistentVolume.size "8Gi" }} +{{- if and .Values.prometheus.enabled ((.Values.prometheus.server).persistentVolume) }} +{{- if ne ((.Values.prometheus.server.persistentVolume).size | default "8Gi") "8Gi" }} - Prometheus storage size has been changed to {{ .Values.prometheus.server.persistentVolume.size }} {{- end }} +{{- end }} Please review these changes and ensure they align with your expectations. @@ -49,146 +57,155 @@ Ingress is not enabled. 
To access services, use port-forwarding or enable ingres {{- end }} Deployed Resources in namespace {{ .Release.Namespace }}: -{{- if index .Values "argo-cd" "enabled" }} +{{- if (index .Values "argo-cd" "enabled") }} - Argo CD - Service Type: {{ index .Values "argo-cd" "server" "service" "type" }} - {{- if index .Values "argo-cd" "server" "nodeSelector" }} - Node Selector: {{ index .Values "argo-cd" "server" "nodeSelector" | toYaml | nindent 4 }} + {{- $argoServer := index .Values "argo-cd" "server" | default dict }} + {{- if $argoServer.service }} + Service Type: {{ $argoServer.service.type | default "ClusterIP" }} + {{- end }} + {{- if $argoServer.nodeSelector }} + Node Selector: {{ $argoServer.nodeSelector | toYaml | nindent 4 }} {{- end }} {{- end }} {{- if .Values.prometheus.enabled }} - Prometheus - Service Type: {{ .Values.prometheus.server.service.type }} - {{- if .Values.prometheus.server.nodeSelector }} - Node Selector: {{ .Values.prometheus.server.nodeSelector | toYaml | nindent 4 }} + {{- $promServer := (.Values.prometheus).server | default dict }} + {{- $promService := $promServer.service | default dict }} + Service Type: {{ $promService.type | default "ClusterIP" }} + {{- if $promServer.nodeSelector }} + Node Selector: {{ $promServer.nodeSelector | toYaml | nindent 4 }} {{- end }} {{- end }} {{- if .Values.grafana.enabled }} - Grafana - Service Type: {{ .Values.grafana.service.type }} + Service Type: {{ (.Values.grafana.service).type | default "ClusterIP" }} {{- if .Values.grafana.nodeSelector }} Node Selector: {{ .Values.grafana.nodeSelector | toYaml | nindent 4 }} {{- end }} Datasources: - {{- if .Values.grafana.datasources }} - {{- if .Values.grafana.datasources.datasources }} - {{- if .Values.grafana.datasources.datasources.yaml }} - {{- if .Values.grafana.datasources.datasources.yaml.datasources }} - {{- range .Values.grafana.datasources.datasources.yaml.datasources }} + {{- if ((.Values.grafana.datasources) | default dict) }} + {{- $dsFile := 
(.Values.grafana.datasources) | default dict }} + {{- $dsYaml := (index $dsFile "datasources.yaml") | default dict }} + {{- $dsList := $dsYaml.datasources | default list }} + {{- if $dsList }} + {{- range $dsList }} - {{ .name }}: {{ .type }} - {{- end }} - {{- else }} - No datasources defined in .Values.grafana.datasources.datasources.yaml.datasources - {{- end }} - {{- else }} - No datasources defined in .Values.grafana.datasources.datasources.yaml {{- end }} {{- else }} - No datasources defined in .Values.grafana.datasources.datasources + No datasources configured {{- end }} {{- else }} - No datasources defined in .Values.grafana.datasources + No datasources configured {{- end }} {{- end }} {{- if .Values.elasticsearch.enabled }} - Elasticsearch - Service Type: {{ .Values.elasticsearch.service.type }} - Replicas: {{ .Values.elasticsearch.replicas }} + Service Type: {{ (.Values.elasticsearch.service).type | default "ClusterIP" }} + Replicas: {{ .Values.elasticsearch.replicas | default 1 }} {{- if .Values.elasticsearch.nodeSelector }} Node Selector: {{ .Values.elasticsearch.nodeSelector | toYaml | nindent 4 }} {{- end }} {{- end }} {{- if .Values.kibana.enabled }} - Kibana - Service Type: {{ .Values.kibana.service.type }} + Service Type: {{ (.Values.kibana.service).type | default "ClusterIP" }} {{- if .Values.kibana.nodeSelector }} Node Selector: {{ .Values.kibana.nodeSelector | toYaml | nindent 4 }} {{- end }} {{- end }} {{- if .Values.haproxy.enabled }} - HAProxy - Service Type: {{ .Values.haproxy.service.type }} + Service Type: {{ (.Values.haproxy.service).type | default "ClusterIP" }} {{- if .Values.haproxy.nodeSelector }} Node Selector: {{ .Values.haproxy.nodeSelector | toYaml | nindent 4 }} {{- end }} + {{- if (.Values.haproxy).route }} Enabled Routes: {{- range $key, $value := .Values.haproxy.route }} {{- if $value }} - {{ $key }} {{- end }} {{- end }} + {{- end }} {{- end }} {{- if .Values.postgresql.enabled }} - PostgreSQL - Service Type: {{ 
.Values.postgresql.primary.service.type }} - {{- if .Values.postgresql.primary.nodeSelector }} - Node Selector: {{ .Values.postgresql.primary.nodeSelector | toYaml | nindent 4 }} + {{- $pgPrimary := (.Values.postgresql).primary | default dict }} + {{- $pgService := $pgPrimary.service | default dict }} + Service Type: {{ $pgService.type | default "ClusterIP" }} + {{- if $pgPrimary.nodeSelector }} + Node Selector: {{ $pgPrimary.nodeSelector | toYaml | nindent 4 }} {{- end }} {{- end }} -{{- if .Values.pgadmin4.enabled }} +{{- if (.Values.pgadmin4).enabled }} - pgAdmin4 - Service Type: {{ .Values.pgadmin4.service.type }} + Service Type: {{ (.Values.pgadmin4.service).type | default "ClusterIP" }} {{- if .Values.pgadmin4.nodeSelector }} Node Selector: {{ .Values.pgadmin4.nodeSelector | toYaml | nindent 4 }} {{- end }} {{- end }} -{{- if .Values.slamd.enabled }} +{{- if (.Values.slamd).enabled }} - SLAMD - Service Type: {{ .Values.slamd.service.type }} + Service Type: {{ (.Values.slamd.service).type | default "ClusterIP" }} {{- if .Values.slamd.nodeSelector }} Node Selector: {{ .Values.slamd.nodeSelector | toYaml | nindent 4 }} {{- end }} {{- end }} -{{- if .Values.shellinabox.enabled }} +{{- if (.Values.shellinabox).enabled }} - Shellinabox - Service Type: {{ .Values.shellinabox.service.type }} + Service Type: {{ (.Values.shellinabox.service).type | default "ClusterIP" }} {{- if .Values.shellinabox.nodeSelector }} Node Selector: {{ .Values.shellinabox.nodeSelector | toYaml | nindent 4 }} {{- end }} {{- end }} -{{- if .Values.smtp.enabled }} +{{- if (.Values.smtp).enabled }} - SMTP Relay - Service Type: {{ .Values.smtp.service.type }} + Service Type: {{ (.Values.smtp.service).type | default "ClusterIP" }} {{- if .Values.smtp.nodeSelector }} Node Selector: {{ .Values.smtp.nodeSelector | toYaml | nindent 4 }} {{- end }} {{- end }} -{{- if .Values.curator.enabled }} +{{- if (.Values.curator).enabled }} - Elasticsearch Curator {{- if .Values.curator.nodeSelector }} Node 
Selector: {{ .Values.curator.nodeSelector | toYaml | nindent 4 }} {{- end }} - Log Level: {{ .Values.curator.logging.loglevel }} - Dry Run: {{ .Values.curator.dryrun }} + Log Level: {{ (.Values.curator.logging).loglevel | default "INFO" }} + Dry Run: {{ .Values.curator.dryrun | default false }} {{- end }} -{{- if .Values.velero.enabled }} +{{- if (.Values.velero).enabled }} - Velero {{- if .Values.velero.nodeSelector }} Node Selector: {{ .Values.velero.nodeSelector | toYaml | nindent 4 }} {{- end }} + {{- if (.Values.velero).backupStorage }} Backup Storage: - Bucket: {{ .Values.velero.backupStorage.bucket }} - Region: {{ .Values.velero.backupStorage.region }} + Bucket: {{ (.Values.velero.backupStorage).bucket | default "not-configured" }} + Region: {{ (.Values.velero.backupStorage).region | default "us-east-1" }} + {{- end }} {{- end }} -{{- if .Values.zoonavigator.enabled }} +{{- if (.Values.zoonavigator).enabled }} - Zoo Navigator - Service Type: {{ .Values.zoonavigator.service.type }} + Service Type: {{ (.Values.zoonavigator.service).type | default "ClusterIP" }} {{- if .Values.zoonavigator.nodeSelector }} Node Selector: {{ .Values.zoonavigator.nodeSelector | toYaml | nindent 4 }} {{- end }} {{- end }} -{{- if .Values.opensearch.enabled }} +{{- if (.Values.opensearch).enabled }} - OpenSearch - Service Type: {{ .Values.opensearch.service.type }} + Service Type: {{ (.Values.opensearch.service).type | default "ClusterIP" }} {{- if .Values.opensearch.nodeSelector }} Node Selector: {{ .Values.opensearch.nodeSelector | toYaml | nindent 4 }} {{- end }} {{- end }} -{{- if index .Values "fluent-bit" "enabled" }} +{{- if (index .Values "fluent-bit" "enabled") }} - Fluent Bit - Service Type: {{ index .Values "fluent-bit" "service" "type" }} - {{- if index .Values "fluent-bit" "nodeSelector" }} - Node Selector: {{ index .Values "fluent-bit" "nodeSelector" | toYaml | nindent 4 }} + {{- $fb := index .Values "fluent-bit" }} + {{- if $fb.service }} + Service Type: {{ 
$fb.service.type | default "ClusterIP" }} + {{- end }} + {{- if $fb.nodeSelector }} + Node Selector: {{ $fb.nodeSelector | toYaml | nindent 4 }} {{- end }} {{- end }} @@ -199,6 +216,7 @@ HAProxy: kubectl port-forward svc/{{ .Release.Name }}-haproxy 8080:80 -n {{ .Release.Namespace }} Then access: http://localhost:8080 +{{- if (.Values.haproxy).route }} Enabled routes and their port-forward commands: {{- range $key, $value := .Values.haproxy.route }} {{- if $value }} @@ -208,21 +226,28 @@ Enabled routes and their port-forward commands: {{- end }} {{- end }} {{- end }} +{{- end }} -{{- if index .Values "fluent-bit" "enabled" }} +{{- if (index .Values "fluent-bit" "enabled") }} Log Forwarding: +{{- $fbLogs := (index .Values "fluent-bit" "logs") | default list }} +{{- if $fbLogs }} Fluent Bit is configured to forward logs from the following sources: -{{- range (index .Values "fluent-bit" "logs") }} +{{- range $fbLogs }} {{- if .enable }} - {{ .name }} ({{ .path }}) {{- end }} {{- end }} {{- else }} +Fluent Bit is enabled but no log sources are configured. +{{- end }} +{{- else }} Log Forwarding: Fluent Bit is disabled. Log forwarding is not currently set up. {{- end }} -{{- if .Values.curator.enabled }} +{{- if (.Values.curator).enabled }} +{{- if (.Values.curator).logs }} Log Management: Elasticsearch Curator is configured to manage the following log indices: {{- range .Values.curator.logs }} @@ -234,13 +259,11 @@ Elasticsearch Curator is configured to manage the following log indices: {{- end }} {{- end }} {{- end }} +{{- end }} {{- if .Values.grafana.enabled }} Grafana Dashboards: -The following dashboards have been imported into Grafana: -{{- range $key, $value := .Values.grafana.dashboardsConfigMaps }} - - {{ $value }} -{{- end }} +Dashboards are auto-provisioned by the Grafana sidecar from ConfigMaps with label grafana_dashboard=1. 
{{- end }} Note: Replace 'localhost' with the appropriate hostname or IP address if you're not accessing these services from the same machine where you're running the port-forward command. diff --git a/charts/common-services/templates/backup/deployment.yaml b/charts/common-services/templates/backup/deployment.yaml index 6efed853..08756773 100644 --- a/charts/common-services/templates/backup/deployment.yaml +++ b/charts/common-services/templates/backup/deployment.yaml @@ -89,18 +89,21 @@ spec: valueFrom: resourceFieldRef: resource: limits.memory + {{- $webhook := (.Values.backupManager).webhook | default dict }} + {{- $webhookBackup := $webhook.backup | default dict }} + {{- $webhookRestore := $webhook.restore | default dict }} - name: WEBHOOK_ENABLED - value: {{ .Values.backupManager.webhook.enabled | default false | quote }} + value: {{ $webhook.enabled | default false | quote }} - name: WATCH_SYNC_PERIOD - value: {{ .Values.backupManager.webhook.syncPeriod | default "1m" | quote }} + value: {{ $webhook.syncPeriod | default "1m" | quote }} - name: BACKUP_WEBHOOK_URL - value: {{ .Values.backupManager.webhook.backup.url | quote }} + value: {{ $webhookBackup.url | default "" | quote }} - name: BACKUP_WAIT_TIMEOUT - value: {{ .Values.backupManager.webhook.backup.timeout | default "1h" | quote }} + value: {{ $webhookBackup.timeout | default "1h" | quote }} - name: RESTORE_WEBHOOK_URL - value: {{ .Values.backupManager.webhook.restore.url | quote }} + value: {{ $webhookRestore.url | default "" | quote }} - name: RESTORE_WAIT_TIMEOUT - value: {{ .Values.backupManager.webhook.restore.timeout | default "1h" | quote }} + value: {{ $webhookRestore.timeout | default "1h" | quote }} - name: PODS_WAIT_TIMEOUT value: {{ .Values.backupManager.podsWaitTimeout | default "5m" | quote }} livenessProbe: diff --git a/charts/common-services/templates/cnpg/catalog.yaml b/charts/common-services/templates/cnpg/catalog.yaml index 130853e6..95c84478 100644 --- 
a/charts/common-services/templates/cnpg/catalog.yaml +++ b/charts/common-services/templates/cnpg/catalog.yaml @@ -1,8 +1,11 @@ -{{- if index .Values "cloudnative-pg" "enabled" }} +{{- if and (index .Values "cloudnative-pg" "enabled") (index .Values "cloudnative-pg" "imageCatalog" "enabled") (.Capabilities.APIVersions.Has "postgresql.cnpg.io/v1/ClusterImageCatalog") }} apiVersion: postgresql.cnpg.io/v1 kind: ClusterImageCatalog metadata: name: postgresql + annotations: + helm.sh/hook: post-install,post-upgrade + helm.sh/hook-weight: "10" spec: images: - major: 15 @@ -11,4 +14,7 @@ spec: image: ghcr.io/cloudnative-pg/postgresql:16.8 - major: 17 image: ghcr.io/cloudnative-pg/postgresql:17.5 -{{- end }} + {{- with (index .Values "cloudnative-pg" "imageCatalog" "images") }} + {{ . | toYaml | nindent 4 }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/common-services/templates/grafana-dashboards-metrics.yaml b/charts/common-services/templates/grafana-dashboards-metrics.yaml new file mode 100644 index 00000000..1bc2faa5 --- /dev/null +++ b/charts/common-services/templates/grafana-dashboards-metrics.yaml @@ -0,0 +1,304 @@ +{{/* ============================================================================ + Grafana Dashboards for Metrics-Enabled Services + + This template creates ConfigMaps for Grafana dashboards that are conditionally + imported based on whether the respective service is enabled. + + All dashboards are automatically provisioned when the service is enabled. + Prometheus scrapes metrics via service annotations (prometheus.io/scrape). 
+ + Dashboards included: + - Elasticsearch - Search and analytics engine + - HAProxy - Load balancer / API gateway + - Fluent Bit - Log processor / forwarder + - Grafana - Self-monitoring metrics + - ZooKeeper - Coordination service + - Prometheus - Monitoring system self-metrics + - Kibana - Log visualization UI metrics + ============================================================================ */}} + +{{/* ---------------------------------------------------------------------------- + ArgoCD Dashboard + Condition: argo-cd.enabled = true + Metrics: argocd_* from controller, server, repo-server + Note: Application metrics appear when ArgoCD apps are deployed + ---------------------------------------------------------------------------- */}} +{{- if and (.Values.grafana.enabled) (index .Values "argo-cd" "enabled") }} +{{- if (.Files.Glob "dashboards/argo-cd/argocd-dashboard.json") }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: argocd-dashboard + labels: + grafana_dashboard: "1" + annotations: + grafana_folder: "GitOps" +data: +{{ (.Files.Glob "dashboards/argo-cd/argocd-dashboard.json").AsConfig | indent 2 }} +--- +{{- end }} +{{- end }} + +{{/* ---------------------------------------------------------------------------- + Elasticsearch Dashboard + Condition: elasticsearch.enabled = true + ---------------------------------------------------------------------------- */}} +{{- if and (.Values.grafana.enabled) (.Values.elasticsearch.enabled) }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: elasticsearch-dashboard + labels: + grafana_dashboard: "1" + annotations: + grafana_folder: "Database" +data: +{{ (.Files.Glob "dashboards/elasticsearch/elasticsearch-dashboard.json").AsConfig | indent 2 }} +--- +{{- end }} + +{{/* ---------------------------------------------------------------------------- + HAProxy Dashboard + Condition: haproxy.enabled = true + ---------------------------------------------------------------------------- */}} +{{- if and 
(.Values.grafana.enabled) (.Values.haproxy.enabled) }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: haproxy-dashboard + labels: + grafana_dashboard: "1" + annotations: + grafana_folder: "Infrastructure" +data: +{{ (.Files.Glob "dashboards/haproxy/haproxy-dashboard.json").AsConfig | indent 2 }} +--- +{{- end }} + +{{/* ---------------------------------------------------------------------------- + Fluent Bit Dashboard + Condition: fluent-bit.enabled = true + ---------------------------------------------------------------------------- */}} +{{- if and (.Values.grafana.enabled) (index .Values "fluent-bit" "enabled") }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: fluent-bit-dashboard + labels: + grafana_dashboard: "1" + annotations: + grafana_folder: "Logging" +data: +{{ (.Files.Glob "dashboards/fluent-bit/fluent-bit-dashboard.json").AsConfig | indent 2 }} +--- +{{- end }} + +{{/* ---------------------------------------------------------------------------- + Grafana Self-Monitoring Dashboard + Condition: grafana.enabled = true + ---------------------------------------------------------------------------- */}} +{{- if .Values.grafana.enabled }} +{{- if (.Files.Glob "dashboards/grafana/*.json") }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-metrics-dashboard + labels: + grafana_dashboard: "1" + annotations: + grafana_folder: "Observability" +data: +{{ (.Files.Glob "dashboards/grafana/*.json").AsConfig | indent 2 }} +--- +{{- end }} +{{- end }} + +{{/* ---------------------------------------------------------------------------- + ZooKeeper Dashboard + Condition: zookeeper.enabled = true + Metrics: JMX metrics via zookeeper-metrics service + ---------------------------------------------------------------------------- */}} +{{- if and (.Values.grafana.enabled) (.Values.zookeeper.enabled) }} +{{- if (.Files.Glob "dashboards/zookeeper/zookeeper-metrics-dashboard.json") }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: 
zookeeper-metrics-dashboard + labels: + grafana_dashboard: "1" + annotations: + grafana_folder: "Infrastructure" +data: +{{ (.Files.Glob "dashboards/zookeeper/zookeeper-metrics-dashboard.json").AsConfig | indent 2 }} +--- +{{- end }} +{{- end }} + +{{/* ---------------------------------------------------------------------------- + Prometheus Self-Monitoring Dashboard + Condition: prometheus.enabled = true + Metrics: prometheus_* self-metrics + ---------------------------------------------------------------------------- */}} +{{- if and (.Values.grafana.enabled) (.Values.prometheus.enabled) }} +{{- if (.Files.Glob "dashboards/prometheus/prometheus-dashboard.json") }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: prometheus-dashboard + labels: + grafana_dashboard: "1" + annotations: + grafana_folder: "Observability" +data: +{{ (.Files.Glob "dashboards/prometheus/prometheus-dashboard.json").AsConfig | indent 2 }} +--- +{{- end }} +{{- end }} + +{{/* ---------------------------------------------------------------------------- + PostgreSQL Dashboard + Condition: postgresql.enabled = true + Metrics: pg_* from postgres_exporter sidecar + ---------------------------------------------------------------------------- */}} +{{- if and (.Values.grafana.enabled) (.Values.postgresql.enabled) }} +{{- if (.Files.Glob "dashboards/postgresql/postgresql-dashboard.json") }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: postgresql-dashboard + labels: + grafana_dashboard: "1" + annotations: + grafana_folder: "Database" +data: +{{ (.Files.Glob "dashboards/postgresql/postgresql-dashboard.json").AsConfig | indent 2 }} +--- +{{- end }} +{{- end }} + +{{/* ---------------------------------------------------------------------------- + Kibana Dashboard + Condition: kibana.enabled = true + Metrics: kibana_* from custom prometheus exporter sidecar + Exporter: https://github.com/gnanirahulnutakki/kibana-prometheus-exporter + 
---------------------------------------------------------------------------- */}} +{{- if and (.Values.grafana.enabled) (.Values.kibana.enabled) }} +{{- if (.Files.Glob "dashboards/kibana/kibana-dashboard.json") }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: kibana-dashboard + labels: + grafana_dashboard: "1" + annotations: + grafana_folder: "Logging" +data: +{{ (.Files.Glob "dashboards/kibana/kibana-dashboard.json").AsConfig | indent 2 }} +--- +{{- end }} +{{- end }} + +{{/* ---------------------------------------------------------------------------- + OpenSearch Dashboard + Condition: opensearch.enabled = true + Metrics: opensearch_* cluster and node metrics + ---------------------------------------------------------------------------- */}} +{{- if and (.Values.grafana.enabled) (.Values.opensearch.enabled) }} +{{- if (.Files.Glob "dashboards/opensearch/opensearch-dashboard.json") }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: opensearch-dashboard + labels: + grafana_dashboard: "1" + annotations: + grafana_folder: "Infrastructure" +data: +{{ (.Files.Glob "dashboards/opensearch/opensearch-dashboard.json").AsConfig | indent 2 }} +--- +{{- end }} +{{- end }} + +{{/* ---------------------------------------------------------------------------- + Velero Dashboard + Condition: velero.enabled = true + Metrics: velero_* backup and restore metrics + ---------------------------------------------------------------------------- */}} +{{- if and (.Values.grafana.enabled) (.Values.velero.enabled) }} +{{- if (.Files.Glob "dashboards/velero/velero-dashboard.json") }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: velero-dashboard + labels: + grafana_dashboard: "1" + annotations: + grafana_folder: "Backup" +data: +{{ (.Files.Glob "dashboards/velero/velero-dashboard.json").AsConfig | indent 2 }} +--- +{{- end }} +{{- end }} + +{{/* ---------------------------------------------------------------------------- + Loki Dashboard + Condition: loki.enabled = true + 
Metrics: loki_* log aggregation metrics + ---------------------------------------------------------------------------- */}} +{{- if and (.Values.grafana.enabled) (.Values.loki.enabled) }} +{{- if (.Files.Glob "dashboards/loki/*.json") }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: loki-dashboard + labels: + grafana_dashboard: "1" + annotations: + grafana_folder: "Logging" +data: +{{ (.Files.Glob "dashboards/loki/*.json").AsConfig | indent 2 }} +--- +{{- end }} +{{- end }} + +{{/* ---------------------------------------------------------------------------- + Flink Dashboard + Condition: flink-kubernetes-operator.enabled = true + Metrics: flink_* stream processing metrics + ---------------------------------------------------------------------------- */}} +{{- if and (.Values.grafana.enabled) (index .Values "flink-kubernetes-operator" "enabled") }} +{{- if (.Files.Glob "dashboards/flink/flink-dashboard.json") }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: flink-dashboard + labels: + grafana_dashboard: "1" + annotations: + grafana_folder: "Data Processing" +data: +{{ (.Files.Glob "dashboards/flink/flink-dashboard.json").AsConfig | indent 2 }} +--- +{{- end }} +{{- end }} + +{{/* ---------------------------------------------------------------------------- + Alloy Dashboard + Condition: alloy.enabled = true + Metrics: alloy cluster and pipeline metrics + ---------------------------------------------------------------------------- */}} +{{- if and (.Values.grafana.enabled) (.Values.alloy.enabled) }} +{{- if (.Files.Glob "dashboards/alloy/alloy-cluster-dashboard.json") }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: alloy-dashboard + labels: + grafana_dashboard: "1" + annotations: + grafana_folder: "Infrastructure" +data: +{{ (.Files.Glob "dashboards/alloy/alloy-cluster-dashboard.json").AsConfig | indent 2 }} +--- +{{- end }} +{{- end }} diff --git a/charts/common-services/templates/kibana/kibana-metrics-service.yaml 
b/charts/common-services/templates/kibana/kibana-metrics-service.yaml new file mode 100644 index 00000000..f3d084cf --- /dev/null +++ b/charts/common-services/templates/kibana/kibana-metrics-service.yaml @@ -0,0 +1,20 @@ +{{- if .Values.kibana.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: kibana-metrics + labels: + app: kibana + component: metrics + {{- include "common-services.labels" . | nindent 4 }} +spec: + type: ClusterIP + ports: + - name: metrics + port: 9684 + targetPort: 9684 + protocol: TCP + selector: + app: kibana + release: {{ .Release.Name }} +{{- end }} diff --git a/charts/common-services/templates/kibana/kibana-servicemonitor.yaml b/charts/common-services/templates/kibana/kibana-servicemonitor.yaml new file mode 100644 index 00000000..88e19373 --- /dev/null +++ b/charts/common-services/templates/kibana/kibana-servicemonitor.yaml @@ -0,0 +1,23 @@ +{{- if and .Values.kibana.enabled .Values.prometheus.enabled (((.Values.kibana).serviceMonitor).enabled | default false) }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: kibana + labels: + app: kibana + release: prometheus + {{- include "common-services.labels" . 
| nindent 4 }} +spec: + selector: + matchLabels: + app: kibana + component: metrics + namespaceSelector: + matchNames: + - {{ .Release.Namespace }} + endpoints: + - port: metrics + interval: 30s + scrapeTimeout: 10s + path: /metrics +{{- end }} diff --git a/charts/common-services/templates/postgres/postgres-backup-cronjob.yaml b/charts/common-services/templates/postgres/postgres-backup-cronjob.yaml index 1a9b6d5f..7776dd79 100644 --- a/charts/common-services/templates/postgres/postgres-backup-cronjob.yaml +++ b/charts/common-services/templates/postgres/postgres-backup-cronjob.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.postgresql.enabled (and ((.Values.postgresql).backup).enabled | default false) }} +{{- if and .Values.postgresql.enabled (((.Values.postgresql).backup).enabled | default false) }} apiVersion: batch/v1 kind: CronJob metadata: diff --git a/charts/common-services/templates/postgres/postgres-init-script-configmap.yaml b/charts/common-services/templates/postgres/postgres-init-script-configmap.yaml index 15415e60..78773468 100644 --- a/charts/common-services/templates/postgres/postgres-init-script-configmap.yaml +++ b/charts/common-services/templates/postgres/postgres-init-script-configmap.yaml @@ -1,17 +1,22 @@ -{{- if eq .Values.postgresql.enabled true }} +{{- if and .Values.postgresql.enabled ((.Values.postgresql).databases) }} apiVersion: v1 kind: ConfigMap metadata: name: postgres-init-script data: + {{- if (.Values.postgresql.databases).sdc }} 01_sdc_init_script.sql: | CREATE DATABASE {{ .Values.postgresql.databases.sdc.databaseName }}; CREATE USER {{ .Values.postgresql.databases.sdc.user }} WITH ENCRYPTED PASSWORD '{{ .Values.postgresql.databases.sdc.password }}'; GRANT ALL PRIVILEGES ON DATABASE agentsdb TO {{ .Values.postgresql.databases.sdc.user }}; + {{- end }} + {{- if (.Values.postgresql.databases).eoc }} 02_eoc_init_script.sql: | CREATE DATABASE {{ .Values.postgresql.databases.eoc.databaseName }}; CREATE USER {{ 
.Values.postgresql.databases.eoc.user }} WITH ENCRYPTED PASSWORD '{{ .Values.postgresql.databases.eoc.password }}'; GRANT ALL PRIVILEGES ON DATABASE eocdb TO {{ .Values.postgresql.databases.eoc.user }}; + {{- end }} + {{- if and (.Values.postgresql.databases).eoc (.Values.postgresql.databases).sdc }} XX_create_schema_init_script.sh: | #!/bin/bash PGPASSWORD=$POSTGRES_PASSWORD psql -v ON_ERROR_STOP=1 <<-EOSQL @@ -24,4 +29,5 @@ data: GRANT ALL ON SCHEMA {{ .Values.postgresql.databases.sdc.schema }} TO {{ .Values.postgresql.databases.sdc.user }}; GRANT ALL ON SCHEMA public TO {{ .Values.postgresql.databases.sdc.user }}; EOSQL + {{- end }} {{- end }} diff --git a/charts/common-services/templates/postgres/postgres-restore-cronjob.yaml b/charts/common-services/templates/postgres/postgres-restore-cronjob.yaml index c056f0ba..9821acd3 100644 --- a/charts/common-services/templates/postgres/postgres-restore-cronjob.yaml +++ b/charts/common-services/templates/postgres/postgres-restore-cronjob.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.postgresql.enabled (and ((.Values.postgresql).backup).enabled | default false) }} +{{- if and .Values.postgresql.enabled (((.Values.postgresql).backup).enabled | default false) }} apiVersion: batch/v1 kind: CronJob metadata: diff --git a/charts/common-services/values.yaml b/charts/common-services/values.yaml index 0ff99eb3..292bff22 100644 --- a/charts/common-services/values.yaml +++ b/charts/common-services/values.yaml @@ -12,6 +12,9 @@ global: hibernate: false + postgresql: + auth: + database: eocdb nodeSelector: {} @@ -45,14 +48,26 @@ zookeeper: enabled: false volumePermissions: enabled: false - # image: - # repository: radiantone/bitnami-shell - # tag: 11-debian-11-r51 image: repository: radiantone/zookeeper tag: 3.8.0-debian-11-r56 fullnameOverride: zookeeper nodeSelector: {} + metrics: + enabled: true + containerPort: 9141 + service: + type: ClusterIP + port: 9141 + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9141" + 
prometheus.io/path: "/metrics" + serviceMonitor: + enabled: false + interval: 30s + additionalLabels: + release: prometheus # Argo CD Configuration # --------------------- @@ -88,15 +103,71 @@ argo-cd: enabled: false controller: nodeSelector: {} + metrics: + enabled: true + service: + servicePort: 8082 + portName: http-metrics + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8082" + prometheus.io/path: "/metrics" + serviceMonitor: + enabled: false + interval: 30s + additionalLabels: + release: prometheus + rules: + enabled: false redis: nodeSelector: {} + metrics: + enabled: true + service: + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9121" + prometheus.io/path: "/metrics" + serviceMonitor: + enabled: false + additionalLabels: + release: prometheus server: nodeSelector: {} service: type: NodePort + metrics: + enabled: true + service: + servicePort: 8083 + portName: http-metrics + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8083" + prometheus.io/path: "/metrics" + serviceMonitor: + enabled: false + interval: 30s + additionalLabels: + release: prometheus repoServer: nodeSelector: {} + metrics: + enabled: true + service: + servicePort: 8084 + portName: http-metrics + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8084" + prometheus.io/path: "/metrics" + serviceMonitor: + enabled: false + interval: 30s + additionalLabels: + release: prometheus crds: + install: true keep: false # Disabling new components that didn't exist in 5.6.0 commitServer: @@ -127,14 +198,13 @@ prometheus: configmapReload: prometheus: enabled: false - # nodeExporter: - # enabled: false - # kubeStateMetrics: - # enabled: false + # NOTE: Use subchart dependency names (not aliases) for enable/disable: + # prometheus-node-exporter.enabled (NOT nodeExporter.enabled) + # kube-state-metrics.enabled (NOT kubeStateMetrics.enabled) kube-state-metrics: enabled: false prometheus-node-exporter: - enabled: true + enabled: 
false alertmanager: enabled: false # pushgateway: @@ -149,6 +219,7 @@ prometheus: - web.route-prefix=/ - web.external-url=http://prometheus-server/prometheus/ - web.enable-admin-api + - web.enable-remote-write-receiver fullnameOverride: prometheus-server nodeSelector: {} # Persistence enabled by default and size to 50Gi @@ -175,11 +246,23 @@ prometheus: # # Note: Ensure correct data source URLs and credentials. Place dashboard JSON files in the specified directories. grafana: - fullnameOverride: grafana enabled: true + fullnameOverride: grafana nodeSelector: {} + # Sidecar configuration for dashboard provisioning + sidecar: + dashboards: + enabled: true + folderAnnotation: grafana_folder + provider: + foldersFromFilesStructure: true service: type: ClusterIP + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "3000" + # Path matches grafana.ini server.root_url subpath + prometheus.io/path: "/eoc-backend/grafana/metrics" # Persistence enabled by default persistence: enabled: true @@ -206,6 +289,9 @@ grafana: enabled: true host: smtp-server:25 from_address: saas@radiantlogic.com + dataproxy: + timeout: 90 + keep_alive_seconds: 30 live: allowed_origins: "*" # Setup Data Source (prometheus and elastic) @@ -232,6 +318,7 @@ grafana: logMessageField: message maxConcurrentShardRequests: 5 timeField: '@timestamp' + timeout: 90 readonly: true - name: OpenSearch type: opensearch @@ -252,624 +339,25 @@ grafana: # timeout: 60 # maxLines: 1000 - dashboardProviders: - dashboardproviders.yaml: - apiVersion: 1 - providers: - - name: 'fid' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/fid - - name: 'zookeeper' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/zookeeper - - name: 'iddm' - orgId: 1 - folder: '' - 
type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/iddm - - name: 'elasticsearch' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/elasticsearch - - name: 'service-status' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/service-status - # -- IDA Metrics Dashboards - - name: 'ia-service-status' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ia-service-status - - name: 'ida' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ida - - name: ia-controller-beam - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ia-controller-beam - - name: 'ia-controller-ecto' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ia-controller-ecto - - name: 'ia-controller-oban' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ia-controller-oban - - name: 'ia-controller-phoenix' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ia-controller-phoenix - - name: 'ia-data-ingestion' - orgId: 1 - 
folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ia-data-ingestion - - name: 'ia-extractor-webapp' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ia-extractor-webapp - - name: 'ia-governance-portal' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ia-governance-portal - # -- IDO Logs Dashboards - - name: 'ido-logs-alert-center' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-alert-center - - name: 'ido-logs-analytics-db' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-analytics-db - - name: 'ido-logs-api-umbrella' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-api-umbrella - - name: 'ido-logs-environment-setup' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-environment-setup - - name: 'ido-logs-global-scheduler' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-global-scheduler - - name: 'ido-logs-iddm-api-gateway' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: 
true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-iddm-api-gateway - - name: 'ido-logs-iddm-authentication-service' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-iddm-authentication-service - - name: 'ido-logs-iddm-core' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-iddm-core - - name: 'ido-logs-iddm-data-catalog' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-iddm-data-catalog - - name: 'ido-logs-iddm-directory-browser' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-iddm-directory-browser - - name: 'ido-logs-iddm-directory-namespace' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-iddm-directory-namespace - - name: 'ido-logs-iddm-directory-schema' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-iddm-directory-schema - - name: 'ido-logs-iddm-proxy' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-iddm-proxy - - name: 'ido-logs-iddm-settings' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - 
path: /var/lib/grafana/dashboards/ido-logs-iddm-settings - - name: 'ido-logs-iddm-system-administration' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-iddm-system-administration - - name: 'ido-logs-iddm-ui' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-iddm-ui - - name: 'ido-logs-iddm-zookeeper' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-iddm-zookeeper - - name: 'ido-logs-portal' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-portal - - name: 'ido-logs-portal-setup' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-portal-setup - - name: 'ido-logs-ledger-operations' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-ledger-operations - - name: 'ido-logs-nats' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-nats - - name: 'ido-logs-nebula-graphd' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-nebula-graphd - - name: 'ido-logs-nebula-metad' - orgId: 1 - folder: '' - type: file - 
disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-nebula-metad - - name: 'ido-logs-nebula-storaged' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-nebula-storaged - - name: 'ido-logs-observation-supervisor' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-observation-supervisor - - name: 'ido-logs-pipeline-job-manager' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-pipeline-job-manager - - name: 'ido-logs-pipeline-orchestrator' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-pipeline-orchestrator - - name: 'ido-logs-pipeline-task-manager' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-pipeline-task-manager - - name: 'ido-logs-schema-manager' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-schema-manager - - name: 'ido-logs-secret-manager' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-secret-manager - - name: 'ido-logs-writeback' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - 
allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-logs-writeback - # -- IDO Metrics Dashboards - - name: 'ido-metrics-alert-center' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-metrics-alert-center - - name: 'ido-metrics-graph-database' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-metrics-graph-database - - name: 'ido-metrics-iddm-core' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-metrics-iddm-core - - name: 'ido-metrics-ido-dashboard' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-metrics-ido-dashboard - - name: 'ido-metrics-observations-channels-contention' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-metrics-observations-channels-contention - - name: 'ido-metrics-observations-functional' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-metrics-observations-functional - - name: 'ido-metrics-observations-process' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-metrics-observations-process - - name: 'ido-metrics-observations-timings' - orgId: 1 - folder: '' - type: file - disableDeletion: false - 
updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-metrics-observations-timings - - name: 'ido-metrics-system' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-metrics-system - - name: 'ido-metrics-writeback-service' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - editable: true - options: - path: /var/lib/grafana/dashboards/ido-metrics-writeback-service - - dashboardsConfigMaps: - # -- FID Dashboards - fid: "fid-dashboard" - zookeeper: "zookeeper-dashboard" - iddm: "iddm-dashboard" - elasticsearch: "audit-logs-elastic-dashboard" - service-status: "service-status-dashboard" - # -- IDA Metrics Dashboards - ia-service-status: "ia-service-status-dashboard" - ida: "ia-dashboard" - ia-controller-beam: "ia-controller-beam-dashboard" - ia-controller-ecto: "ia-controller-ecto-dasboard" - ia-controller-oban: "ia-controller-oban-dashboard" - ia-controller-phoenix: "ia-controller-phoenix-dashboard" - ia-data-ingestion: "ia-data-ingestion-dashboard" - ia-extractor-webapp: "ia-extractor-webapp-dashboard" - ia-governance-portal: "ia-governance-portal-dashboard" - # -- IDO Logs Dashboards - ido-logs-alert-center: "ido-logs-alert-center-dashboard" - ido-logs-analytics-db: "ido-logs-analytics-db-dashboard" - ido-logs-api-umbrella: "ido-logs-api-umbrella-dashboard" - ido-logs-environment-setup: "ido-logs-environment-setup-dashboard" - ido-logs-global-scheduler: "ido-logs-global-scheduler-dashboard" - ido-logs-iddm-api-gateway: "ido-logs-iddm-api-gateway-dashboard" - ido-logs-iddm-authentication-service: "ido-logs-iddm-authentication-service-dashboard" - ido-logs-iddm-core: "ido-logs-iddm-core-dashboard" - ido-logs-iddm-data-catalog: "ido-logs-iddm-data-catalog-dashboard" - ido-logs-iddm-directory-browser: 
"ido-logs-iddm-directory-browser-dashboard" - ido-logs-iddm-directory-namespace: "ido-logs-iddm-directory-namespace-dashboard" - ido-logs-iddm-directory-schema: "ido-logs-iddm-directory-schema-dashboard" - ido-logs-iddm-proxy: "ido-logs-iddm-proxy-dashboard" - ido-logs-iddm-settings: "ido-logs-iddm-settings-dashboard" - ido-logs-iddm-system-administration: "ido-logs-iddm-system-administration-dashboard" - ido-logs-iddm-ui: "ido-logs-iddm-ui-dashboard" - ido-logs-iddm-zookeeper: "ido-logs-iddm-zookeeper-dashboard" - ido-logs-portal: "ido-logs-portal-dashboard" - ido-logs-portal-setup: "ido-logs-portal-setup-dashboard" - ido-logs-ledger-operations: "ido-logs-ledger-operations-dashboard" - ido-logs-nats: "ido-logs-nats-dashboard" - ido-logs-nebula-graphd: "ido-logs-nebula-graphd-dashboard" - ido-logs-nebula-metad: "ido-logs-nebula-metad-dashboard" - ido-logs-nebula-storaged: "ido-logs-nebula-storaged-dashboard" - ido-logs-observation-supervisor: "ido-logs-observation-supervisor-dashboard" - ido-logs-pipeline-job-manager: "ido-logs-pipeline-job-manager-dashboard" - ido-logs-pipeline-orchestrator: "ido-logs-pipeline-orchestrator-dashboard" - ido-logs-pipeline-task-manager: "ido-logs-pipeline-task-manager-dashboard" - ido-logs-schema-manager: "ido-logs-schema-manager-dashboard" - ido-logs-secret-manager: "ido-logs-secret-manager-dashboard" - ido-logs-writeback: "ido-logs-writeback-dashboard" - # -- IDO Metrics Dashboards - ido-metrics-alert-center: "ido-metrics-alert-center-dashboard" - ido-metrics-graph-database: "ido-metrics-graph-database-dashboard" - ido-metrics-iddm-core: "ido-metrics-iddm-core-dashboard" - ido-metrics-ido-dashboard: "ido-metrics-ido-dashboard-dashboard" - ido-metrics-observations-channels-contention: "ido-metrics-observations-channels-contention-dashboard" - ido-metrics-observations-functional: "ido-metrics-observations-functional-dashboard" - ido-metrics-observations-process: "ido-metrics-observations-process-dashboard" - 
ido-metrics-observations-timings: "ido-metrics-observations-timings-dashboard" - ido-metrics-system: "ido-metrics-system-dashboard" - ido-metrics-writeback-service: "ido-metrics-writeback-service-dashboard" + # dashboardProviders: NOT NEEDED + # -------------------------------- + # Dashboard provisioning is handled entirely by the Grafana sidecar. + # The sidecar creates its own provider that reads from /tmp/dashboards. + # It auto-discovers ConfigMaps with label "grafana_dashboard: 1" and + # organizes them into folders using the "grafana_folder" annotation. + # + # DO NOT add dashboardProviders entries — they require matching + # dashboardsConfigMaps volume mounts which break when services are disabled. + dashboardProviders: {} + + # Dashboard provisioning is handled by the Grafana sidecar (sidecar.dashboards.enabled: true). + # The sidecar auto-discovers ConfigMaps with the label "grafana_dashboard: 1" and + # organizes them into folders using the "grafana_folder" annotation. + # See grafana-dashboards*.yaml templates for all available dashboards. + # + # DO NOT use dashboardProviders or dashboardsConfigMaps — they create static volume + # mounts that break when a service is disabled (missing ConfigMap → FailedMount). 
+ dashboardsConfigMaps: {} # Elasticsearch & Kibana Configuration # ----------------------------------- @@ -888,6 +376,7 @@ grafana: elasticsearch: enabled: true + imageTag: "7.17.25" replicas: 1 nodeSelector: {} service: @@ -911,16 +400,19 @@ elasticsearch: kibana: enabled: true + imageTag: "7.17.25" fullnameOverride: kibana nodeSelector: {} service: type: ClusterIP - # extraEnvs: - # - name: ELASTICSEARCH_USERNAME - # value: elastic - # - name: ELASTICSEARCH_PASSWORD - # value: changeme + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9684" + prometheus.io/path: "/metrics" healthCheckPath: "/eoc-backend/kibana" + serviceMonitor: + enabled: false + kibanaConfig: kibana.yml: | server.basePath: "/eoc-backend/kibana" @@ -930,6 +422,43 @@ kibana: server.rewriteBasePath: true server.compression.enabled: true server.requestId.allowFromAnyIp: true + # Add exporter sidecar for metrics + # Using custom exporter: https://github.com/gnanirahulnutakki/kibana-prometheus-exporter + extraContainers: + - name: kibana-exporter + image: rahulnutakki/kibana-prometheus-exporter:v1.1.0 + args: + - --kibana-url=http://localhost:5601/eoc-backend/kibana + - --log-level=info + # Authentication for Kibana exporter (optional) + # The exporter skips auth when username is empty. + # For authenticated clusters, uncomment: + # - '--kibana-username=elastic' + # - '--kibana-password=' + # Or use env vars: KIBANA_USERNAME, KIBANA_PASSWORD + ports: + - containerPort: 9684 + name: metrics + protocol: TCP + resources: + limits: + cpu: 100m + memory: 64Mi + requests: + cpu: 10m + memory: 32Mi + livenessProbe: + httpGet: + path: /health + port: metrics + initialDelaySeconds: 90 + periodSeconds: 30 + failureThreshold: 6 + # No readinessProbe — the exporter is a metrics sidecar, not a traffic-serving + # container. Pod readiness is determined by the main kibana container. 
+ # The /ready endpoint depends on Kibana's stats API which may 404 on certain + # Kibana versions (7.17.x) or during startup, causing the entire pod to become + # not-ready and blocking helm --wait upgrades. # HAProxy Configuration # --------------------- @@ -965,6 +494,18 @@ haproxy: nodeSelector: {} service: type: NodePort + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8404" + prometheus.io/path: "/metrics" + containerPorts: + prometheus: 8404 + serviceMonitor: + enabled: false + endpoints: + - port: prometheus + path: /metrics + interval: 30s config: | defaults timeout connect 10s @@ -974,6 +515,13 @@ haproxy: mode http option httplog maxconn 3000 + frontend stats + bind :8404 + http-request use-service prometheus-exporter if { path /metrics } + stats enable + stats uri /stats + stats refresh 10s + frontend http-in bind *:80 @@ -1060,6 +608,7 @@ haproxy: {{- end }} {{- if ((.Values.route).grafana | default false) }} backend grafana_backend + timeout server 90s http-request set-path %[path,regsub(^/grafana/?,/)] server grafana grafana:80 check {{- end }} @@ -1155,10 +704,32 @@ postgresql: # repository: radiantone/bitnami-shell # tag: 11-debian-11-r57 metrics: - enabled: false - # image: - # repository: radiantone/postgres-exporter - # tag: 0.11.1-debian-11-r34 + enabled: true + image: + repository: radiantone/postgres-exporter + tag: 0.11.1-debian-11-r34 + service: + ports: + metrics: 9187 + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9187" + serviceMonitor: + enabled: false + interval: 30s + labels: + release: prometheus + prometheusRule: + enabled: false + rules: + - alert: PostgresqlDown + expr: pg_up == 0 + for: 0m + labels: + severity: critical + annotations: + summary: "PostgreSQL down" + description: "PostgreSQL instance is down" fullnameOverride: postgresql image: repository: radiantone/postgresql @@ -1393,6 +964,17 @@ opensearch: value: "true" - name: "DISABLE_INSTALL_DEMO_CONFIG" value: "true" + # Install 
Prometheus exporter plugin for metrics + plugins: + enabled: true + installList: + - https://github.com/Aiven-Open/prometheus-exporter-plugin-for-opensearch/releases/download/2.11.0.0/prometheus-exporter-2.11.0.0.zip + # Add pod annotations for Prometheus scraping + podAnnotations: + prometheus.io/scrape: "true" + prometheus.io/path: "/_prometheus/metrics" + prometheus.io/port: "9200" + prometheus.io/scheme: "http" rbac: create: false serviceAccountAnnotations: {} @@ -1476,10 +1058,28 @@ fluent-bit: metricsPort: 2020 service: type: ClusterIP + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "2020" + prometheus.io/path: "/api/v1/metrics/prometheus" prometheusRule: enabled: false + rules: + - alert: FluentBitNoOutputBytes + expr: rate(fluentbit_output_proc_bytes_total[5m]) == 0 + for: 15m + labels: + severity: warning + annotations: + summary: "Fluent Bit not processing output bytes" serviceMonitor: enabled: false + interval: 10s + scrapeTimeout: 10s + dashboards: + enabled: true + labelKey: grafana_dashboard + labelValue: "1" # ====== For helm chart version < 1.0.9 ===== outputSearchType: "es" outputSearchHost: "elasticsearch-master" @@ -1548,12 +1148,12 @@ fluent-bit: # Curator can perform actions like deleting, closing, or creating indices based on configured filters. 
curator: + enabled: true image: repository: radiantone/elasticsearch-curator-archived pullPolicy: Always tag: 5.8.4-debian-10-r253 imagePullSecrets: [] - enabled: true dryrun: false # Elasticsearch Client Settings # ----------------------------- @@ -1721,6 +1321,35 @@ velero: volumeMounts: - mountPath: /target name: plugins + metrics: + enabled: true + scrapeInterval: 30s + scrapeTimeout: 10s + podAnnotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8085" + prometheus.io/path: "/metrics" + serviceMonitor: + enabled: false + autodetect: true + additionalLabels: + release: prometheus + nodeAgentPodMonitor: + enabled: false + autodetect: true + additionalLabels: + release: prometheus + prometheusRule: + enabled: false + autodetect: true + spec: + - alert: VeleroBackupFailed + annotations: + message: "Velero backup {{ $labels.schedule }} has failed" + expr: velero_backup_last_status{schedule!=""} != 1 + for: 15m + labels: + severity: warning # Zoo Navigator Configuration # ---------------------------- @@ -1783,12 +1412,27 @@ cloudnative-pg: data: INHERITED_LABELS: app.kubernetes.io/*, radiantlogic.io/*, *.radiantlogic.io/* INHERITED_ANNOTATIONS: meta.helm.sh/*, helm.sh/*, radiantlogic.io/*, *.radiantlogic.io/* + imageCatalog: + enabled: false + images: [] + # images: + # - major: 18 + # image: ghcr.io/cloudnative-pg/postgresql:18.1 nodeSelector: {} tolerations: [] affinity: {} # CRDs are installed by a dedicated job crds: create: false + # Monitoring configuration + monitoring: + podMonitorEnabled: false + podMonitorAdditionalLabels: + release: prometheus + grafanaDashboard: + create: false + labels: + grafana_dashboard: "1" # ----------------------------------------------------------------- # -- Identity Observability - Nebula Operator for Graph Database @@ -1827,6 +1471,10 @@ flink-kubernetes-operator: imagePullSecrets: [] operatorPod: nodeSelector: {} + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9999" + prometheus.io/path: 
"/metrics" webhook: create: false @@ -1907,7 +1555,7 @@ loki: enabled: false fullnameOverride: loki - deploymentMode: SimpleScalable + deploymentMode: SingleBinary imagePullSecrets: [] test: @@ -1918,13 +1566,18 @@ loki: enabled: false loki: + # Prometheus annotations for metrics discovery (port 3100, built-in /metrics endpoint) + serviceAnnotations: + prometheus.io/scrape: "true" + prometheus.io/port: "3100" + prometheus.io/path: "/metrics" # multi-tenancy disabled by default auth_enabled: false schemaConfig: configs: - from: "2024-04-01" store: tsdb - object_store: s3 + object_store: filesystem schema: v13 index: prefix: loki_index_ @@ -1979,15 +1632,15 @@ loki: compactor: retention_enabled: true retention_delete_delay: 1h - delete_request_store: s3 + delete_request_store: filesystem storage: - type: s3 - bucketNames: {} + type: filesystem + # bucketNames: {} # chunks: duploservices-rli-ops10-svc-common-125328463501 # ruler: duploservices-rli-ops10-svc-common-125328463501 # admin: duploservices-rli-ops10-svc-common-125328463501 - s3: {} + # s3: {} # region: eu-west-3 # s3forcepathstyle: true @@ -2002,7 +1655,7 @@ loki: nodeSelector: {} backend: - replicas: 3 + replicas: 0 persistence: # Usage: # - Local Temporal (TSDB) Index: index cache for faster queries @@ -2018,11 +1671,11 @@ loki: nodeSelector: {} read: - replicas: 3 + replicas: 0 nodeSelector: {} write: - replicas: 3 + replicas: 0 persistence: # Usage: # - Write-Ahead Log (WAL): ensure data durability before pushing to S3 @@ -2038,6 +1691,20 @@ loki: size: 50Gi nodeSelector: {} + # Monitoring configuration + monitoring: + serviceMonitor: + enabled: false + interval: 15s + labels: {} + dashboards: + enabled: true + labels: + grafana_dashboard: "1" + rules: + enabled: false + alerting: true + # ----------------------------------------------------------------- # Alloy # https://grafana.com/docs/loki/latest/send-data/alloy/ @@ -2050,6 +1717,13 @@ alloy: image: pullSecrets: [] + # Service configuration with 
Prometheus annotations + service: + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "12345" + prometheus.io/path: "/metrics" + # Do not install CRDs for monitoring crds: create: false @@ -2315,3 +1989,79 @@ alloy: // Limit concurrent streams max_streams = 5000 } + + // ========== Self-monitoring for Alloy ========== + prometheus.exporter.self "alloy_metrics" {} + + prometheus.scrape "alloy" { + targets = prometheus.exporter.self.alloy_metrics.targets + forward_to = [prometheus.remote_write.default.receiver] + scrape_interval = "15s" + job_name = "alloy" + } + + // ServiceMonitor support + prometheus.operator.servicemonitors "services" { + forward_to = [prometheus.remote_write.default.receiver] + } + + // PodMonitor support + prometheus.operator.podmonitors "pods" { + forward_to = [prometheus.remote_write.default.receiver] + } + + // Remote write to Prometheus + prometheus.remote_write "default" { + endpoint { + name = "prometheus" + url = "http://prometheus-server:9090/api/v1/write" + + queue_config { + capacity = 10000 + max_samples_per_send = 2000 + max_shards = 10 + } + } + } + +# ----------------------------------------------------------------- +# -- Prometheus Elasticsearch Exporter for Elasticsearch Metrics +# ----------------------------------------------------------------- +prometheus-elasticsearch-exporter: + enabled: false + fullnameOverride: elasticsearch-exporter + + # Service annotations for Prometheus scraping + service: + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9108" + prometheus.io/path: "/metrics" + + # Elasticsearch connection settings + es: + uri: http://elasticsearch-master:9200 + all: true + indices: true + shards: true + snapshots: true + cluster_settings: true + + serviceMonitor: + enabled: false + interval: 30s + scrapeTimeout: 10s + labels: + release: prometheus + +# ----------------------------------------------------------------- +# -- Enable All Services for Metrics Testing +# 
----------------------------------------------------------------- +# Services that are enabled by default ship with metrics configured; optional ones (e.g. prometheus-elasticsearch-exporter) stay disabled until enabled explicitly. +# To disable specific services, override in your values: +# +# Example: +# zookeeper: +# enabled: false +# fluent-bit: +# enabled: false diff --git a/values-qaibtest.yaml b/values-qaibtest.yaml new file mode 100644 index 00000000..42406fdf --- /dev/null +++ b/values-qaibtest.yaml @@ -0,0 +1,177 @@ +# Overrides for duploservices-qaibtest deployment +# Chart version: 2.0.1 +# Purpose: Full E2E testing of metrics and dashboards +global: + hibernate: false + postgresql: + auth: + database: eocdb + +nodeSelector: + tenantname: duploservices-qaibtest + +# ---------------------------------------------------------------------------- +# Core Services with Metrics/Dashboards +# ---------------------------------------------------------------------------- + +zookeeper: + enabled: true + nodeSelector: { tenantname: duploservices-qaibtest } + metrics: + enabled: true + serviceMonitor: + enabled: true + additionalLabels: { release: prometheus } + +postgresql: + enabled: true + primary: + nodeSelector: { tenantname: duploservices-qaibtest } + metrics: + enabled: true + image: + # Uses the mutable `latest` tag because specific versions may have issues; pin a known-good tag once identified so test runs stay reproducible + tag: latest + serviceMonitor: + enabled: true + labels: { release: prometheus } + +argo-cd: + enabled: true + crds: + install: true + controller: + nodeSelector: { tenantname: duploservices-qaibtest } + metrics: + enabled: true + serviceMonitor: + enabled: true + additionalLabels: { release: prometheus } + server: + nodeSelector: { tenantname: duploservices-qaibtest } + metrics: + enabled: true + serviceMonitor: + enabled: true + additionalLabels: { release: prometheus } + repoServer: + nodeSelector: { tenantname: duploservices-qaibtest } + metrics: + enabled: true + serviceMonitor: + enabled: true + additionalLabels: { release: prometheus } + redis: + nodeSelector: { tenantname: duploservices-qaibtest } + metrics: +
enabled: true + serviceMonitor: + enabled: true + additionalLabels: { release: prometheus } + +haproxy: + enabled: true + nodeSelector: { tenantname: duploservices-qaibtest } + serviceMonitor: + enabled: true + +elasticsearch: + enabled: true + nodeSelector: { tenantname: duploservices-qaibtest } + +kibana: + enabled: true + nodeSelector: { tenantname: duploservices-qaibtest } + # Disable sidecar exporter to avoid image pull errors + extraContainers: [] + +prometheus-elasticsearch-exporter: + enabled: true + nodeSelector: { tenantname: duploservices-qaibtest } + serviceMonitor: + enabled: true + labels: { release: prometheus } + +cloudnative-pg: + enabled: true + nodeSelector: { tenantname: duploservices-qaibtest } + monitoring: + podMonitorEnabled: true + podMonitorAdditionalLabels: { release: prometheus } + +fluent-bit: + enabled: true + nodeSelector: { tenantname: duploservices-qaibtest } + serviceMonitor: + enabled: true + +# ---------------------------------------------------------------------------- +# Prometheus Configuration +# ---------------------------------------------------------------------------- +prometheus: + enabled: true + server: + nodeSelector: { tenantname: duploservices-qaibtest } + prometheus-pushgateway: + nodeSelector: { tenantname: duploservices-qaibtest } + +# ---------------------------------------------------------------------------- +# Grafana Configuration +# ---------------------------------------------------------------------------- +grafana: + enabled: true + nodeSelector: { tenantname: duploservices-qaibtest } + sidecar: + dashboards: + enabled: true + label: grafana_dashboard + labelValue: "1" + # Dashboards - static ConfigMap mounts for enabled services; every ConfigMap listed here must exist or the Grafana pod fails with FailedMount (the chart defaults recommend sidecar discovery instead) + dashboardsConfigMaps: + # -- FID/Product Dashboards + fid: "fid-dashboard" + zookeeper: "zookeeper-dashboard" + iddm: "iddm-dashboard" + elasticsearch: "audit-logs-elastic-dashboard" + service-status: "service-status-dashboard" + # -- Metrics Dashboards (Enabled
services) + argo-cd: "argo-cd-dashboard" + elasticsearch-metrics: "elasticsearch-dashboard" + haproxy: "haproxy-dashboard" + postgresql: "postgresql-dashboard" + zookeeper-metrics: "zookeeper-metrics-dashboard" + fluent-bit: "fluent-bit-dashboard" + cloudnative-pg: "cloudnative-pg-dashboard" + kibana: "kibana-dashboard" + grafana: "grafana-metrics-dashboard" + alloy: "alloy-dashboard" + # -- Disabled Dashboards (services not enabled) + opensearch: null + velero: null + loki: null + flink: null + +# ---------------------------------------------------------------------------- +# Alloy - Log Collector (sends to Loki when enabled) +# ---------------------------------------------------------------------------- +alloy: + enabled: true + nodeSelector: { tenantname: duploservices-qaibtest } + +# ---------------------------------------------------------------------------- +# Services Disabled for this test +# ---------------------------------------------------------------------------- +loki: + enabled: false + +velero: + enabled: false + +opensearch: + enabled: false + +flink-kubernetes-operator: + enabled: false + +nebula-operator: + enabled: false