From 66fa45806d4a7644ba4fe609fb3cc1a78cc3ea98 Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Mon, 5 Jan 2026 22:04:44 +0530 Subject: [PATCH 01/23] feat: support Valkey Cluster with sharding Signed-off-by: Ankit Pati --- .gitignore | 4 +- valkey/scripts/cluster-init-script.sh | 157 ++++++++++ valkey/templates/_helpers.tpl | 53 ++++ valkey/templates/cluster-script.yaml | 11 + valkey/templates/cluster-statefulset.yaml | 309 +++++++++++++++++++ valkey/templates/deploy_valkey.yaml | 2 +- valkey/templates/init_config.yaml | 56 ++++ valkey/templates/pvc.yaml | 2 +- valkey/templates/service-headless.yaml | 8 +- valkey/templates/service.yaml | 7 + valkey/tests/cluster_test.yaml | 347 ++++++++++++++++++++++ valkey/tests/deployment_test.yaml | 36 +++ valkey/tests/pvc_test.yaml | 165 ++++++++++ valkey/tests/service_test.yaml | 55 ++++ valkey/values.yaml | 44 +++ 15 files changed, 1251 insertions(+), 5 deletions(-) create mode 100644 valkey/scripts/cluster-init-script.sh create mode 100644 valkey/templates/cluster-script.yaml create mode 100644 valkey/templates/cluster-statefulset.yaml create mode 100644 valkey/tests/cluster_test.yaml create mode 100644 valkey/tests/pvc_test.yaml diff --git a/.gitignore b/.gitignore index 92b40475..8faae3a7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ -*.sh *.lock dist/ .vscode -temp/ \ No newline at end of file +temp/ +*.tgz diff --git a/valkey/scripts/cluster-init-script.sh b/valkey/scripts/cluster-init-script.sh new file mode 100644 index 00000000..79e3514c --- /dev/null +++ b/valkey/scripts/cluster-init-script.sh @@ -0,0 +1,157 @@ +#!/bin/sh +set -e + +# --- Configuration & Initial Checks --- +if [ "${CLUSTER_NODE_COUNT}" -eq "1" ]; then + echo "Single node deployment. 
Skipping cluster initialization"
+  exit 0
+fi
+
+ORDINAL="${POD_NAME##*-}"  # StatefulSet ordinal = text after the last '-' of the pod name (POSIX expansion, no 'rev' needed)
+REPLICAS_PER_SHARD=${CLUSTER_REPLICAS_PER_SHARD:-1}
+PRIMARIES=$(( CLUSTER_NODE_COUNT / (1 + REPLICAS_PER_SHARD) ))  # only used in the log line below
+
+{{- if and .Values.auth.enabled .Values.auth.aclUsers }}
+AUTH_OPTION="-a $(grep '^user {{ .Values.cluster.replicationUser }} ' /etc/valkey/users.acl | sed 's/.*#\([a-f0-9]*\).*/\1/' | head -1)"
+# Prefer the plaintext password from VALKEY_AUTH_PASSWORD when it is set. NOTE(review): the fallback above passes the '#<hex>' field of users.acl to -a and sends no --user; AUTH expects the plaintext password, so this presumably cannot authenticate - confirm.
+if [ -n "${VALKEY_AUTH_PASSWORD}" ]; then
+  AUTH_OPTION="-a ${VALKEY_AUTH_PASSWORD}"
+fi
+{{- else }}
+AUTH_OPTION=""
+{{- end }}
+
+{{- if .Values.tls.enabled }}
+TLS_OPTION="--tls --cacert /tls/{{ .Values.tls.caPublicKey }}"
+{{- else }}
+TLS_OPTION=""
+{{- end }}
+
+echo "Initializing as ordinal ${ORDINAL}. Total nodes: ${CLUSTER_NODE_COUNT}, Primaries: ${PRIMARIES}, Replicas per shard: ${REPLICAS_PER_SHARD}"
+
+HEADLESS_SVC="{{ include "valkey.headlessServiceName" . }}"
+NAMESPACE="{{ .Release.Namespace }}"
+CLUSTER_DOMAIN="{{ .Values.clusterDomain }}"
+MY_IP=$(hostname -i | awk '{ print $1 }')  # keep the first address only; 'hostname -i' can print several
+
+# Block until the local server answers PING. AUTH_OPTION and TLS_OPTION are left unquoted on purpose so they split into separate arguments (or vanish when empty).
+until valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h localhost -p {{ .Values.service.port }} ping 2>/dev/null | grep -q "PONG"; do
+  echo "Waiting for local Valkey to start..."
+  sleep 2
+done
+echo "Local Valkey is ready at ${MY_IP}"
+
+# --- Discover Existing Cluster: probe every peer ordinal and keep the first one that reports cluster_state:ok ---
+HEALTHY_NODE=""
+for i in $(seq 0 $((CLUSTER_NODE_COUNT - 1))); do
+  if [ "${i}" != "${ORDINAL}" ]; then
+    NODE_HOST="{{ include "valkey.fullname" . }}-${i}.${HEADLESS_SVC}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}"
+    if valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${NODE_HOST}" -p {{ .Values.service.port }} cluster info 2>/dev/null | grep -q "cluster_state:ok"; then
+      HEALTHY_NODE="${NODE_HOST}"
+      echo "Found healthy cluster node: ${HEALTHY_NODE}"
+      break
+    fi
+  fi
+done
+
+# --- Logic for Joining an Existing Cluster (skipped when no peer reported a healthy cluster) ---
+if [ -n "${HEALTHY_NODE}" ]; then
+  echo "Healthy cluster found. 
Attempting to join..."
+
+  # 1. Forget any old, failed instance of ourselves. Compare the address field exactly (text before '@' in column 2) and look for 'fail' only in the flags column, so a node whose address or hostname merely contains ours is never forgotten.
+  FAILED_NODE_ID=$(valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${HEALTHY_NODE}" -p {{ .Values.service.port }} cluster nodes 2>/dev/null | awk -v addr="${MY_IP}:{{ .Values.service.port }}" '{ split($2, ep, "@") } ep[1] == addr && $3 ~ /fail/ { print $1; exit }' || echo "")
+  if [ -n "${FAILED_NODE_ID}" ]; then
+    echo "Found my IP (${MY_IP}) marked as failed with ID ${FAILED_NODE_ID}. Forgetting it..."
+    valkey-cli ${AUTH_OPTION} ${TLS_OPTION} --cluster call "${HEALTHY_NODE}:{{ .Values.service.port }}" cluster forget "${FAILED_NODE_ID}" > /dev/null 2>&1 || true  # best effort: a failed FORGET must not abort the join
+    sleep 3
+  fi
+  # 2. Meet the cluster. CLUSTER MEET is given an IP here, so resolve the peer first and stop with a clear error if resolution returns nothing.
+  HEALTHY_NODE_IP=$(getent hosts "${HEALTHY_NODE}" | awk 'NR == 1 { print $1 }')
+  [ -n "${HEALTHY_NODE_IP}" ] || { echo "ERROR: Could not resolve ${HEALTHY_NODE}; cannot send CLUSTER MEET." >&2; exit 1; }
+  echo "Sending CLUSTER MEET to ${HEALTHY_NODE} (${HEALTHY_NODE_IP})"
+  valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h localhost -p {{ .Values.service.port }} cluster meet "${HEALTHY_NODE_IP}" {{ .Values.service.port }}
+  sleep 5
+
+  # 3. Find an orphaned master and become its replica
+  echo "Searching for a master to replicate..." 
+ + MY_NODE_ID=$(valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h localhost -p {{ .Values.service.port }} cluster myid) + echo "My Node ID is ${MY_NODE_ID}" + + # This prevents race conditions from the order of 'cluster nodes' output + TARGET_MASTER_ID=$(valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${HEALTHY_NODE}" -p {{ .Values.service.port }} cluster nodes | awk -v replicas_needed="${REPLICAS_PER_SHARD}" -v my_id="${MY_NODE_ID}" ' + # Pass 1: Build maps of masters and replica counts + /master/ && !/fail/ { masters[$1] = 1 } + /slave/ && !/fail/ { master_replicas[$4]++ } + END { + # Pass 2: Iterate over the masters we found + for (master_id in masters) { + # Check if it needs a replica AND it is not ourself + if ( master_id != my_id && (master_replicas[master_id] < replicas_needed || master_replicas[master_id] == "") ) { + print master_id + exit # Found a suitable master + } + } + } + ') + + if [ -n "${TARGET_MASTER_ID}" ]; then + echo "Found target master ${TARGET_MASTER_ID} that needs a replica." + echo "Sending CLUSTER REPLICATE command..." + + if valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h localhost -p {{ .Values.service.port }} cluster replicate "${TARGET_MASTER_ID}"; then + echo "Successfully configured as a replica for ${TARGET_MASTER_ID}." + else + echo "ERROR: Failed to replicate master ${TARGET_MASTER_ID}. Manual intervention required." + exit 1 + fi + else + echo "WARNING: Could not find a master that needs a replica. Staying as a master with no slots. Attempting rebalance..." + + # Wait for cluster propagation before rebalancing + PROPAGATION_ATTEMPTS=0 + MAX_PROPAGATION_ATTEMPTS=60 + while [ ${PROPAGATION_ATTEMPTS} -lt ${MAX_PROPAGATION_ATTEMPTS} ]; do + CLUSTER_STATE=$(valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h localhost -p {{ .Values.service.port }} cluster info 2>/dev/null | grep "cluster_state:" | cut -d: -f2 | tr -d '\r\n') + if [ "${CLUSTER_STATE}" = "ok" ]; then + echo "Cluster state is OK. Proceeding with rebalance." 
+        break
+      fi
+      echo "Cluster state is ${CLUSTER_STATE}. Waiting for propagation... (${PROPAGATION_ATTEMPTS}/${MAX_PROPAGATION_ATTEMPTS})"
+      PROPAGATION_ATTEMPTS=$((PROPAGATION_ATTEMPTS + 1))
+      sleep 5
+    done
+    # Best effort: hand slots to this empty master; a rebalance failure is tolerated (|| true) and the script still exits 0.
+    valkey-cli ${AUTH_OPTION} ${TLS_OPTION} --cluster rebalance "${HEALTHY_NODE}:{{ .Values.service.port }}" --cluster-use-empty-masters --cluster-yes || true
+  fi
+  exit 0
+fi
+
+echo "No healthy cluster found. Proceeding with initial creation logic."
+if [ "${ORDINAL}" = "0" ]; then
+  echo "This is the primary-0 node, creating a new cluster..."
+  NODES=""
+  for i in $(seq 0 $((CLUSTER_NODE_COUNT - 1))); do
+    NODE_HOST="{{ include "valkey.fullname" . }}-${i}.${HEADLESS_SVC}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}"
+    until valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${NODE_HOST}" -p {{ .Values.service.port }} ping 2>/dev/null | grep -q "PONG"; do
+      echo "Waiting for ${NODE_HOST} to be ready..."
+      sleep 2
+    done
+    NODES="${NODES} ${NODE_HOST}:{{ .Values.service.port }}"
+  done
+  sleep 10
+  # NODES is expanded unquoted on purpose (one host:port argument per node); --cluster-yes answers the confirmation prompt, as the rebalance call does.
+  echo "Creating cluster with nodes: ${NODES}"
+  valkey-cli ${AUTH_OPTION} ${TLS_OPTION} --cluster create ${NODES} --cluster-replicas "${REPLICAS_PER_SHARD}" --cluster-yes
+  echo "Cluster created successfully."
+else
+  echo "Waiting for pod-0 to initialize the cluster..."
+  PRIMARY_HOST="{{ include "valkey.fullname" . }}-0.${HEADLESS_SVC}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}"
+  until valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${PRIMARY_HOST}" -p {{ .Values.service.port }} cluster info 2>/dev/null | grep -q "cluster_state:ok"; do
+    echo "Waiting for cluster to be initialized by pod-0..."
+    sleep 5
+  done
+  echo "Cluster is initialized. My role has been assigned by the creator." 
+fi + +exit 0 diff --git a/valkey/templates/_helpers.tpl b/valkey/templates/_helpers.tpl index 593cf77c..abbfd4d8 100644 --- a/valkey/templates/_helpers.tpl +++ b/valkey/templates/_helpers.tpl @@ -188,3 +188,56 @@ Validate replica authentication configuration {{- end }} {{- end -}} +{{/* +Validate cluster configuration +*/}} +{{- define "valkey.validateClusterConfig" -}} +{{- if .Values.cluster.enabled }} + {{- if .Values.replica.enabled }} + {{- fail "cluster.enabled and replica.enabled are mutually exclusive. Please enable only one mode." }} + {{- end }} + {{- if lt (int .Values.cluster.shards) 3 }} + {{- fail "Cluster mode requires at least 3 shards (cluster.shards >= 3) for proper cluster operation." }} + {{- end }} + {{- if not .Values.cluster.persistence.size }} + {{- fail "Cluster mode requires persistent storage. Please set cluster.persistence.size (e.g., '5Gi')" }} + {{- end }} +{{- end }} +{{- end -}} + +{{/* +Validate cluster authentication configuration +*/}} +{{- define "valkey.validateClusterAuth" -}} +{{- if and .Values.cluster.enabled .Values.auth.enabled }} + {{- if not (hasKey .Values.auth.aclUsers .Values.cluster.replicationUser) }} + {{- fail (printf "Cluster replication user '%s' (cluster.replicationUser) must be defined in auth.aclUsers. The chart requires this to retrieve the password for cluster authentication." .Values.cluster.replicationUser) }} + {{- end }} +{{- end }} +{{- end -}} + +{{/* +Calculate total number of nodes in the cluster +*/}} +{{- define "valkey.clusterNodeCount" -}} +{{- $shards := int .Values.cluster.shards -}} +{{- $replicasPerShard := int .Values.cluster.replicasPerShard -}} +{{- mul $shards (add 1 $replicasPerShard) -}} +{{- end -}} + +{{/* +Generate list of cluster nodes for VALKEY_NODES environment variable +*/}} +{{- define "valkey.clusterNodes" -}} +{{- $fullname := include "valkey.fullname" . -}} +{{- $headlessSvc := include "valkey.headlessServiceName" . 
-}}
+{{- $namespace := .Release.Namespace -}}
+{{- $clusterDomain := .Values.clusterDomain -}}
+{{- $nodeCount := include "valkey.clusterNodeCount" . | int -}}
+{{- $nodes := list -}}
+{{- range $i := until $nodeCount -}}
+{{- $nodes = append $nodes (printf "%s-%d.%s.%s.svc.%s" $fullname $i $headlessSvc $namespace $clusterDomain) -}}
+{{- end -}}
+{{- join " " $nodes -}}
+{{- end -}}
+
diff --git a/valkey/templates/cluster-script.yaml b/valkey/templates/cluster-script.yaml
new file mode 100644
index 00000000..6023fe75
--- /dev/null
+++ b/valkey/templates/cluster-script.yaml
@@ -0,0 +1,11 @@
+{{- if .Values.cluster.enabled }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "valkey.fullname" . }}-cluster-script
+  labels:
+    {{- include "valkey.labels" . | nindent 4 }}
+data:
+  init-cluster.sh: |-
+{{ tpl (.Files.Get "scripts/cluster-init-script.sh") . | indent 4 }}
+{{- end }}
diff --git a/valkey/templates/cluster-statefulset.yaml b/valkey/templates/cluster-statefulset.yaml
new file mode 100644
index 00000000..30a94893
--- /dev/null
+++ b/valkey/templates/cluster-statefulset.yaml
@@ -0,0 +1,309 @@
+{{- if .Values.cluster.enabled }}
+{{- include "valkey.validateAuthConfig" . }}
+{{- include "valkey.validateClusterConfig" . }}
+{{- include "valkey.validateClusterAuth" . }}
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: {{ include "valkey.fullname" . }}
+  labels:
+    {{- include "valkey.labels" . | nindent 4 }}
+spec:
+  serviceName: {{ include "valkey.headlessServiceName" . }}  # same helper the init scripts and the clusterNodes helper use to build pod hostnames, so per-pod DNS names cannot drift
+  replicas: {{ include "valkey.clusterNodeCount" . }}
+  podManagementPolicy: Parallel
+  selector:
+    matchLabels:
+      {{- include "valkey.selectorLabels" . 
| nindent 6 }} + volumeClaimTemplates: + - metadata: + name: valkey-data + spec: + accessModes: {{ toYaml .Values.cluster.persistence.accessModes | nindent 8 }} + {{- if .Values.cluster.persistence.storageClass }} + storageClassName: {{ .Values.cluster.persistence.storageClass | quote }} + {{- end }} + resources: + requests: + storage: {{ .Values.cluster.persistence.size | quote }} + template: + metadata: + labels: + {{- include "valkey.selectorLabels" . | nindent 8 }} + {{- with .Values.commonLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + checksum/initconfig: {{ include (print $.Template.BasePath "/init_config.yaml") . | sha256sum | trunc 32 | quote }} + {{- if .Values.valkeyConfig }} + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum | trunc 32 | quote }} + {{- end }} + spec: + {{- (include "valkey.imagePullSecrets" .) | nindent 6 }} + automountServiceAccountToken: {{ .Values.serviceAccount.automount }} + serviceAccountName: {{ include "valkey.serviceAccountName" . }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName | quote }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + initContainers: + - name: {{ include "valkey.fullname" . }}-init + image: {{ include "valkey.image" . }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . 
| nindent 12 }}
+          {{- end }}
+          command: [ "/scripts/init.sh" ]
+          env:
+            - name: POD_INDEX
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.labels['apps.kubernetes.io/pod-index']
+            - name: CLUSTER_SHARDS
+              value: {{ .Values.cluster.shards | quote }}
+            - name: CLUSTER_REPLICAS_PER_SHARD
+              value: {{ .Values.cluster.replicasPerShard | quote }}
+          volumeMounts:
+            - name: valkey-data
+              mountPath: /data
+            - name: scripts
+              mountPath: /scripts
+            {{- if .Values.valkeyConfig }}
+            - name: valkey-config
+              mountPath: /usr/local/etc/valkey/valkey.conf
+              subPath: valkey.conf
+            {{- end }}
+            {{- if .Values.extraSecretValkeyConfigs }}
+            - name: extravalkeyconfigs-volume  # NOTE(review): no volume with this name appears in this StatefulSet's volumes list - confirm
+              mountPath: /extravalkeyconfigs
+            {{- end }}
+            {{- if .Values.auth.enabled }}
+            - name: valkey-acl
+              mountPath: /etc/valkey
+            {{- if .Values.auth.usersExistingSecret }}
+            - name: valkey-users-secret
+              mountPath: /valkey-users-secret
+              readOnly: true
+            {{- end }}
+            {{- if or (include "valkey.hasInlinePasswords" . | eq "true") .Values.auth.aclConfig }}
+            - name: valkey-auth-secret
+              mountPath: /valkey-auth-secret
+              readOnly: true
+            {{- end }}
+            {{- end }}
+          {{- with .Values.initResources }}
+          resources:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+        {{- with .Values.extraInitContainers }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+      containers:
+        - name: {{ include "valkey.fullname" . }}
+          image: {{ include "valkey.image" . }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          command: [ "/bin/sh", "-c" ]
+          args:
+            - |
+              /cluster-script/init-cluster.sh &
+              exec valkey-server /data/conf/valkey.conf  # exec replaces the shell, so valkey-server is PID 1 and receives SIGTERM directly
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          env:
+            - name: POD_INDEX
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.labels['apps.kubernetes.io/pod-index']
+            - name: POD_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+            - name: VALKEY_NODES
+              value: {{ include "valkey.clusterNodes" . | quote }}
+            - name: CLUSTER_NODE_COUNT
+              value: {{ include "valkey.clusterNodeCount" . 
| quote }} + - name: CLUSTER_REPLICAS_PER_SHARD + value: {{ .Values.cluster.replicasPerShard | quote }} + {{- range $key, $val := .Values.env }} + - name: {{ $key }} + value: "{{ $val }}" + {{- end }} + - name: VALKEY_LOGLEVEL + value: "{{ .Values.valkeyLogLevel }}" + ports: + - name: tcp + containerPort: {{ .Values.service.port }} + protocol: TCP + - name: tcp-bus + containerPort: {{ .Values.cluster.busPort }} + protocol: TCP + startupProbe: + exec: + {{- if .Values.tls.enabled }} + command: [ "sh", "-c", "valkey-cli --cacert /tls/{{ .Values.tls.caPublicKey }} --tls ping" ] + {{- else }} + command: [ "sh", "-c", "valkey-cli ping" ] + {{- end }} + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 5 + failureThreshold: 30 + livenessProbe: + exec: + {{- if .Values.tls.enabled }} + command: [ "sh", "-c", "valkey-cli --cacert /tls/{{ .Values.tls.caPublicKey }} --tls ping" ] + {{- else }} + command: [ "sh", "-c", "valkey-cli ping" ] + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + - name: valkey-data + mountPath: /data + - name: cluster-script + mountPath: /cluster-script + {{- if .Values.tls.enabled }} + - name: {{ include "valkey.fullname" . }}-tls + mountPath: /tls + {{- end }} + {{- if .Values.auth.enabled }} + - name: valkey-acl + mountPath: /etc/valkey + {{- end }} + {{- range $secret := .Values.extraValkeySecrets }} + - name: {{ $secret.name }}-valkey + mountPath: {{ $secret.mountPath }} + {{- end }} + {{- range $config := .Values.extraValkeyConfigs }} + - name: {{ $config.name }}-valkey + mountPath: {{ $config.mountPath }} + {{- end }} + {{- if .Values.metrics.enabled }} + - name: metrics + image: {{ include "valkey.metrics.exporter.image" . }} + imagePullPolicy: {{ .Values.metrics.exporter.image.pullPolicy | quote }} + {{- with .Values.metrics.exporter.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.metrics.exporter.command }} + command: + {{- toYaml . 
| nindent 12 }} + {{- end }} + {{- with .Values.metrics.exporter.args }} + args: + {{- toYaml . | nindent 12 }} + {{- end }} + ports: + - name: metrics + containerPort: {{ .Values.metrics.exporter.port }} + startupProbe: + tcpSocket: + port: metrics + livenessProbe: + tcpSocket: + port: metrics + readinessProbe: + httpGet: + path: / + port: metrics + {{- with .Values.metrics.exporter.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.metrics.exporter.extraVolumeMounts }} + volumeMounts: + {{- toYaml . | nindent 12 }} + {{- end }} + env: + - name: REDIS_ALIAS + value: {{ include "valkey.fullname" . }} + {{- range $key, $val := .Values.metrics.exporter.extraEnvs }} + - name: {{ $key }} + value: "{{ $val }}" + {{- end }} + {{- end }} + volumes: + - name: scripts + configMap: + name: {{ include "valkey.fullname" . }}-init-scripts + defaultMode: 0555 + - name: cluster-script + configMap: + name: {{ include "valkey.fullname" . }}-cluster-script + defaultMode: 0555 + {{- if .Values.auth.enabled }} + - name: valkey-acl + emptyDir: + medium: Memory + {{- end }} + {{- if .Values.valkeyConfig }} + - name: valkey-config + configMap: + name: {{ include "valkey.fullname" . }}-config + {{- end }} + {{- range .Values.extraValkeySecrets }} + - name: {{ .name }}-valkey + secret: + secretName: {{ .name }} + defaultMode: {{ .defaultMode | default 0440 }} + {{- end }} + {{- if .Values.tls.enabled }} + - name: {{ include "valkey.fullname" . 
}}-tls + secret: + secretName: {{ required "An existing secret is required to enable TLS" .Values.tls.existingSecret }} + defaultMode: 0400 + {{- end }} + {{- range .Values.extraValkeyConfigs }} + - name: {{ .name }}-valkey + configMap: + name: {{ .name }} + defaultMode: {{ .defaultMode | default 0440 }} + {{- end }} + {{- if .Values.metrics.enabled }} + {{- range .Values.metrics.exporter.extraExporterSecrets }} + - name: {{ .name }}-exporter + secret: + secretName: {{ .name }} + defaultMode: {{ .defaultMode | default 0440 }} + {{- end }} + {{- end }} + {{- if .Values.auth.enabled }} + {{- if .Values.auth.usersExistingSecret }} + - name: valkey-users-secret + secret: + secretName: {{ .Values.auth.usersExistingSecret }} + defaultMode: 0400 + {{- end }} + {{- if or (include "valkey.hasInlinePasswords" . | eq "true") .Values.auth.aclConfig }} + - name: valkey-auth-secret + secret: + secretName: {{ include "valkey.fullname" . }}-auth + defaultMode: 0400 + {{- end }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.topologySpreadConstraints }} + topologySpreadConstraints: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/valkey/templates/deploy_valkey.yaml b/valkey/templates/deploy_valkey.yaml index 7bc9a5a8..19a96b7d 100644 --- a/valkey/templates/deploy_valkey.yaml +++ b/valkey/templates/deploy_valkey.yaml @@ -1,4 +1,4 @@ -{{- if not .Values.replica.enabled }} +{{- if not (or .Values.replica.enabled .Values.cluster.enabled) }} {{- $fullname := include "valkey.fullname" . 
}} {{- $storage := .Values.dataStorage }} {{- $createPVC := and $storage.enabled (not (empty $storage.requestedSize)) (empty $storage.persistentVolumeClaimName) }} diff --git a/valkey/templates/init_config.yaml b/valkey/templates/init_config.yaml index 9b0337e5..654e156d 100644 --- a/valkey/templates/init_config.yaml +++ b/valkey/templates/init_config.yaml @@ -219,6 +219,62 @@ data: {{- end }} {{- end }} + {{- if .Values.cluster.enabled }} + # Cluster mode configuration + log "Configuring cluster mode" + + # Use POD_INDEX from Kubernetes metadata + POD_INDEX=${POD_INDEX:-0} + + # Configure cluster-enabled settings + { + echo "" + echo "# Cluster Configuration" + echo "cluster-enabled yes" + echo "cluster-config-file /data/nodes.conf" + echo "cluster-node-timeout {{ .Values.cluster.nodeTimeout }}" + {{- if not .Values.cluster.requireFullCoverage }} + echo "cluster-require-full-coverage no" + {{- end }} + {{- if .Values.cluster.allowReadsWhenDown }} + echo "cluster-allow-reads-when-down yes" + {{- end }} + echo "" + echo "# Cluster node announcement" + echo "cluster-announce-hostname {{ include "valkey.fullname" . }}-$POD_INDEX.{{ include "valkey.headlessServiceName" . 
}}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}" + echo "cluster-announce-port {{ .Values.service.port }}" + echo "cluster-announce-bus-port {{ .Values.cluster.busPort }}" + echo "cluster-preferred-endpoint-type hostname" + } >>"$VALKEY_CONFIG" + + log "Cluster node $POD_INDEX configured with announce IP" + + {{- if .Values.auth.enabled }} + # Configure cluster authentication + {{- $replUsername := .Values.cluster.replicationUser }} + REPL_PASSWORD=$(get_user_password "{{ $replUsername }}") || exit 1 + + { + echo "" + echo "# Cluster authentication" + echo "masterauth $REPL_PASSWORD" + echo "masteruser {{ $replUsername }}" + } >>"$VALKEY_CONFIG" + log "Configured cluster authentication with user {{ $replUsername }}" + {{- end }} + + {{- if .Values.tls.enabled }} + # TLS for cluster + { + echo "" + echo "# TLS for cluster" + echo "tls-replication yes" + echo "tls-cluster yes" + } >>"$VALKEY_CONFIG" + log "Enabled TLS for cluster communication" + {{- end }} + {{- end }} + # Append extra configs if present if [ -f /usr/local/etc/valkey/valkey.conf ]; then log "Appending /usr/local/etc/valkey/valkey.conf" diff --git a/valkey/templates/pvc.yaml b/valkey/templates/pvc.yaml index aa20859b..9f25edf8 100644 --- a/valkey/templates/pvc.yaml +++ b/valkey/templates/pvc.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.dataStorage.enabled (not .Values.replica.enabled) (not (empty .Values.dataStorage.requestedSize)) (empty .Values.dataStorage.persistentVolumeClaimName) }} +{{- if and .Values.dataStorage.enabled (not .Values.replica.enabled) (not .Values.cluster.enabled) (not (empty .Values.dataStorage.requestedSize)) (empty .Values.dataStorage.persistentVolumeClaimName) }} apiVersion: v1 kind: PersistentVolumeClaim metadata: diff --git a/valkey/templates/service-headless.yaml b/valkey/templates/service-headless.yaml index 733ca683..796ccd90 100644 --- a/valkey/templates/service-headless.yaml +++ b/valkey/templates/service-headless.yaml @@ -1,4 +1,4 @@ -{{- if 
.Values.replica.enabled }} +{{- if or .Values.replica.enabled .Values.cluster.enabled }} apiVersion: v1 kind: Service metadata: @@ -15,6 +15,12 @@ spec: port: {{ .Values.service.port }} targetPort: tcp protocol: TCP + {{- if .Values.cluster.enabled }} + - name: tcp-bus + port: {{ .Values.cluster.busPort }} + targetPort: tcp-bus + protocol: TCP + {{- end }} selector: {{- include "valkey.selectorLabels" . | nindent 4 }} {{- end }} diff --git a/valkey/templates/service.yaml b/valkey/templates/service.yaml index 1e786826..dbfc38fb 100644 --- a/valkey/templates/service.yaml +++ b/valkey/templates/service.yaml @@ -31,8 +31,15 @@ spec: {{- if .Values.service.appProtocol }} appProtocol: {{ .Values.service.appProtocol }} {{- end }} + {{- if .Values.cluster.enabled }} + - port: {{ .Values.cluster.busPort }} + targetPort: tcp-bus + protocol: TCP + name: tcp-bus + {{- end }} selector: {{- include "valkey.selectorLabels" . | nindent 4 }} {{- if .Values.replica.enabled }} statefulset.kubernetes.io/pod-name: {{ include "valkey.fullname" . 
}}-0 {{- end }} + {{- /* In cluster mode, the service routes to all nodes; clients handle redirections */}} diff --git a/valkey/tests/cluster_test.yaml b/valkey/tests/cluster_test.yaml new file mode 100644 index 00000000..72e7bfde --- /dev/null +++ b/valkey/tests/cluster_test.yaml @@ -0,0 +1,347 @@ +suite: cluster configuration +templates: + - templates/cluster-statefulset.yaml + - templates/cluster-script.yaml + - templates/service-headless.yaml + - templates/service.yaml + - templates/init_config.yaml +tests: + # Validation tests + - it: should fail when cluster enabled but no persistence size provided + set: + cluster.enabled: true + cluster.persistence.size: "" + template: templates/cluster-statefulset.yaml + asserts: + - failedTemplate: + errorPattern: "Cluster mode requires persistent storage.*" + + - it: should fail when cluster enabled with less than 3 shards + set: + cluster.enabled: true + cluster.shards: 2 + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - failedTemplate: + errorPattern: "Cluster mode requires at least 3 shards.*" + + - it: should fail when both cluster and replica are enabled + set: + cluster.enabled: true + replica.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - failedTemplate: + errorPattern: "cluster.enabled and replica.enabled are mutually exclusive.*" + + # StatefulSet tests + - it: should create StatefulSet when cluster is enabled + set: + cluster.enabled: true + cluster.shards: 3 + cluster.replicasPerShard: 1 + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - isKind: + of: StatefulSet + - equal: + path: spec.replicas + value: 6 # 3 shards * (1 + 1 replica) = 6 nodes + + - it: should create StatefulSet with 3 shards and 0 replicas (3 nodes total) + set: + cluster.enabled: true + cluster.shards: 3 + cluster.replicasPerShard: 0 + cluster.persistence.size: "5Gi" + template: 
templates/cluster-statefulset.yaml + asserts: + - isKind: + of: StatefulSet + - equal: + path: spec.replicas + value: 3 + + - it: should create StatefulSet with 5 shards and 2 replicas (15 nodes total) + set: + cluster.enabled: true + cluster.shards: 5 + cluster.replicasPerShard: 2 + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - isKind: + of: StatefulSet + - equal: + path: spec.replicas + value: 15 # 5 shards * (1 + 2 replicas) = 15 nodes + + - it: should use Parallel pod management policy for cluster mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.podManagementPolicy + value: Parallel + + - it: should configure PVC with correct storage settings + set: + cluster.enabled: true + cluster.persistence.size: "10Gi" + cluster.persistence.storageClass: "fast-ssd" + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.volumeClaimTemplates[0].spec.resources.requests.storage + value: "10Gi" + - equal: + path: spec.volumeClaimTemplates[0].spec.storageClassName + value: "fast-ssd" + + - it: should expose both tcp and tcp-bus ports in cluster mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.busPort: 16379 + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].ports + content: + name: tcp + containerPort: 6379 + protocol: TCP + - contains: + path: spec.template.spec.containers[0].ports + content: + name: tcp-bus + containerPort: 16379 + protocol: TCP + + # Init container tests + - it: should have init container with cluster environment variables + set: + cluster.enabled: true + cluster.shards: 4 + cluster.replicasPerShard: 2 + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.initContainers[0].env + content: + name: CLUSTER_SHARDS + 
value: "4" + - contains: + path: spec.template.spec.initContainers[0].env + content: + name: CLUSTER_REPLICAS_PER_SHARD + value: "2" + + # Service headless tests + - it: should create headless service with bus port in cluster mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.busPort: 16379 + template: templates/service-headless.yaml + asserts: + - isKind: + of: Service + - equal: + path: spec.clusterIP + value: None + - contains: + path: spec.ports + content: + name: tcp + port: 6379 + targetPort: tcp + protocol: TCP + - contains: + path: spec.ports + content: + name: tcp-bus + port: 16379 + targetPort: tcp-bus + protocol: TCP + + # Main service tests + - it: should create service with bus port in cluster mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.busPort: 16379 + template: templates/service.yaml + asserts: + - isKind: + of: Service + - contains: + path: spec.ports + content: + name: tcp-bus + port: 16379 + targetPort: tcp-bus + protocol: TCP + + # Cluster init script tests + - it: should create cluster-script ConfigMap when cluster is enabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-script.yaml + asserts: + - isKind: + of: ConfigMap + - equal: + path: metadata.name + value: RELEASE-NAME-valkey-cluster-script + + - it: cluster-script ConfigMap should contain init-cluster.sh + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-script.yaml + asserts: + - isNotNull: + path: data["init-cluster.sh"] + - matchRegex: + path: data["init-cluster.sh"] + pattern: "CLUSTER MEET" + + - it: cluster-script should contain cluster create logic + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-script.yaml + asserts: + - matchRegex: + path: data["init-cluster.sh"] + pattern: "--cluster create" + + - it: should run cluster init script as background process + set: + 
cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - matchRegex: + path: spec.template.spec.containers[0].args[0] + pattern: "/cluster-script/init-cluster.sh &" + + - it: should mount cluster-script volume in container + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: cluster-script + mountPath: /cluster-script + + - it: should define cluster-script volume + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.volumes + content: + name: cluster-script + configMap: + name: RELEASE-NAME-valkey-cluster-script + defaultMode: 365 + + # Authentication tests + - it: should fail when cluster auth enabled but replication user not in aclUsers + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + cluster.replicationUser: "clusteruser" + auth.aclUsers: + default: + password: "test" + permissions: "~* &* +@all" + template: templates/cluster-statefulset.yaml + asserts: + - failedTemplate: + errorPattern: "Cluster replication user 'clusteruser'.*must be defined in auth.aclUsers.*" + + - it: should succeed when cluster auth is properly configured + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + cluster.replicationUser: "default" + auth.aclUsers: + default: + password: "testpass" + permissions: "~* &* +@all" + template: templates/cluster-statefulset.yaml + asserts: + - isKind: + of: StatefulSet + + # TLS tests + - it: should configure TLS volume mount in cluster mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + tls.enabled: true + tls.existingSecret: "valkey-tls-secret" + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: 
spec.template.spec.containers[0].volumeMounts + content: + name: RELEASE-NAME-valkey-tls + mountPath: /tls + + # Init config tests (cluster mode config generation) + - it: should generate cluster config in init script + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.nodeTimeout: 20000 + template: templates/init_config.yaml + asserts: + - matchRegex: + path: data["init.sh"] + pattern: "cluster-enabled yes" + - matchRegex: + path: data["init.sh"] + pattern: "cluster-config-file /data/nodes.conf" + - matchRegex: + path: data["init.sh"] + pattern: "cluster-node-timeout 20000" + + - it: should configure cluster-require-full-coverage when disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.requireFullCoverage: false + template: templates/init_config.yaml + asserts: + - matchRegex: + path: data["init.sh"] + pattern: "cluster-require-full-coverage no" + + - it: should configure cluster-allow-reads-when-down when enabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.allowReadsWhenDown: true + template: templates/init_config.yaml + asserts: + - matchRegex: + path: data["init.sh"] + pattern: "cluster-allow-reads-when-down yes" diff --git a/valkey/tests/deployment_test.yaml b/valkey/tests/deployment_test.yaml index 28c2653d..27aaa72c 100644 --- a/valkey/tests/deployment_test.yaml +++ b/valkey/tests/deployment_test.yaml @@ -3,6 +3,42 @@ templates: - templates/deploy_valkey.yaml - templates/init_config.yaml tests: + - it: should not create Deployment when replica.enabled is true + set: + replica.enabled: true + template: templates/deploy_valkey.yaml + asserts: + - hasDocuments: + count: 0 + + - it: should not create Deployment when cluster.enabled is true + set: + cluster.enabled: true + template: templates/deploy_valkey.yaml + asserts: + - hasDocuments: + count: 0 + + - it: should not create Deployment when both replica.enabled and cluster.enabled are true + set: + replica.enabled: true + 
cluster.enabled: true + template: templates/deploy_valkey.yaml + asserts: + - hasDocuments: + count: 0 + + - it: should create Deployment when both replica.enabled and cluster.enabled are false + set: + replica.enabled: false + cluster.enabled: false + template: templates/deploy_valkey.yaml + asserts: + - hasDocuments: + count: 1 + - isKind: + of: Deployment + - it: should not have auth volumes when auth disabled set: auth.enabled: false diff --git a/valkey/tests/pvc_test.yaml b/valkey/tests/pvc_test.yaml new file mode 100644 index 00000000..003939f8 --- /dev/null +++ b/valkey/tests/pvc_test.yaml @@ -0,0 +1,165 @@ +suite: pvc configuration +templates: + - templates/pvc.yaml +tests: + - it: should not create PVC when replica.enabled is true + set: + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + replica.enabled: true + asserts: + - hasDocuments: + count: 0 + + - it: should not create PVC when cluster.enabled is true + set: + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + cluster.enabled: true + asserts: + - hasDocuments: + count: 0 + + - it: should not create PVC when both replica.enabled and cluster.enabled are true + set: + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + replica.enabled: true + cluster.enabled: true + asserts: + - hasDocuments: + count: 0 + + - it: should create PVC when both replica.enabled and cluster.enabled are false and conditions are met + set: + replica.enabled: false + cluster.enabled: false + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + dataStorage.persistentVolumeClaimName: "" + asserts: + - hasDocuments: + count: 1 + - isKind: + of: PersistentVolumeClaim + + - it: should not create PVC when dataStorage.enabled is false + set: + dataStorage.enabled: false + replica.enabled: false + cluster.enabled: false + asserts: + - hasDocuments: + count: 0 + + - it: should not create PVC when dataStorage.requestedSize is empty + set: + dataStorage.enabled: true + 
dataStorage.requestedSize: "" + replica.enabled: false + cluster.enabled: false + asserts: + - hasDocuments: + count: 0 + + - it: should not create PVC when dataStorage.persistentVolumeClaimName is set + set: + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + dataStorage.persistentVolumeClaimName: "existing-pvc" + replica.enabled: false + cluster.enabled: false + asserts: + - hasDocuments: + count: 0 + + - it: should have correct storage size + set: + dataStorage.enabled: true + dataStorage.requestedSize: "16Gi" + replica.enabled: false + cluster.enabled: false + asserts: + - isKind: + of: PersistentVolumeClaim + - equal: + path: spec.resources.requests.storage + value: "16Gi" + + - it: should have keepPvc annotation when enabled + set: + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + dataStorage.keepPvc: true + replica.enabled: false + cluster.enabled: false + asserts: + - isKind: + of: PersistentVolumeClaim + - equal: + path: metadata.annotations["helm.sh/resource-policy"] + value: keep + + - it: should have custom storage class when specified + set: + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + dataStorage.className: "fast-ssd" + replica.enabled: false + cluster.enabled: false + asserts: + - isKind: + of: PersistentVolumeClaim + - equal: + path: spec.storageClassName + value: fast-ssd + + - it: should have custom labels when specified + set: + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + dataStorage.labels: + custom.label: "value" + another.label: "test" + replica.enabled: false + cluster.enabled: false + asserts: + - isKind: + of: PersistentVolumeClaim + - equal: + path: metadata.labels["custom.label"] + value: value + - equal: + path: metadata.labels["another.label"] + value: test + + - it: should have custom annotations when specified + set: + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + dataStorage.annotations: + custom.annotation: "value" + replica.enabled: false + 
cluster.enabled: false + asserts: + - isKind: + of: PersistentVolumeClaim + - equal: + path: metadata.annotations["custom.annotation"] + value: value + + - it: should have correct access modes + set: + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + dataStorage.accessModes: + - ReadWriteOnce + replica.enabled: false + cluster.enabled: false + asserts: + - isKind: + of: PersistentVolumeClaim + - equal: + path: spec.accessModes + value: + - ReadWriteOnce diff --git a/valkey/tests/service_test.yaml b/valkey/tests/service_test.yaml index 115c137a..d7233c41 100644 --- a/valkey/tests/service_test.yaml +++ b/valkey/tests/service_test.yaml @@ -86,3 +86,58 @@ tests: content: app.kubernetes.io/instance: RELEASE-NAME app.kubernetes.io/name: valkey + - it: should pin to pod-0 when replica.enabled is true + set: + replica.enabled: true + template: templates/service.yaml + asserts: + - isKind: + of: Service + - equal: + path: spec.selector["statefulset.kubernetes.io/pod-name"] + value: RELEASE-NAME-valkey-0 + - it: should not pin to pod-0 when cluster.enabled is true + set: + cluster.enabled: true + template: templates/service.yaml + asserts: + - isKind: + of: Service + - notExists: + path: spec.selector["statefulset.kubernetes.io/pod-name"] + - it: should not pin to pod-0 when both replica.enabled and cluster.enabled are false + set: + replica.enabled: false + cluster.enabled: false + template: templates/service.yaml + asserts: + - isKind: + of: Service + - notExists: + path: spec.selector["statefulset.kubernetes.io/pod-name"] + - it: should have cluster bus port when cluster.enabled is true + set: + cluster.enabled: true + cluster.busPort: 16379 + template: templates/service.yaml + asserts: + - isKind: + of: Service + - contains: + path: spec.ports + content: + port: 16379 + targetPort: tcp-bus + protocol: TCP + name: tcp-bus + - it: should not have cluster bus port when cluster.enabled is false + set: + cluster.enabled: false + template: templates/service.yaml 
+ asserts: + - isKind: + of: Service + - notContains: + path: spec.ports + content: + name: tcp-bus diff --git a/valkey/values.yaml b/valkey/values.yaml index 20e00b62..a4256620 100644 --- a/valkey/values.yaml +++ b/valkey/values.yaml @@ -264,6 +264,50 @@ replica: # More info: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention persistentVolumeClaimRetentionPolicy: {} +# Cluster mode configuration for Valkey Cluster (sharded deployment) +# Note: cluster.enabled and replica.enabled are mutually exclusive +cluster: + # Enable cluster mode (creates a sharded Valkey cluster) + enabled: false + + # Number of shards (primary nodes). Minimum is 3; the chart fails to render with fewer. + # Each shard handles a portion of the hash slot range (16384 slots total). + shards: 3 + + # Number of replicas per shard (for high availability within each shard) + # Total nodes = shards × (1 + replicasPerShard) + # For example: 3 shards with 1 replica each = 6 nodes total + replicasPerShard: 1 + + # Username for cluster replication authentication, ignored if auth.enabled is false. + # IMPORTANT: When auth.enabled is true, this user MUST be defined in auth.aclUsers. 
+ # The user must have appropriate replication permissions: +psync +replconf +ping + replicationUser: "default" + + # Cluster node timeout in milliseconds (how long before a node is considered failed) + nodeTimeout: 15000 + + # Require all hash slots to be covered for the cluster to accept queries (reads and writes) + # Set to false to allow partial cluster operation + requireFullCoverage: true + + # Allow cluster to serve read requests when in down state + allowReadsWhenDown: false + + # Persistence configuration (required for cluster mode) + persistence: + # Size of the PVC for each node (required when cluster.enabled is true) + size: "" + # Storage class name (empty = use default storage class) + storageClass: "" + # Access modes for the PVC + accessModes: + - ReadWriteOnce + + # Bus port for cluster communication (Valkey convention: service.port + 10000; this value is not derived from service.port) + # This port is used for node-to-node communication in the cluster + busPort: 16379 + tls: # Enable TLS enabled: false From 2716c39d451be3b4f4b1e0119d097863fa561b70 Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Thu, 15 Jan 2026 17:26:52 +0530 Subject: [PATCH 02/23] fix: auth for Valkey cluster Signed-off-by: Ankit Pati --- valkey/scripts/cluster-init-script.sh | 25 +++- valkey/templates/cluster-statefulset.yaml | 10 ++ valkey/tests/cluster_test.yaml | 150 ++++++++++++++++++++++ 3 files changed, 181 insertions(+), 4 deletions(-) diff --git a/valkey/scripts/cluster-init-script.sh b/valkey/scripts/cluster-init-script.sh index 79e3514c..5925f539 100644 --- a/valkey/scripts/cluster-init-script.sh +++ b/valkey/scripts/cluster-init-script.sh @@ -12,12 +12,29 @@ REPLICAS_PER_SHARD=${CLUSTER_REPLICAS_PER_SHARD:-1} PRIMARIES=$(( CLUSTER_NODE_COUNT / (1 + REPLICAS_PER_SHARD) )) {{- if and .Values.auth.enabled .Values.auth.aclUsers }} -AUTH_OPTION="-a $(cat /etc/valkey/users.acl | grep '^user {{ .Values.cluster.replicationUser }} ' | sed 
's/.*#\([a-f0-9]*\).*/\1/' | head -1)" -# If we have the password from environment, use that instead -if [ -n 
"${VALKEY_AUTH_PASSWORD}" ]; then - AUTH_OPTION="-a ${VALKEY_AUTH_PASSWORD}" +# Get password for cluster replication user from mounted secret +{{- $replUsername := .Values.cluster.replicationUser }} +{{- $replUser := index .Values.auth.aclUsers $replUsername }} +{{- $replPasswordKey := $replUser.passwordKey | default $replUsername }} +{{- if .Values.auth.usersExistingSecret }} +if [ -f "/valkey-users-secret/{{ $replPasswordKey }}" ]; then + AUTH_PASSWORD=$(cat "/valkey-users-secret/{{ $replPasswordKey }}") +elif [ -f "/valkey-auth-secret/{{ $replUsername }}-password" ]; then + AUTH_PASSWORD=$(cat "/valkey-auth-secret/{{ $replUsername }}-password") +else + echo "ERROR: No password found for cluster replication user {{ $replUsername }}" + exit 1 fi {{- else }} +if [ -f "/valkey-auth-secret/{{ $replUsername }}-password" ]; then + AUTH_PASSWORD=$(cat "/valkey-auth-secret/{{ $replUsername }}-password") +else + echo "ERROR: No password found for cluster replication user {{ $replUsername }}" + exit 1 +fi +{{- end }} +AUTH_OPTION="-a ${AUTH_PASSWORD}" +{{- else }} AUTH_OPTION="" {{- end }} diff --git a/valkey/templates/cluster-statefulset.yaml b/valkey/templates/cluster-statefulset.yaml index 30a94893..b6ca393b 100644 --- a/valkey/templates/cluster-statefulset.yaml +++ b/valkey/templates/cluster-statefulset.yaml @@ -177,6 +177,16 @@ spec: {{- if .Values.auth.enabled }} - name: valkey-acl mountPath: /etc/valkey + {{- if .Values.auth.usersExistingSecret }} + - name: valkey-users-secret + mountPath: /valkey-users-secret + readOnly: true + {{- end }} + {{- if or (include "valkey.hasInlinePasswords" . 
| eq "true") .Values.auth.aclConfig }} + - name: valkey-auth-secret + mountPath: /valkey-auth-secret + readOnly: true + {{- end }} {{- end }} {{- range $secret := .Values.extraValkeySecrets }} - name: {{ $secret.name }}-valkey diff --git a/valkey/tests/cluster_test.yaml b/valkey/tests/cluster_test.yaml index 72e7bfde..e2c70c21 100644 --- a/valkey/tests/cluster_test.yaml +++ b/valkey/tests/cluster_test.yaml @@ -345,3 +345,153 @@ tests: - matchRegex: path: data["init.sh"] pattern: "cluster-allow-reads-when-down yes" + + # Cluster auth secret mount tests (bug fix: ensure main container has access to plaintext password) + - it: should mount valkey-users-secret to main container when auth.usersExistingSecret is set + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.usersExistingSecret: "my-valkey-users" + auth.aclUsers: + default: + permissions: "~* &* +@all" + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: valkey-users-secret + mountPath: /valkey-users-secret + readOnly: true + + - it: should mount valkey-auth-secret to main container when inline passwords are used + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.aclUsers: + default: + password: "testpass" + permissions: "~* &* +@all" + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: valkey-auth-secret + mountPath: /valkey-auth-secret + readOnly: true + + - it: should mount both auth secrets to main container when both are configured + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.usersExistingSecret: "my-valkey-users" + auth.aclUsers: + default: + permissions: "~* &* +@all" + password: "fallback" + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: 
spec.template.spec.containers[0].volumeMounts + content: + name: valkey-users-secret + mountPath: /valkey-users-secret + readOnly: true + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: valkey-auth-secret + mountPath: /valkey-auth-secret + readOnly: true + + # Cluster init script password retrieval tests (bug fix: read from secret, not ACL hash) + - it: cluster-script should read password from valkey-users-secret when usersExistingSecret is set + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.usersExistingSecret: "my-valkey-users" + auth.aclUsers: + default: + permissions: "~* &* +@all" + template: templates/cluster-script.yaml + asserts: + - matchRegex: + path: data["init-cluster.sh"] + pattern: '/valkey-users-secret/' + - notMatchRegex: + path: data["init-cluster.sh"] + pattern: '/etc/valkey/users.acl' + + - it: cluster-script should read password from valkey-auth-secret when inline passwords are used + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.aclUsers: + default: + password: "testpass" + permissions: "~* &* +@all" + template: templates/cluster-script.yaml + asserts: + - matchRegex: + path: data["init-cluster.sh"] + pattern: '/valkey-auth-secret/default-password' + - notMatchRegex: + path: data["init-cluster.sh"] + pattern: '/etc/valkey/users.acl' + + - it: cluster-script should use custom passwordKey when configured + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.usersExistingSecret: "my-valkey-users" + auth.aclUsers: + default: + permissions: "~* &* +@all" + passwordKey: "default-pwd" + template: templates/cluster-script.yaml + asserts: + - matchRegex: + path: data["init-cluster.sh"] + pattern: '/valkey-users-secret/default-pwd' + + - it: cluster-script should use custom replicationUser for auth + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.replicationUser: 
"clusteruser" + auth.enabled: true + auth.aclUsers: + default: + password: "defaultpass" + permissions: "~* &* +@all" + clusteruser: + password: "clusterpass" + permissions: "~* &* +@all" + template: templates/cluster-script.yaml + asserts: + - matchRegex: + path: data["init-cluster.sh"] + pattern: '/valkey-auth-secret/clusteruser-password' + + - it: cluster-script should NOT parse password hash from ACL file + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.aclUsers: + default: + password: "testpass" + permissions: "~* &* +@all" + template: templates/cluster-script.yaml + asserts: + # Ensure we don't try to extract the hash from the ACL file + - notMatchRegex: + path: data["init-cluster.sh"] + pattern: 'grep.*users\.acl' From a7473926b144289a56e92391ec868b447938209f Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Wed, 21 Jan 2026 12:18:12 +0530 Subject: [PATCH 03/23] docs: update `README.md`s & `NOTES.txt` for cluster mode Signed-off-by: Ankit Pati --- valkey/README.md | 108 +++++++++++++++++++++++++++++++++++++ valkey/templates/NOTES.txt | 69 +++++++++++++++++++++++- 2 files changed, 175 insertions(+), 2 deletions(-) diff --git a/valkey/README.md b/valkey/README.md index d069809a..7fea2332 100644 --- a/valkey/README.md +++ b/valkey/README.md @@ -58,6 +58,60 @@ replica: If fewer than `minReplicasToWrite` replicas are available, the master will reject write operations. 
+### Cluster Mode + +Deploy a sharded Valkey cluster for horizontal scaling and high availability: + +```bash +helm install valkey valkey/valkey --set cluster.enabled=true --set cluster.persistence.size=5Gi +``` + +**Architecture:** + +* Data is automatically sharded across multiple primary nodes (16384 hash slots distributed across shards) +* Each shard can have replicas for high availability within the shard +* Total nodes = `shards` × (1 + `replicasPerShard`) + +**Default Configuration (6 nodes):** + +```yaml +cluster: + enabled: true + shards: 3 # Minimum 3 shards required + replicasPerShard: 1 # 1 replica per shard + persistence: + size: 5Gi # Required +``` + +This creates 6 nodes: 3 primary shards + 3 replicas. + +**High Availability Configuration (15 nodes):** + +```yaml +cluster: + enabled: true + shards: 5 # 5 primary shards + replicasPerShard: 2 # 2 replicas per shard for extra redundancy + persistence: + size: 10Gi + storageClass: "fast-ssd" +``` + +**Services:** + +* `valkey`: Main service for client connections (routes to all nodes) +* `valkey-headless`: Headless service for pod discovery and cluster communication + +**Cluster Configuration Options:** + +```yaml +cluster: + nodeTimeout: 15000 # Milliseconds before a node is considered failed + requireFullCoverage: true # Require all hash slots covered to serve queries (reads and writes) + allowReadsWhenDown: false # Allow reads when cluster is in down state + busPort: 16379 # Port for inter-node cluster communication +``` + ## Storage ### Standalone Storage @@ -93,6 +147,20 @@ replica: storageClass: "fast-ssd" # Optional ``` +### Cluster Storage + +Persistent storage is **mandatory** in cluster mode. Each node in the cluster maintains its own data partition and cluster state configuration. 
+ +```yaml +cluster: + enabled: true + persistence: + size: 10Gi # Required + storageClass: "fast-ssd" # Optional + accessModes: + - ReadWriteOnce +``` + ## Authentication This chart supports ACL-based authentication for Valkey. 
@@ -174,6 +242,35 @@ replica: * This user MUST be defined in `auth.aclUsers` with appropriate permissions * Minimum permissions: `+psync +replconf +ping` +### Cluster with Authentication + +When using ACL authentication in cluster mode, nodes need credentials to authenticate with each other for cluster operations: + +```yaml +auth: + enabled: true + usersExistingSecret: "my-valkey-users" + aclUsers: + default: + permissions: "~* &* +@all" + cluster-user: + permissions: "+psync +replconf +ping" + +cluster: + enabled: true + shards: 3 + replicasPerShard: 1 + replicationUser: "cluster-user" # Must be defined in auth.aclUsers + persistence: + size: 5Gi +``` + +**Important Notes:** + +* `cluster.replicationUser` specifies which ACL user cluster nodes use to authenticate +* This user MUST be defined in `auth.aclUsers` with appropriate permissions +* Minimum permissions: `+psync +replconf +ping` + ## Metrics This chart supports Prometheus metrics collection using the [Redis exporter](https://github.com/oliver006/redis_exporter). 
@@ -349,6 +446,17 @@ tls: | replica.persistence.size | string | `""` | Required if replica is enabled | | replica.persistence.storageClass | string | `""` | | | replica.persistence.accessModes | list | `""` | | +| cluster.enabled | bool | `false` | Enable cluster mode (mutually exclusive with replica.enabled) | +| cluster.shards | int | `3` | Number of primary shards (minimum 3) | +| cluster.replicasPerShard | int | `1` | Number of replicas per shard | +| cluster.replicationUser | string | `"default"` | ACL user for cluster authentication (must be in auth.aclUsers) | +| cluster.nodeTimeout | int | `15000` | Milliseconds before node is considered failed | +| cluster.requireFullCoverage | bool | `true` | Require all slots covered to accept writes | +| cluster.allowReadsWhenDown | bool | `false` | Allow reads when cluster is down | +| cluster.busPort | int | `16379` | Port for inter-node cluster communication | +| cluster.persistence.size | string | `""` | Required if cluster is enabled | +| cluster.persistence.storageClass | string | `""` | | +| cluster.persistence.accessModes | list | `["ReadWriteOnce"]` | | | resources | object | `{}` | | | securityContext.capabilities.drop[0] | string | `"ALL"` | | | securityContext.readOnlyRootFilesystem | bool | `true` | | diff --git a/valkey/templates/NOTES.txt b/valkey/templates/NOTES.txt index 07ddb6dd..e59b325b 100644 --- a/valkey/templates/NOTES.txt +++ b/valkey/templates/NOTES.txt @@ -10,7 +10,56 @@ Namespace: {{ .Release.Namespace }} Chart: {{ .Chart.Name }} {{ .Chart.Version }} App version: {{ .Chart.AppVersion }} -{{- if .Values.replica.enabled }} +{{- if .Values.cluster.enabled }} +================================================================================ +🌐 CLUSTER MODE (Sharded) +================================================================================ + +Your Valkey deployment is running in CLUSTER mode: +- {{ .Values.cluster.shards }} Shard(s) (primary nodes) +- {{ .Values.cluster.replicasPerShard }} 
Replica(s) per shard +- {{ include "valkey.clusterNodeCount" . }} Total node(s) + +Hash slots (16384 total) are distributed across the {{ .Values.cluster.shards }} shards. + +Service: {{ include "valkey.fullname" . }} +Type: {{ .Values.service.type }} +Port: {{ .Values.service.port }} +Bus Port: {{ .Values.cluster.busPort }} (for inter-node communication) + +1) In-cluster access + From another Pod: + $ valkey-cli -h {{ include "valkey.fullname" . }} -p {{ .Values.service.port }}{{ if .Values.tls.enabled }} --tls{{- end }} -c PING + + Note: Use the `-c` flag to enable cluster mode in valkey-cli. + +2) Local access via kubectl port-forward + $ kubectl -n {{ .Release.Namespace }} port-forward svc/{{ include "valkey.fullname" . }} 6379:{{ .Values.service.port }} + In another terminal: + $ valkey-cli -h 127.0.0.1 -p 6379{{ if .Values.tls.enabled }} --tls{{- end }} -c PING +{{ if eq .Values.service.type "LoadBalancer" }} +3) External access (LoadBalancer) + $ export SERVICE_IP=$(kubectl -n {{ .Release.Namespace }} get svc {{ include "valkey.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + $ valkey-cli -h $SERVICE_IP -p {{ .Values.service.port }}{{ if .Values.tls.enabled }} --tls{{- end }} -c PING +{{ else if eq .Values.service.type "NodePort" }} +3) External access (NodePort) + $ export NODE_PORT=$(kubectl -n {{ .Release.Namespace }} get svc {{ include "valkey.fullname" . 
}} -o jsonpath='{.spec.ports[0].nodePort}') + $ export NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}') + $ valkey-cli -h $NODE_IP -p $NODE_PORT{{ if .Values.tls.enabled }} --tls{{- end }} -c PING +{{ end }} +Direct Pod Access (Headless Service): +{{- $shards := int .Values.cluster.shards }} +{{- $replicasPerShard := int .Values.cluster.replicasPerShard }} +{{- $totalNodes := mul $shards (add 1 $replicasPerShard) }} +{{- range $i := until (int $totalNodes) }} + {{ include "valkey.fullname" $ }}-{{ $i }}.{{ include "valkey.headlessServiceName" $ }}.{{ $.Release.Namespace }}.svc.{{ $.Values.clusterDomain }} +{{- end }} + +Cluster Info: + $ valkey-cli -h {{ include "valkey.fullname" . }} -p {{ .Values.service.port }}{{ if .Values.tls.enabled }} --tls{{- end }}{{ if .Values.auth.enabled }} --user <username> -a <password>{{ end }} cluster info + $ valkey-cli -h {{ include "valkey.fullname" . }} -p {{ .Values.service.port }}{{ if .Values.tls.enabled }} --tls{{- end }}{{ if .Values.auth.enabled }} --user <username> -a <password>{{ end }} cluster nodes + +{{- else if .Values.replica.enabled }} ================================================================================ 🔄 REPLICATION MODE ================================================================================ @@ -99,13 +148,29 @@ Port: {{ .Values.service.port }} {{ end }} ✅ Quick test +{{- if .Values.cluster.enabled }} +$ valkey-cli -h {{ include "valkey.fullname" . }} -p {{ .Values.service.port }}{{ if .Values.tls.enabled }} --tls{{- end }}{{ if .Values.auth.enabled }} --user <username> -a <password>{{ end }} -c +valkey> SET foo bar +valkey> GET foo +"bar" +valkey> CLUSTER INFO +{{- else }} $ valkey-cli -h {{ include "valkey.fullname" .
}} -p {{ .Values.service.port }}{{ if .Values.tls.enabled }} --tls{{- end }}{{ if .Values.auth.enabled }} --user <username> -a <password>{{ end }} valkey> SET foo bar valkey> GET foo "bar" +{{- end }} 💾 Persistence -{{- if .Values.replica.enabled }} +{{- if .Values.cluster.enabled }} +- Persistence is ENABLED (required for cluster mode). Each node has its own volume. +- Size: {{ .Values.cluster.persistence.size }} +{{- if .Values.cluster.persistence.storageClass }} +- Storage class: {{ .Values.cluster.persistence.storageClass }} +{{- end }} +- To see PVCs: + $ kubectl -n {{ .Release.Namespace }} get pvc -l app.kubernetes.io/instance={{ .Release.Name }} +{{- else if .Values.replica.enabled }} - Persistence is ENABLED (required for replication mode). Each instance has its own volume. - Size: {{ .Values.replica.persistence.size }} {{- if .Values.replica.persistence.storageClass }} From bcd6b4b2313c3718e821f9c1caf4fe4f524b6872 Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Sat, 24 Jan 2026 18:01:55 +0530 Subject: [PATCH 04/23] fix: schema updated with missing values Signed-off-by: Ankit Pati --- valkey/values.schema.json | 46 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/valkey/values.schema.json b/valkey/values.schema.json index 5db0f4ac..28bf4e53 100644 --- a/valkey/values.schema.json +++ b/valkey/values.schema.json @@ -22,6 +22,52 @@ } } }, + "cluster": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "shards": { + "type": "integer" + }, + "replicasPerShard": { + "type": "integer" + }, + "replicationUser": { + "type": "string" + }, + "nodeTimeout": { + "type": "integer" + }, + "requireFullCoverage": { + "type": "boolean" + }, + "allowReadsWhenDown": { + "type": "boolean" + }, + "persistence": { + "type": "object", + "properties": { + "size": { + "type": "string" + }, + "storageClass": { + "type": "string" + }, + "accessModes": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "busPort": { +
"type": "integer" + } + } + }, "clusterDomain": { "type": "string" }, From 58be01c0f9b999b8ed3712e9e0ac4f8a503a9ee4 Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Sat, 28 Feb 2026 23:23:38 +0530 Subject: [PATCH 05/23] feat: `Job` instead of startup script Signed-off-by: Ankit Pati --- valkey/scripts/cluster-init-script.sh | 218 +++++---- valkey/templates/cluster-init-job.yaml | 112 +++++ valkey/templates/cluster-statefulset.yaml | 27 +- valkey/tests/cluster_test.yaml | 558 +++++++++++++++++++--- 4 files changed, 713 insertions(+), 202 deletions(-) create mode 100644 valkey/templates/cluster-init-job.yaml diff --git a/valkey/scripts/cluster-init-script.sh b/valkey/scripts/cluster-init-script.sh index 5925f539..18f29e2d 100644 --- a/valkey/scripts/cluster-init-script.sh +++ b/valkey/scripts/cluster-init-script.sh @@ -7,7 +7,6 @@ if [ "${CLUSTER_NODE_COUNT}" -eq "1" ]; then exit 0 fi -ORDINAL=$(echo "${POD_NAME}" | rev | cut -d'-' -f1 | rev) REPLICAS_PER_SHARD=${CLUSTER_REPLICAS_PER_SHARD:-1} PRIMARIES=$(( CLUSTER_NODE_COUNT / (1 + REPLICAS_PER_SHARD) )) @@ -44,131 +43,150 @@ TLS_OPTION="--tls --cacert /tls/{{ .Values.tls.caPublicKey }}" TLS_OPTION="" {{- end }} -echo "Initializing as ordinal ${ORDINAL}. Total nodes: ${CLUSTER_NODE_COUNT}, Primaries: ${PRIMARIES}, Replicas per shard: ${REPLICAS_PER_SHARD}" +echo "Cluster init job starting. Total nodes: ${CLUSTER_NODE_COUNT}, Primaries: ${PRIMARIES}, Replicas per shard: ${REPLICAS_PER_SHARD}" HEADLESS_SVC="{{ include "valkey.headlessServiceName" . }}" NAMESPACE="{{ .Release.Namespace }}" CLUSTER_DOMAIN="{{ .Values.clusterDomain }}" -MY_IP=$(hostname -i) -# Wait for the local Valkey server process to start -until valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h localhost -p {{ .Values.service.port }} ping 2>/dev/null | grep -q "PONG"; do - echo "Waiting for local Valkey to start..." 
- sleep 2 +# --- Wait for all Valkey nodes to be ready --- +for i in $(seq 0 $((CLUSTER_NODE_COUNT - 1))); do + NODE_HOST="{{ include "valkey.fullname" . }}-${i}.${HEADLESS_SVC}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}" + until valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${NODE_HOST}" -p {{ .Values.service.port }} ping 2>/dev/null | grep -q "PONG"; do + echo "Waiting for ${NODE_HOST} to be ready..." + sleep 2 + done + echo "Node ${NODE_HOST} is ready." done -echo "Local Valkey is ready at ${MY_IP}" + +echo "All ${CLUSTER_NODE_COUNT} nodes are ready." # --- Discover Existing Cluster --- HEALTHY_NODE="" for i in $(seq 0 $((CLUSTER_NODE_COUNT - 1))); do - if [ "${i}" != "${ORDINAL}" ]; then - NODE_HOST="{{ include "valkey.fullname" . }}-${i}.${HEADLESS_SVC}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}" - if valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${NODE_HOST}" -p {{ .Values.service.port }} cluster info 2>/dev/null | grep -q "cluster_state:ok"; then - HEALTHY_NODE="${NODE_HOST}" - echo "Found healthy cluster node: ${HEALTHY_NODE}" - break - fi + NODE_HOST="{{ include "valkey.fullname" . }}-${i}.${HEADLESS_SVC}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}" + if valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${NODE_HOST}" -p {{ .Values.service.port }} cluster info 2>/dev/null | grep -q "cluster_state:ok"; then + HEALTHY_NODE="${NODE_HOST}" + echo "Found healthy cluster node: ${HEALTHY_NODE}" + break fi done -# --- Logic for Joining an Existing Cluster --- +# --- Logic for Joining an Existing Cluster (scaling up) --- if [ -n "${HEALTHY_NODE}" ]; then - echo "Healthy cluster found. Attempting to join..." - - # 1. Forget any old, failed instance of ourselves - FAILED_NODE_ID=$(valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${HEALTHY_NODE}" -p {{ .Values.service.port }} cluster nodes 2>/dev/null | grep "${MY_IP}:{{ .Values.service.port }}" | grep "fail" | awk '{print $1}' || echo "") - if [ -n "${FAILED_NODE_ID}" ]; then - echo "Found my IP (${MY_IP}) marked as failed with ID ${FAILED_NODE_ID}. 
Forgetting it..." - valkey-cli ${AUTH_OPTION} ${TLS_OPTION} --cluster call "${HEALTHY_NODE}:{{ .Values.service.port }}" cluster forget "${FAILED_NODE_ID}" > /dev/null 2>&1 || true - sleep 3 - fi + echo "Existing cluster found. Checking for new nodes to add..." - # 2. Meet the cluster - HEALTHY_NODE_IP=$(getent hosts "${HEALTHY_NODE}" | awk '{print $1}') - echo "Sending CLUSTER MEET to ${HEALTHY_NODE} (${HEALTHY_NODE_IP})" - valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h localhost -p {{ .Values.service.port }} cluster meet "${HEALTHY_NODE_IP}" {{ .Values.service.port }} - sleep 5 + KNOWN_NODES=$(valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${HEALTHY_NODE}" -p {{ .Values.service.port }} cluster nodes 2>/dev/null) - # 3. Find an orphaned master and become its replica - echo "Searching for a master to replicate..." - - MY_NODE_ID=$(valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h localhost -p {{ .Values.service.port }} cluster myid) - echo "My Node ID is ${MY_NODE_ID}" - - # This prevents race conditions from the order of 'cluster nodes' output - TARGET_MASTER_ID=$(valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${HEALTHY_NODE}" -p {{ .Values.service.port }} cluster nodes | awk -v replicas_needed="${REPLICAS_PER_SHARD}" -v my_id="${MY_NODE_ID}" ' - # Pass 1: Build maps of masters and replica counts - /master/ && !/fail/ { masters[$1] = 1 } - /slave/ && !/fail/ { master_replicas[$4]++ } - END { - # Pass 2: Iterate over the masters we found - for (master_id in masters) { - # Check if it needs a replica AND it is not ourself - if ( master_id != my_id && (master_replicas[master_id] < replicas_needed || master_replicas[master_id] == "") ) { - print master_id - exit # Found a suitable master - } - } - } - ') - - if [ -n "${TARGET_MASTER_ID}" ]; then - echo "Found target master ${TARGET_MASTER_ID} that needs a replica." - echo "Sending CLUSTER REPLICATE command..." 
- - if valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h localhost -p {{ .Values.service.port }} cluster replicate "${TARGET_MASTER_ID}"; then - echo "Successfully configured as a replica for ${TARGET_MASTER_ID}." - else - echo "ERROR: Failed to replicate master ${TARGET_MASTER_ID}. Manual intervention required." - exit 1 + NEW_NODE_COUNT=0 + for i in $(seq 0 $((CLUSTER_NODE_COUNT - 1))); do + NODE_HOST="{{ include "valkey.fullname" . }}-${i}.${HEADLESS_SVC}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}" + NODE_IP=$(getent hosts "${NODE_HOST}" | awk '{print $1}') + + if echo "${KNOWN_NODES}" | grep -v "fail" | grep -q "${NODE_IP}:{{ .Values.service.port }}"; then + echo "Node ${NODE_HOST} (${NODE_IP}) already in cluster." + continue fi - else - echo "WARNING: Could not find a master that needs a replica. Staying as a master with no slots. Attempting rebalance..." - - # Wait for cluster propagation before rebalancing - PROPAGATION_ATTEMPTS=0 - MAX_PROPAGATION_ATTEMPTS=60 - while [ ${PROPAGATION_ATTEMPTS} -lt ${MAX_PROPAGATION_ATTEMPTS} ]; do - CLUSTER_STATE=$(valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h localhost -p {{ .Values.service.port }} cluster info 2>/dev/null | grep "cluster_state:" | cut -d: -f2 | tr -d '\r\n') - if [ "${CLUSTER_STATE}" = "ok" ]; then - echo "Cluster state is OK. Proceeding with rebalance." - break - fi - echo "Cluster state is ${CLUSTER_STATE}. Waiting for propagation... (${PROPAGATION_ATTEMPTS}/${MAX_PROPAGATION_ATTEMPTS})" - PROPAGATION_ATTEMPTS=$((PROPAGATION_ATTEMPTS + 1)) - sleep 5 - done - valkey-cli ${AUTH_OPTION} ${TLS_OPTION} --cluster rebalance "${HEALTHY_NODE}:{{ .Values.service.port }}" --cluster-use-empty-masters --cluster-yes || true + echo "New node found: ${NODE_HOST} (${NODE_IP}). Adding to cluster..." 
+ NEW_NODE_COUNT=$((NEW_NODE_COUNT + 1)) + + # Forget any old, failed instance of this node + FAILED_NODE_ID=$(echo "${KNOWN_NODES}" | grep "${NODE_IP}:{{ .Values.service.port }}" | grep "fail" | awk '{print $1}' || echo "") + if [ -n "${FAILED_NODE_ID}" ]; then + echo "Found node IP (${NODE_IP}) marked as failed with ID ${FAILED_NODE_ID}. Forgetting it..." + valkey-cli ${AUTH_OPTION} ${TLS_OPTION} --cluster call "${HEALTHY_NODE}:{{ .Values.service.port }}" cluster forget "${FAILED_NODE_ID}" > /dev/null 2>&1 || true + sleep 3 + fi + + # Meet the cluster via the new node + HEALTHY_NODE_IP=$(getent hosts "${HEALTHY_NODE}" | awk '{print $1}') + echo "Sending CLUSTER MEET from ${NODE_HOST} to ${HEALTHY_NODE} (${HEALTHY_NODE_IP})" + valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${NODE_HOST}" -p {{ .Values.service.port }} cluster meet "${HEALTHY_NODE_IP}" {{ .Values.service.port }} + done + + if [ "${NEW_NODE_COUNT}" -eq 0 ]; then + echo "No new nodes to add. Cluster is up to date." + exit 0 fi - exit 0 -fi -echo "No healthy cluster found. Proceeding with initial creation logic." -if [ "${ORDINAL}" = "0" ]; then - echo "This is the primary-0 node, creating a new cluster..." - NODES="" + sleep 5 + + # Assign roles to new nodes: find masters needing replicas for i in $(seq 0 $((CLUSTER_NODE_COUNT - 1))); do NODE_HOST="{{ include "valkey.fullname" . }}-${i}.${HEADLESS_SVC}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}" - until valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${NODE_HOST}" -p {{ .Values.service.port }} ping 2>/dev/null | grep -q "PONG"; do - echo "Waiting for ${NODE_HOST} to be ready..." 
- sleep 2 - done - NODES="${NODES} ${NODE_HOST}:{{ .Values.service.port }}" + NODE_ID=$(valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${NODE_HOST}" -p {{ .Values.service.port }} cluster myid) + + # Re-fetch cluster state from healthy node for current view + CURRENT_NODES=$(valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${HEALTHY_NODE}" -p {{ .Values.service.port }} cluster nodes) + + # Check if this node is a master with no slots (new node) + NODE_INFO=$(echo "${CURRENT_NODES}" | grep "${NODE_ID}") + IS_MASTER=$(echo "${NODE_INFO}" | grep -c "master" || true) + HAS_SLOTS=$(echo "${NODE_INFO}" | awk '{for(i=9;i<=NF;i++) print $i}' | head -1) + + if [ "${IS_MASTER}" -gt 0 ] && [ -z "${HAS_SLOTS}" ]; then + echo "Node ${NODE_HOST} is an empty master. Searching for a master to replicate..." + + TARGET_MASTER_ID=$(echo "${CURRENT_NODES}" | awk -v replicas_needed="${REPLICAS_PER_SHARD}" -v my_id="${NODE_ID}" ' + /master/ && !/fail/ { masters[$1] = 1 } + /slave/ && !/fail/ { master_replicas[$4]++ } + END { + for (master_id in masters) { + if ( master_id != my_id && (master_replicas[master_id] < replicas_needed || master_replicas[master_id] == "") ) { + print master_id + exit + } + } + } + ') + + if [ -n "${TARGET_MASTER_ID}" ]; then + echo "Found target master ${TARGET_MASTER_ID} that needs a replica." + if valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${NODE_HOST}" -p {{ .Values.service.port }} cluster replicate "${TARGET_MASTER_ID}"; then + echo "Successfully configured ${NODE_HOST} as a replica for ${TARGET_MASTER_ID}." + else + echo "WARNING: Failed to replicate master ${TARGET_MASTER_ID} from ${NODE_HOST}." + fi + fi + fi done - sleep 10 - echo "Creating cluster with nodes: ${NODES}" - echo "yes" | valkey-cli ${AUTH_OPTION} ${TLS_OPTION} --cluster create ${NODES} --cluster-replicas "${REPLICAS_PER_SHARD}" - echo "Cluster created successfully." -else - echo "Waiting for pod-0 to initialize the cluster..." - PRIMARY_HOST="{{ include "valkey.fullname" . 
}}-0.${HEADLESS_SVC}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}" - until valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${PRIMARY_HOST}" -p {{ .Values.service.port }} cluster info 2>/dev/null | grep -q "cluster_state:ok"; do - echo "Waiting for cluster to be initialized by pod-0..." + # Rebalance if needed + echo "Attempting cluster rebalance..." + + PROPAGATION_ATTEMPTS=0 + MAX_PROPAGATION_ATTEMPTS=60 + while [ ${PROPAGATION_ATTEMPTS} -lt ${MAX_PROPAGATION_ATTEMPTS} ]; do + CLUSTER_STATE=$(valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${HEALTHY_NODE}" -p {{ .Values.service.port }} cluster info 2>/dev/null | grep "cluster_state:" | cut -d: -f2 | tr -d '\r\n') + if [ "${CLUSTER_STATE}" = "ok" ]; then + echo "Cluster state is OK. Proceeding with rebalance." + break + fi + echo "Cluster state is ${CLUSTER_STATE}. Waiting for propagation... (${PROPAGATION_ATTEMPTS}/${MAX_PROPAGATION_ATTEMPTS})" + PROPAGATION_ATTEMPTS=$((PROPAGATION_ATTEMPTS + 1)) sleep 5 done - echo "Cluster is initialized. My role has been assigned by the creator." + + valkey-cli ${AUTH_OPTION} ${TLS_OPTION} --cluster rebalance "${HEALTHY_NODE}:{{ .Values.service.port }}" --cluster-use-empty-masters --cluster-yes || true + + echo "Cluster update completed." + exit 0 fi +# --- Create New Cluster --- +echo "No existing cluster found. Creating new cluster..." +NODES="" +for i in $(seq 0 $((CLUSTER_NODE_COUNT - 1))); do + NODE_HOST="{{ include "valkey.fullname" . }}-${i}.${HEADLESS_SVC}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}" + NODES="${NODES} ${NODE_HOST}:{{ .Values.service.port }}" +done + +# Allow time for cluster-enabled nodes to fully initialize +sleep 10 + +echo "Creating cluster with nodes: ${NODES}" +echo "yes" | valkey-cli ${AUTH_OPTION} ${TLS_OPTION} --cluster create ${NODES} --cluster-replicas "${REPLICAS_PER_SHARD}" +echo "Cluster created successfully." 
+ exit 0 diff --git a/valkey/templates/cluster-init-job.yaml b/valkey/templates/cluster-init-job.yaml new file mode 100644 index 00000000..40ddea57 --- /dev/null +++ b/valkey/templates/cluster-init-job.yaml @@ -0,0 +1,112 @@ +{{- if .Values.cluster.enabled }} +{{- include "valkey.validateClusterConfig" . }} +{{- include "valkey.validateClusterAuth" . }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "valkey.fullname" . }}-cluster-init + labels: + {{- include "valkey.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": post-install,post-upgrade + "helm.sh/hook-weight": "0" + "helm.sh/hook-delete-policy": before-hook-creation +spec: + backoffLimit: 6 + template: + metadata: + labels: + {{- include "valkey.selectorLabels" . | nindent 8 }} + {{- with .Values.commonLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- (include "valkey.imagePullSecrets" .) | nindent 6 }} + automountServiceAccountToken: false + serviceAccountName: {{ include "valkey.serviceAccountName" . }} + restartPolicy: OnFailure + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName | quote }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: cluster-init + image: {{ include "valkey.image" . }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + command: [ "/bin/sh", "/cluster-script/init-cluster.sh" ] + env: + - name: CLUSTER_NODE_COUNT + value: {{ include "valkey.clusterNodeCount" . | quote }} + - name: CLUSTER_REPLICAS_PER_SHARD + value: {{ .Values.cluster.replicasPerShard | quote }} + {{- with .Values.initResources }} + resources: + {{- toYaml . 
| nindent 12 }} + {{- end }} + volumeMounts: + - name: cluster-script + mountPath: /cluster-script + {{- if .Values.tls.enabled }} + - name: {{ include "valkey.fullname" . }}-tls + mountPath: /tls + {{- end }} + {{- if .Values.auth.enabled }} + {{- if .Values.auth.usersExistingSecret }} + - name: valkey-users-secret + mountPath: /valkey-users-secret + readOnly: true + {{- end }} + {{- if or (include "valkey.hasInlinePasswords" . | eq "true") .Values.auth.aclConfig }} + - name: valkey-auth-secret + mountPath: /valkey-auth-secret + readOnly: true + {{- end }} + {{- end }} + volumes: + - name: cluster-script + configMap: + name: {{ include "valkey.fullname" . }}-cluster-script + defaultMode: 0555 + {{- if .Values.tls.enabled }} + - name: {{ include "valkey.fullname" . }}-tls + secret: + secretName: {{ required "An existing secret is required to enable TLS" .Values.tls.existingSecret }} + defaultMode: 0400 + {{- end }} + {{- if .Values.auth.enabled }} + {{- if .Values.auth.usersExistingSecret }} + - name: valkey-users-secret + secret: + secretName: {{ .Values.auth.usersExistingSecret }} + defaultMode: 0400 + {{- end }} + {{- if or (include "valkey.hasInlinePasswords" . | eq "true") .Values.auth.aclConfig }} + - name: valkey-auth-secret + secret: + secretName: {{ include "valkey.fullname" . }}-auth + defaultMode: 0400 + {{- end }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/valkey/templates/cluster-statefulset.yaml b/valkey/templates/cluster-statefulset.yaml index b6ca393b..d00f0ea6 100644 --- a/valkey/templates/cluster-statefulset.yaml +++ b/valkey/templates/cluster-statefulset.yaml @@ -110,28 +110,11 @@ spec: - name: {{ include "valkey.fullname" . }} image: {{ include "valkey.image" . 
}} imagePullPolicy: {{ .Values.image.pullPolicy }} - command: [ "/bin/sh", "-c" ] - args: - - | - /cluster-script/init-cluster.sh & - valkey-server /data/conf/valkey.conf + command: [ "valkey-server" ] + args: [ "/data/conf/valkey.conf" ] securityContext: {{- toYaml .Values.securityContext | nindent 12 }} env: - - name: POD_INDEX - valueFrom: - fieldRef: - fieldPath: metadata.labels['apps.kubernetes.io/pod-index'] - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: VALKEY_NODES - value: {{ include "valkey.clusterNodes" . | quote }} - - name: CLUSTER_NODE_COUNT - value: {{ include "valkey.clusterNodeCount" . | quote }} - - name: CLUSTER_REPLICAS_PER_SHARD - value: {{ .Values.cluster.replicasPerShard | quote }} {{- range $key, $val := .Values.env }} - name: {{ $key }} value: "{{ $val }}" @@ -168,8 +151,6 @@ spec: volumeMounts: - name: valkey-data mountPath: /data - - name: cluster-script - mountPath: /cluster-script {{- if .Values.tls.enabled }} - name: {{ include "valkey.fullname" . }}-tls mountPath: /tls @@ -246,10 +227,6 @@ spec: configMap: name: {{ include "valkey.fullname" . }}-init-scripts defaultMode: 0555 - - name: cluster-script - configMap: - name: {{ include "valkey.fullname" . 
}}-cluster-script - defaultMode: 0555 {{- if .Values.auth.enabled }} - name: valkey-acl emptyDir: diff --git a/valkey/tests/cluster_test.yaml b/valkey/tests/cluster_test.yaml index e2c70c21..686a34c6 100644 --- a/valkey/tests/cluster_test.yaml +++ b/valkey/tests/cluster_test.yaml @@ -2,6 +2,7 @@ suite: cluster configuration templates: - templates/cluster-statefulset.yaml - templates/cluster-script.yaml + - templates/cluster-init-job.yaml - templates/service-headless.yaml - templates/service.yaml - templates/init_config.yaml @@ -123,6 +124,44 @@ tests: containerPort: 16379 protocol: TCP + # StatefulSet runs valkey-server directly (no background init script) + - it: should run valkey-server directly without background init script + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.spec.containers[0].command + value: [ "valkey-server" ] + - equal: + path: spec.template.spec.containers[0].args + value: [ "/data/conf/valkey.conf" ] + + - it: should not mount cluster-script volume in StatefulSet container + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - notContains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: cluster-script + mountPath: /cluster-script + + - it: should not define cluster-script volume in StatefulSet + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - notContains: + path: spec.template.spec.volumes + content: + name: cluster-script + any: true + # Init container tests - it: should have init container with cluster environment variables set: @@ -189,7 +228,7 @@ tests: targetPort: tcp-bus protocol: TCP - # Cluster init script tests + # Cluster init script ConfigMap tests - it: should create cluster-script ConfigMap when cluster is enabled set: cluster.enabled: true @@ -224,34 
+263,76 @@ tests: path: data["init-cluster.sh"] pattern: "--cluster create" - - it: should run cluster init script as background process + # --- Cluster Init Job tests --- + - it: should create cluster-init Job when cluster is enabled set: cluster.enabled: true cluster.persistence.size: "5Gi" - template: templates/cluster-statefulset.yaml + template: templates/cluster-init-job.yaml asserts: - - matchRegex: - path: spec.template.spec.containers[0].args[0] - pattern: "/cluster-script/init-cluster.sh &" + - isKind: + of: Job + - equal: + path: metadata.name + value: RELEASE-NAME-valkey-cluster-init - - it: should mount cluster-script volume in container + - it: Job should have Helm hook annotations for post-install and post-upgrade set: cluster.enabled: true cluster.persistence.size: "5Gi" - template: templates/cluster-statefulset.yaml + template: templates/cluster-init-job.yaml asserts: - - contains: - path: spec.template.spec.containers[0].volumeMounts - content: - name: cluster-script - mountPath: /cluster-script + - equal: + path: metadata.annotations["helm.sh/hook"] + value: "post-install,post-upgrade" + - equal: + path: metadata.annotations["helm.sh/hook-weight"] + value: "0" + - equal: + path: metadata.annotations["helm.sh/hook-delete-policy"] + value: "before-hook-creation" - - it: should define cluster-script volume + - it: Job should have backoffLimit of 6 set: cluster.enabled: true cluster.persistence.size: "5Gi" - template: templates/cluster-statefulset.yaml + template: templates/cluster-init-job.yaml asserts: + - equal: + path: spec.backoffLimit + value: 6 + + - it: Job should use OnFailure restart policy + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.spec.restartPolicy + value: OnFailure + + - it: Job should run init-cluster.sh from cluster-script ConfigMap + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: 
templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.spec.containers[0].command + value: [ "/bin/sh", "/cluster-script/init-cluster.sh" ] + + - it: Job should mount cluster-script volume + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-init-job.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: cluster-script + mountPath: /cluster-script - contains: path: spec.template.spec.volumes content: @@ -260,94 +341,237 @@ tests: name: RELEASE-NAME-valkey-cluster-script defaultMode: 365 - # Authentication tests - - it: should fail when cluster auth enabled but replication user not in aclUsers + - it: Job should have CLUSTER_NODE_COUNT and CLUSTER_REPLICAS_PER_SHARD env vars set: cluster.enabled: true + cluster.shards: 4 + cluster.replicasPerShard: 2 cluster.persistence.size: "5Gi" - auth.enabled: true - cluster.replicationUser: "clusteruser" - auth.aclUsers: - default: - password: "test" - permissions: "~* &* +@all" - template: templates/cluster-statefulset.yaml + template: templates/cluster-init-job.yaml asserts: - - failedTemplate: - errorPattern: "Cluster replication user 'clusteruser'.*must be defined in auth.aclUsers.*" + - contains: + path: spec.template.spec.containers[0].env + content: + name: CLUSTER_NODE_COUNT + value: "12" + - contains: + path: spec.template.spec.containers[0].env + content: + name: CLUSTER_REPLICAS_PER_SHARD + value: "2" - - it: should succeed when cluster auth is properly configured + - it: Job should use same image as StatefulSet set: cluster.enabled: true cluster.persistence.size: "5Gi" - auth.enabled: true - cluster.replicationUser: "default" - auth.aclUsers: - default: - password: "testpass" - permissions: "~* &* +@all" - template: templates/cluster-statefulset.yaml + image.registry: "myregistry.io" + image.repository: "valkey/valkey" + image.tag: "7.0.0" + template: templates/cluster-init-job.yaml asserts: - - isKind: - 
of: StatefulSet + - equal: + path: spec.template.spec.containers[0].image + value: "myregistry.io/valkey/valkey:7.0.0" - # TLS tests - - it: should configure TLS volume mount in cluster mode + - it: Job should use pod security context set: cluster.enabled: true cluster.persistence.size: "5Gi" - tls.enabled: true - tls.existingSecret: "valkey-tls-secret" - template: templates/cluster-statefulset.yaml + podSecurityContext: + fsGroup: 1000 + runAsUser: 1000 + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.spec.securityContext.fsGroup + value: 1000 + - equal: + path: spec.template.spec.securityContext.runAsUser + value: 1000 + + - it: Job should use container security context + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + securityContext: + allowPrivilegeEscalation: false + runAsNonRoot: true + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.spec.containers[0].securityContext.allowPrivilegeEscalation + value: false + - equal: + path: spec.template.spec.containers[0].securityContext.runAsNonRoot + value: true + + - it: Job should use initResources when set + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + initResources: + limits: + cpu: 200m + memory: 256Mi + requests: + cpu: 100m + memory: 128Mi + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.spec.containers[0].resources.limits.cpu + value: 200m + - equal: + path: spec.template.spec.containers[0].resources.requests.memory + value: 128Mi + + - it: Job should use service account + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + serviceAccount.create: true + serviceAccount.name: "my-sa" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.spec.serviceAccountName + value: "my-sa" + + - it: Job should not automount service account token + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: 
templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.spec.automountServiceAccountToken + value: false + + - it: Job should include common labels + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-init-job.yaml + asserts: + - isNotNull: + path: metadata.labels["helm.sh/chart"] + - isNotNull: + path: metadata.labels["app.kubernetes.io/name"] + + - it: Job should include pod labels and annotations when set + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + podLabels: + custom-label: my-value + podAnnotations: + custom-annotation: my-annotation + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.metadata.labels["custom-label"] + value: my-value + - equal: + path: spec.template.metadata.annotations["custom-annotation"] + value: my-annotation + + - it: Job should include node selector when set + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + nodeSelector: + kubernetes.io/os: linux + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.spec.nodeSelector["kubernetes.io/os"] + value: linux + + - it: Job should include tolerations when set + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + tolerations: + - key: "dedicated" + operator: "Equal" + value: "valkey" + effect: "NoSchedule" + template: templates/cluster-init-job.yaml asserts: - contains: - path: spec.template.spec.containers[0].volumeMounts + path: spec.template.spec.tolerations content: - name: RELEASE-NAME-valkey-tls - mountPath: /tls + key: "dedicated" + operator: "Equal" + value: "valkey" + effect: "NoSchedule" - # Init config tests (cluster mode config generation) - - it: should generate cluster config in init script + - it: Job should include affinity when set set: cluster.enabled: true cluster.persistence.size: "5Gi" - cluster.nodeTimeout: 20000 - template: templates/init_config.yaml + affinity: + nodeAffinity: + 
requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-type + operator: In + values: + - cache + template: templates/cluster-init-job.yaml asserts: - - matchRegex: - path: data["init.sh"] - pattern: "cluster-enabled yes" - - matchRegex: - path: data["init.sh"] - pattern: "cluster-config-file /data/nodes.conf" - - matchRegex: - path: data["init.sh"] - pattern: "cluster-node-timeout 20000" + - isNotNull: + path: spec.template.spec.affinity.nodeAffinity - - it: should configure cluster-require-full-coverage when disabled + - it: Job should include priority class name when set set: cluster.enabled: true cluster.persistence.size: "5Gi" - cluster.requireFullCoverage: false - template: templates/init_config.yaml + priorityClassName: "high-priority" + template: templates/cluster-init-job.yaml asserts: - - matchRegex: - path: data["init.sh"] - pattern: "cluster-require-full-coverage no" + - equal: + path: spec.template.spec.priorityClassName + value: "high-priority" - - it: should configure cluster-allow-reads-when-down when enabled + # --- Job TLS tests --- + - it: Job should mount TLS volume when TLS is enabled set: cluster.enabled: true cluster.persistence.size: "5Gi" - cluster.allowReadsWhenDown: true - template: templates/init_config.yaml + tls.enabled: true + tls.existingSecret: "valkey-tls-secret" + template: templates/cluster-init-job.yaml asserts: - - matchRegex: - path: data["init.sh"] - pattern: "cluster-allow-reads-when-down yes" + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: RELEASE-NAME-valkey-tls + mountPath: /tls + - contains: + path: spec.template.spec.volumes + content: + name: RELEASE-NAME-valkey-tls + secret: + secretName: valkey-tls-secret + defaultMode: 256 - # Cluster auth secret mount tests (bug fix: ensure main container has access to plaintext password) - - it: should mount valkey-users-secret to main container when auth.usersExistingSecret is set + - it: Job 
should not mount TLS volume when TLS is disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + tls.enabled: false + template: templates/cluster-init-job.yaml + asserts: + - notContains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: RELEASE-NAME-valkey-tls + mountPath: /tls + + # --- Job Authentication tests --- + - it: Job should mount valkey-users-secret when usersExistingSecret is set set: cluster.enabled: true cluster.persistence.size: "5Gi" @@ -356,7 +580,7 @@ tests: auth.aclUsers: default: permissions: "~* &* +@all" - template: templates/cluster-statefulset.yaml + template: templates/cluster-init-job.yaml asserts: - contains: path: spec.template.spec.containers[0].volumeMounts @@ -364,8 +588,15 @@ tests: name: valkey-users-secret mountPath: /valkey-users-secret readOnly: true + - contains: + path: spec.template.spec.volumes + content: + name: valkey-users-secret + secret: + secretName: my-valkey-users + defaultMode: 256 - - it: should mount valkey-auth-secret to main container when inline passwords are used + - it: Job should mount valkey-auth-secret when inline passwords are used set: cluster.enabled: true cluster.persistence.size: "5Gi" @@ -374,7 +605,7 @@ tests: default: password: "testpass" permissions: "~* &* +@all" - template: templates/cluster-statefulset.yaml + template: templates/cluster-init-job.yaml asserts: - contains: path: spec.template.spec.containers[0].volumeMounts @@ -382,8 +613,15 @@ tests: name: valkey-auth-secret mountPath: /valkey-auth-secret readOnly: true + - contains: + path: spec.template.spec.volumes + content: + name: valkey-auth-secret + secret: + secretName: RELEASE-NAME-valkey-auth + defaultMode: 256 - - it: should mount both auth secrets to main container when both are configured + - it: Job should mount both auth secrets when both are configured set: cluster.enabled: true cluster.persistence.size: "5Gi" @@ -393,7 +631,7 @@ tests: default: permissions: "~* &* +@all" password: 
"fallback" - template: templates/cluster-statefulset.yaml + template: templates/cluster-init-job.yaml asserts: - contains: path: spec.template.spec.containers[0].volumeMounts @@ -408,7 +646,25 @@ tests: mountPath: /valkey-auth-secret readOnly: true - # Cluster init script password retrieval tests (bug fix: read from secret, not ACL hash) + - it: Job should not mount auth secrets when auth is disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: false + template: templates/cluster-init-job.yaml + asserts: + - notContains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: valkey-users-secret + any: true + - notContains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: valkey-auth-secret + any: true + + # --- Cluster init script password retrieval tests --- - it: cluster-script should read password from valkey-users-secret when usersExistingSecret is set set: cluster.enabled: true @@ -495,3 +751,151 @@ tests: - notMatchRegex: path: data["init-cluster.sh"] pattern: 'grep.*users\.acl' + + # Authentication tests (StatefulSet) + - it: should fail when cluster auth enabled but replication user not in aclUsers + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + cluster.replicationUser: "clusteruser" + auth.aclUsers: + default: + password: "test" + permissions: "~* &* +@all" + template: templates/cluster-statefulset.yaml + asserts: + - failedTemplate: + errorPattern: "Cluster replication user 'clusteruser'.*must be defined in auth.aclUsers.*" + + - it: should succeed when cluster auth is properly configured + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + cluster.replicationUser: "default" + auth.aclUsers: + default: + password: "testpass" + permissions: "~* &* +@all" + template: templates/cluster-statefulset.yaml + asserts: + - isKind: + of: StatefulSet + + # TLS tests (StatefulSet) + - it: should configure TLS volume 
mount in cluster mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + tls.enabled: true + tls.existingSecret: "valkey-tls-secret" + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: RELEASE-NAME-valkey-tls + mountPath: /tls + + # Init config tests (cluster mode config generation) + - it: should generate cluster config in init script + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.nodeTimeout: 20000 + template: templates/init_config.yaml + asserts: + - matchRegex: + path: data["init.sh"] + pattern: "cluster-enabled yes" + - matchRegex: + path: data["init.sh"] + pattern: "cluster-config-file /data/nodes.conf" + - matchRegex: + path: data["init.sh"] + pattern: "cluster-node-timeout 20000" + + - it: should configure cluster-require-full-coverage when disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.requireFullCoverage: false + template: templates/init_config.yaml + asserts: + - matchRegex: + path: data["init.sh"] + pattern: "cluster-require-full-coverage no" + + - it: should configure cluster-allow-reads-when-down when enabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.allowReadsWhenDown: true + template: templates/init_config.yaml + asserts: + - matchRegex: + path: data["init.sh"] + pattern: "cluster-allow-reads-when-down yes" + + # Cluster auth secret mount tests (StatefulSet - main container still needs ACL for Valkey server) + - it: should mount valkey-users-secret to main container when auth.usersExistingSecret is set + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.usersExistingSecret: "my-valkey-users" + auth.aclUsers: + default: + permissions: "~* &* +@all" + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: 
valkey-users-secret + mountPath: /valkey-users-secret + readOnly: true + + - it: should mount valkey-auth-secret to main container when inline passwords are used + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.aclUsers: + default: + password: "testpass" + permissions: "~* &* +@all" + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: valkey-auth-secret + mountPath: /valkey-auth-secret + readOnly: true + + - it: should mount both auth secrets to main container when both are configured + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.usersExistingSecret: "my-valkey-users" + auth.aclUsers: + default: + permissions: "~* &* +@all" + password: "fallback" + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: valkey-users-secret + mountPath: /valkey-users-secret + readOnly: true + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: valkey-auth-secret + mountPath: /valkey-auth-secret + readOnly: true From 8542e08d13a21bad3005a09fb3f8b0dc04153c8c Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Sat, 28 Feb 2026 23:39:51 +0530 Subject: [PATCH 06/23] feat: Istio compatibility Signed-off-by: Ankit Pati --- valkey/templates/istio-destination-rule.yaml | 41 ++ .../templates/istio-peer-authentication.yaml | 21 + valkey/tests/istio_test.yaml | 447 ++++++++++++++++++ valkey/values.schema.json | 48 ++ valkey/values.yaml | 25 + 5 files changed, 582 insertions(+) create mode 100644 valkey/templates/istio-destination-rule.yaml create mode 100644 valkey/templates/istio-peer-authentication.yaml create mode 100644 valkey/tests/istio_test.yaml diff --git a/valkey/templates/istio-destination-rule.yaml b/valkey/templates/istio-destination-rule.yaml new file mode 100644 index 00000000..19bc74b7 
--- /dev/null +++ b/valkey/templates/istio-destination-rule.yaml @@ -0,0 +1,41 @@ +{{- if .Values.istio.enabled }} +apiVersion: networking.istio.io/v1 +kind: DestinationRule +metadata: + name: {{ include "valkey.fullname" . }} + labels: + {{- include "valkey.labels" . | nindent 4 }} + {{- with .Values.istio.destinationRule.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.istio.destinationRule.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + host: {{ include "valkey.fullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }} + trafficPolicy: + tls: + mode: {{ .Values.istio.destinationRule.mode }} +{{- if or .Values.replica.enabled .Values.cluster.enabled }} +--- +apiVersion: networking.istio.io/v1 +kind: DestinationRule +metadata: + name: {{ include "valkey.headlessServiceName" . }} + labels: + {{- include "valkey.labels" . | nindent 4 }} + {{- with .Values.istio.destinationRule.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.istio.destinationRule.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + host: {{ include "valkey.headlessServiceName" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }} + trafficPolicy: + tls: + mode: {{ .Values.istio.destinationRule.mode }} +{{- end }} +{{- end }} diff --git a/valkey/templates/istio-peer-authentication.yaml b/valkey/templates/istio-peer-authentication.yaml new file mode 100644 index 00000000..83d468bd --- /dev/null +++ b/valkey/templates/istio-peer-authentication.yaml @@ -0,0 +1,21 @@ +{{- if .Values.istio.enabled }} +apiVersion: security.istio.io/v1 +kind: PeerAuthentication +metadata: + name: {{ include "valkey.fullname" . }} + labels: + {{- include "valkey.labels" . | nindent 4 }} + {{- with .Values.istio.peerAuthentication.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.istio.peerAuthentication.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + {{- include "valkey.selectorLabels" . | nindent 6 }} + mtls: + mode: {{ .Values.istio.peerAuthentication.mode }} +{{- end }} diff --git a/valkey/tests/istio_test.yaml b/valkey/tests/istio_test.yaml new file mode 100644 index 00000000..3d081fb4 --- /dev/null +++ b/valkey/tests/istio_test.yaml @@ -0,0 +1,447 @@ +suite: istio service mesh integration +templates: + - templates/istio-peer-authentication.yaml + - templates/istio-destination-rule.yaml +tests: + # --- Feature flag tests --- + - it: should not create PeerAuthentication when istio is disabled + set: + istio.enabled: false + template: templates/istio-peer-authentication.yaml + asserts: + - hasDocuments: + count: 0 + + - it: should not create DestinationRule when istio is disabled + set: + istio.enabled: false + template: templates/istio-destination-rule.yaml + asserts: + - hasDocuments: + count: 0 + + - it: should create PeerAuthentication when istio is enabled + set: + istio.enabled: true + template: templates/istio-peer-authentication.yaml + asserts: + - hasDocuments: + count: 1 + - isKind: + of: PeerAuthentication + - isAPIVersion: + of: security.istio.io/v1 + + - it: should create DestinationRule when istio is enabled + set: + istio.enabled: true + template: templates/istio-destination-rule.yaml + asserts: + - hasDocuments: + count: 1 + - isKind: + of: DestinationRule + - isAPIVersion: + of: networking.istio.io/v1 + + # --- PeerAuthentication tests --- + - it: PeerAuthentication should target Valkey pods via selector labels + set: + istio.enabled: true + template: templates/istio-peer-authentication.yaml + asserts: + - equal: + path: spec.selector.matchLabels["app.kubernetes.io/name"] + value: valkey + - equal: + path: spec.selector.matchLabels["app.kubernetes.io/instance"] + value: RELEASE-NAME + + - it: PeerAuthentication should default to STRICT mTLS mode + set: + istio.enabled: true + template: templates/istio-peer-authentication.yaml + 
asserts: + - equal: + path: spec.mtls.mode + value: STRICT + + - it: PeerAuthentication should allow overriding mTLS mode to PERMISSIVE + set: + istio.enabled: true + istio.peerAuthentication.mode: PERMISSIVE + template: templates/istio-peer-authentication.yaml + asserts: + - equal: + path: spec.mtls.mode + value: PERMISSIVE + + - it: PeerAuthentication should allow overriding mTLS mode to DISABLE + set: + istio.enabled: true + istio.peerAuthentication.mode: DISABLE + template: templates/istio-peer-authentication.yaml + asserts: + - equal: + path: spec.mtls.mode + value: DISABLE + + - it: PeerAuthentication should allow overriding mTLS mode to UNSET + set: + istio.enabled: true + istio.peerAuthentication.mode: UNSET + template: templates/istio-peer-authentication.yaml + asserts: + - equal: + path: spec.mtls.mode + value: UNSET + + - it: PeerAuthentication should have correct name + set: + istio.enabled: true + template: templates/istio-peer-authentication.yaml + asserts: + - equal: + path: metadata.name + value: RELEASE-NAME-valkey + + - it: PeerAuthentication should include chart labels + set: + istio.enabled: true + template: templates/istio-peer-authentication.yaml + asserts: + - isNotNull: + path: metadata.labels["helm.sh/chart"] + - isNotNull: + path: metadata.labels["app.kubernetes.io/name"] + - isNotNull: + path: metadata.labels["app.kubernetes.io/managed-by"] + + - it: PeerAuthentication should include custom labels + set: + istio.enabled: true + istio.peerAuthentication.labels: + security.example.com/policy: strict + team: platform + template: templates/istio-peer-authentication.yaml + asserts: + - equal: + path: metadata.labels["security.example.com/policy"] + value: strict + - equal: + path: metadata.labels["team"] + value: platform + + - it: PeerAuthentication should include custom annotations + set: + istio.enabled: true + istio.peerAuthentication.annotations: + security.example.com/reviewed: "true" + template: templates/istio-peer-authentication.yaml 
+ asserts: + - equal: + path: metadata.annotations["security.example.com/reviewed"] + value: "true" + + - it: PeerAuthentication should not have annotations when none are set + set: + istio.enabled: true + template: templates/istio-peer-authentication.yaml + asserts: + - notExists: + path: metadata.annotations + + - it: PeerAuthentication should include common labels when set + set: + istio.enabled: true + commonLabels: + env: production + template: templates/istio-peer-authentication.yaml + asserts: + - equal: + path: metadata.labels.env + value: production + + # --- DestinationRule tests (main service) --- + - it: DestinationRule should target the main service host in standalone mode + set: + istio.enabled: true + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: spec.host + value: RELEASE-NAME-valkey.NAMESPACE.svc.cluster.local + + - it: DestinationRule should default to ISTIO_MUTUAL TLS mode + set: + istio.enabled: true + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: spec.trafficPolicy.tls.mode + value: ISTIO_MUTUAL + + - it: DestinationRule should allow overriding TLS mode + set: + istio.enabled: true + istio.destinationRule.mode: MUTUAL + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: spec.trafficPolicy.tls.mode + value: MUTUAL + + - it: DestinationRule should allow SIMPLE TLS mode + set: + istio.enabled: true + istio.destinationRule.mode: SIMPLE + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: spec.trafficPolicy.tls.mode + value: SIMPLE + + - it: DestinationRule should allow DISABLE TLS mode + set: + istio.enabled: true + istio.destinationRule.mode: DISABLE + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: spec.trafficPolicy.tls.mode + value: DISABLE + + - it: DestinationRule should have correct name + set: + istio.enabled: true + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: 
metadata.name + value: RELEASE-NAME-valkey + + - it: DestinationRule should include chart labels + set: + istio.enabled: true + template: templates/istio-destination-rule.yaml + asserts: + - isNotNull: + path: metadata.labels["helm.sh/chart"] + - isNotNull: + path: metadata.labels["app.kubernetes.io/name"] + + - it: DestinationRule should include custom labels + set: + istio.enabled: true + istio.destinationRule.labels: + networking.example.com/managed: "true" + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: metadata.labels["networking.example.com/managed"] + value: "true" + + - it: DestinationRule should include custom annotations + set: + istio.enabled: true + istio.destinationRule.annotations: + networking.example.com/reviewed: "true" + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: metadata.annotations["networking.example.com/reviewed"] + value: "true" + + - it: DestinationRule should not have annotations when none are set + set: + istio.enabled: true + template: templates/istio-destination-rule.yaml + asserts: + - notExists: + path: metadata.annotations + + - it: DestinationRule should use custom cluster domain + set: + istio.enabled: true + clusterDomain: my.custom.domain + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: spec.host + value: RELEASE-NAME-valkey.NAMESPACE.svc.my.custom.domain + + # --- DestinationRule headless service tests (cluster mode) --- + - it: should create headless DestinationRule when cluster mode is enabled + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + asserts: + - hasDocuments: + count: 2 + + - it: headless DestinationRule should target the headless service host + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + documentIndex: 1 + asserts: + - isKind: + of: 
DestinationRule + - equal: + path: metadata.name + value: RELEASE-NAME-valkey-headless + - equal: + path: spec.host + value: RELEASE-NAME-valkey-headless.NAMESPACE.svc.cluster.local + + - it: headless DestinationRule should use same TLS mode as main + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + documentIndex: 1 + asserts: + - equal: + path: spec.trafficPolicy.tls.mode + value: ISTIO_MUTUAL + + - it: headless DestinationRule should respect overridden TLS mode + set: + istio.enabled: true + istio.destinationRule.mode: MUTUAL + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + documentIndex: 1 + asserts: + - equal: + path: spec.trafficPolicy.tls.mode + value: MUTUAL + + - it: headless DestinationRule should include custom labels + set: + istio.enabled: true + istio.destinationRule.labels: + networking.example.com/managed: "true" + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + documentIndex: 1 + asserts: + - equal: + path: metadata.labels["networking.example.com/managed"] + value: "true" + + - it: headless DestinationRule should include custom annotations + set: + istio.enabled: true + istio.destinationRule.annotations: + networking.example.com/reviewed: "true" + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + documentIndex: 1 + asserts: + - equal: + path: metadata.annotations["networking.example.com/reviewed"] + value: "true" + + - it: headless DestinationRule should include chart labels + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + documentIndex: 1 + asserts: + - isNotNull: + path: metadata.labels["helm.sh/chart"] + - isNotNull: + path: metadata.labels["app.kubernetes.io/name"] + + # --- DestinationRule 
headless service tests (replica mode) --- + - it: should create headless DestinationRule when replica mode is enabled + set: + istio.enabled: true + replica.enabled: true + replica.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + asserts: + - hasDocuments: + count: 2 + + - it: headless DestinationRule should target headless service in replica mode + set: + istio.enabled: true + replica.enabled: true + replica.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + documentIndex: 1 + asserts: + - isKind: + of: DestinationRule + - equal: + path: metadata.name + value: RELEASE-NAME-valkey-headless + - equal: + path: spec.host + value: RELEASE-NAME-valkey-headless.NAMESPACE.svc.cluster.local + + # --- Standalone mode tests --- + - it: should only create main DestinationRule in standalone mode (no headless) + set: + istio.enabled: true + cluster.enabled: false + replica.enabled: false + template: templates/istio-destination-rule.yaml + asserts: + - hasDocuments: + count: 1 + - equal: + path: metadata.name + value: RELEASE-NAME-valkey + + # --- Name override tests --- + - it: should use fullnameOverride in PeerAuthentication + set: + istio.enabled: true + fullnameOverride: "my-valkey" + template: templates/istio-peer-authentication.yaml + asserts: + - equal: + path: metadata.name + value: my-valkey + + - it: should use fullnameOverride in DestinationRule + set: + istio.enabled: true + fullnameOverride: "my-valkey" + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: metadata.name + value: my-valkey + - equal: + path: spec.host + value: my-valkey.NAMESPACE.svc.cluster.local + + - it: should use fullnameOverride in headless DestinationRule + set: + istio.enabled: true + fullnameOverride: "my-valkey" + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + documentIndex: 1 + asserts: + - equal: + path: metadata.name + value: my-valkey-headless + - 
equal: + path: spec.host + value: my-valkey-headless.NAMESPACE.svc.cluster.local diff --git a/valkey/values.schema.json b/valkey/values.schema.json index 28bf4e53..4c4ab001 100644 --- a/valkey/values.schema.json +++ b/valkey/values.schema.json @@ -180,6 +180,54 @@ "initResources": { "type": "object" }, + "istio": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "peerAuthentication": { + "type": "object", + "properties": { + "mode": { + "type": "string", + "enum": [ + "STRICT", + "PERMISSIVE", + "DISABLE", + "UNSET" + ] + }, + "labels": { + "type": "object" + }, + "annotations": { + "type": "object" + } + } + }, + "destinationRule": { + "type": "object", + "properties": { + "mode": { + "type": "string", + "enum": [ + "DISABLE", + "SIMPLE", + "MUTUAL", + "ISTIO_MUTUAL" + ] + }, + "labels": { + "type": "object" + }, + "annotations": { + "type": "object" + } + } + } + } + }, "metrics": { "type": "object", "properties": { diff --git a/valkey/values.yaml b/valkey/values.yaml index a4256620..062b234c 100644 --- a/valkey/values.yaml +++ b/valkey/values.yaml @@ -324,6 +324,31 @@ tls: # Require that clients authenticate with a certificate requireClientCertificate: false +istio: + # Enable Istio + enabled: false + + # PeerAuthentication controls mTLS enforcement on inbound connections + peerAuthentication: + # mTLS mode for inbound traffic (STRICT, PERMISSIVE, DISABLE, UNSET) + # STRICT: Require mTLS on all ports + # PERMISSIVE: Accept both plaintext and mTLS + mode: STRICT + # Additional labels for the PeerAuthentication resource + labels: {} + # Additional annotations for the PeerAuthentication resource + annotations: {} + + # DestinationRule configures mTLS for outbound connections to Valkey services + destinationRule: + # TLS mode for outbound traffic (DISABLE, SIMPLE, MUTUAL, ISTIO_MUTUAL) + # ISTIO_MUTUAL: Use Istio-managed certificates for mTLS + mode: ISTIO_MUTUAL + # Additional labels for the DestinationRule resource + labels: {} + # 
Additional annotations for the DestinationRule resource + annotations: {} + # Node selector for pod assignment nodeSelector: {} From c288a5a39e099a35f371f33c6493ff9b8e4c1cb2 Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Fri, 1 May 2026 17:07:17 +0530 Subject: [PATCH 07/23] test: functional tests with `kind` Signed-off-by: Ankit Pati --- Justfile | 23 ++++ functional-tests/kind-config.yaml | 5 + functional-tests/lib.sh | 30 +++++ functional-tests/run-all.sh | 73 +++++++++++ functional-tests/run-scenario.sh | 209 ++++++++++++++++++++++++++++++ functional-tests/setup.sh | 83 ++++++++++++ functional-tests/teardown.sh | 32 +++++ 7 files changed, 455 insertions(+) create mode 100644 functional-tests/kind-config.yaml create mode 100755 functional-tests/lib.sh create mode 100755 functional-tests/run-all.sh create mode 100755 functional-tests/run-scenario.sh create mode 100755 functional-tests/setup.sh create mode 100755 functional-tests/teardown.sh diff --git a/Justfile b/Justfile index 7cd0c6b3..ce4fd8bc 100644 --- a/Justfile +++ b/Justfile @@ -28,3 +28,26 @@ package: validate: lint test @echo "=== All validations passed ===" +# Create the kind cluster and shared fixtures used by the functional suite +functional-setup: + ./functional-tests/setup.sh + +# Tear down fixtures (pass --cluster to also delete the kind cluster) +functional-teardown *ARGS: + ./functional-tests/teardown.sh {{ARGS}} + +# Run one scenario against the already-set-up kind cluster, e.g. 
+# just functional-scenario off off on on +functional-scenario tls auth shard rep: + ./functional-tests/run-scenario.sh {{tls}} {{auth}} {{shard}} {{rep}} + +# Run the full 16-scenario matrix (set FILTER='tls=on auth=on' to narrow) +functional-run: + ./functional-tests/run-all.sh + +# Full functional suite: setup + matrix + teardown including cluster +functional-test: + ./functional-tests/setup.sh + ./functional-tests/run-all.sh + ./functional-tests/teardown.sh --cluster + diff --git a/functional-tests/kind-config.yaml b/functional-tests/kind-config.yaml new file mode 100644 index 00000000..3c58a0b5 --- /dev/null +++ b/functional-tests/kind-config.yaml @@ -0,0 +1,5 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +name: valkey-functional +nodes: + - role: control-plane diff --git a/functional-tests/lib.sh b/functional-tests/lib.sh new file mode 100755 index 00000000..9d6a3513 --- /dev/null +++ b/functional-tests/lib.sh @@ -0,0 +1,30 @@ +# Shared helpers for Valkey functional tests. +# Sourced by every script under functional-tests/. + +set -euo pipefail + +HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) +REPO_ROOT=$(cd -- "${HERE}/.." && pwd) +CHART_DIR=${REPO_ROOT}/valkey + +CLUSTER_NAME=${VALKEY_KIND_CLUSTER:-valkey-functional} +KUBE_CONTEXT=kind-${CLUSTER_NAME} +NAMESPACE=${VALKEY_FUNCTIONAL_NAMESPACE:-default} +RELEASE=${VALKEY_RELEASE:-valkey} + +AUTH_SECRET=valkey-auth +TLS_SECRET=valkey-tls +TESTBENCH_POD=valkey-testbench +AUTH_PASSWORD=password + +log() { printf '=== %s ===\n' "$*"; } + +kctl() { kubectl --context="${KUBE_CONTEXT}" --namespace="${NAMESPACE}" "$@"; } +hctl() { helm --kube-context="${KUBE_CONTEXT}" --namespace="${NAMESPACE}" "$@"; } + +# kubectl exec into the testbench. Pipes stderr through so failures are legible. 
+testbench_exec() { kctl exec "${TESTBENCH_POD}" -- "$@"; } + +wait_for_testbench() { + kctl wait --for=condition=Ready "pod/${TESTBENCH_POD}" --timeout=120s +} diff --git a/functional-tests/run-all.sh b/functional-tests/run-all.sh new file mode 100755 index 00000000..2633c797 --- /dev/null +++ b/functional-tests/run-all.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +# Drive every scenario in the matrix, sequentially. Assumes `setup.sh` +# has already created the kind cluster and fixtures. + +HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=lib.sh +. "${HERE}/lib.sh" + +SCENARIOS=( + # tls auth shard rep + "off off off off" + "off off off on" + "off off on off" + "off off on on" + "off on off off" + "off on off on" + "off on on off" + "off on on on" + "on off off off" + "on off off on" + "on off on off" + "on off on on" + "on on off off" + "on on off on" + "on on on off" + "on on on on" +) + +# Optional filter: run only the scenarios matching every selector in $FILTER, e.g. `FILTER='tls=on auth=off' ./run-all.sh`. +# Kept intentionally simple — pass one or more "key=on" / "key=off" selectors. +matches() { + local spec=$1 tls=$2 auth=$3 shard=$4 rep=$5 + for sel in ${FILTER:-}; do + local k=${sel%=*} v=${sel#*=} + local have + case "${k}" in + tls) have=${tls} ;; + auth) have=${auth} ;; + shard) have=${shard} ;; + rep) have=${rep} ;; + *) echo "bad filter key: ${k}" >&2; exit 2 ;; + esac + [[ ${have} == "${v}" ]] || return 1 + done + return 0 +} + +passed=0 +failed=0 +failures=() + +for s in "${SCENARIOS[@]}"; do + # shellcheck disable=SC2086 + read -r tls auth shard rep <<<"${s}" + if ! 
matches "${s}" "${tls}" "${auth}" "${shard}" "${rep}"; then + continue + fi + + log "SCENARIO: tls=${tls} auth=${auth} shard=${shard} rep=${rep}" + if "${HERE}/run-scenario.sh" "${tls}" "${auth}" "${shard}" "${rep}"; then + passed=$(( passed + 1 )) + else + failed=$(( failed + 1 )) + failures+=("tls=${tls} auth=${auth} shard=${shard} rep=${rep}") + fi +done + +echo +log "Summary: ${passed} passed, ${failed} failed" +if (( failed > 0 )); then + printf ' failed: %s\n' "${failures[@]}" + exit 1 +fi diff --git a/functional-tests/run-scenario.sh b/functional-tests/run-scenario.sh new file mode 100755 index 00000000..8abb0209 --- /dev/null +++ b/functional-tests/run-scenario.sh @@ -0,0 +1,209 @@ +#!/usr/bin/env bash +# Run a single scenario of the Valkey functional matrix against the +# already-created kind cluster. +# +# Usage: +# ./run-scenario.sh +# Each arg is "on" or "off". Example: +# ./run-scenario.sh off off on on +# drives the "TLS off, auth off, shard on, rep on" scenario. + +HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=lib.sh +. "${HERE}/lib.sh" + +if (( $# != 4 )); then + echo "usage: $0 (each on|off)" >&2 + exit 2 +fi + +on_or_off() { + case "$1" in + on|off) return 0 ;; + *) echo "expected 'on' or 'off', got: $1" >&2; return 1 ;; + esac +} +for v in "$@"; do on_or_off "${v}"; done + +TLS=$1; AUTH=$2; SHARD=$3; REP=$4 +SCENARIO="tls=${TLS} auth=${AUTH} shard=${SHARD} rep=${REP}" + +flag() { [[ $1 == on ]] && echo true || echo false; } +is_on() { [[ $1 == on ]]; } + +# --------------------------------------------------------------------------- +# Build helm flags for this scenario. 
+# --------------------------------------------------------------------------- +helm_flags=( + --set-string='podLabels.sidecar\.istio\.io/inject=false' +) + +if is_on "${AUTH}"; then + helm_flags+=( + --set=auth.enabled=true + --set=auth.usersExistingSecret="${AUTH_SECRET}" + --set=auth.aclUsers.default.permissions='~* &* +@all' + ) +fi + +if is_on "${TLS}"; then + helm_flags+=( + --set=tls.enabled=true + --set=tls.existingSecret="${TLS_SECRET}" + ) +fi + +if is_on "${SHARD}"; then + helm_flags+=( + --set=cluster.enabled=true + --set=cluster.persistence.size=1Gi + --set=cluster.shards=3 + ) + if is_on "${REP}"; then + helm_flags+=(--set=cluster.replicasPerShard=1) + expected_node_count=6 + else + helm_flags+=(--set=cluster.replicasPerShard=0) + expected_node_count=3 + fi +elif is_on "${REP}"; then + helm_flags+=( + --set=replica.enabled=true + --set=replica.persistence.size=1Gi + ) + expected_node_count=0 # unused +else + expected_node_count=0 # unused +fi + +# --------------------------------------------------------------------------- +# Install. +# --------------------------------------------------------------------------- +log "Installing scenario: ${SCENARIO}" +hctl install "${RELEASE}" "${CHART_DIR}" "${helm_flags[@]}" + +# Clean up on exit regardless of pass/fail — the next scenario needs a clean slate. +cleanup() { + local rc=$? + log "Cleaning up scenario: ${SCENARIO}" + hctl uninstall "${RELEASE}" 2>/dev/null || true + kctl delete pvc --selector="app.kubernetes.io/instance=${RELEASE}" --ignore-not-found + exit "${rc}" +} +trap cleanup EXIT + +# --------------------------------------------------------------------------- +# Wait for pods to become ready. +# --------------------------------------------------------------------------- +log "Waiting for workload to be ready" +if is_on "${SHARD}"; then + kctl rollout status "statefulset/${RELEASE}" --timeout=300s + # The cluster-init Job is a post-install hook; wait for it to complete. 
+ kctl wait --for=condition=complete "job/${RELEASE}-cluster-init" --timeout=300s +elif is_on "${REP}"; then + kctl rollout status "statefulset/${RELEASE}" --timeout=300s +else + kctl rollout status "deployment/${RELEASE}" --timeout=300s +fi + +# --------------------------------------------------------------------------- +# Build the canonical "working" valkey-cli argv for this scenario. +# --------------------------------------------------------------------------- +cli_args_good=(valkey-cli -h "valkey.${NAMESPACE}.svc.cluster.local" --no-auth-warning) +if is_on "${AUTH}"; then + cli_args_good+=(-a "${AUTH_PASSWORD}") +fi +if is_on "${TLS}"; then + cli_args_good+=(--tls --cacert /tls/ca.crt) +fi + +# --------------------------------------------------------------------------- +# Assertions. +# --------------------------------------------------------------------------- +fail() { echo "FAIL: $*" >&2; exit 1; } + +assert_eq() { + local expected=$1 actual=$2 what=$3 + if [[ ${actual} != "${expected}" ]]; then + fail "${what}: expected '${expected}', got '${actual}'" + fi +} + +# Positive: the fully-correct invocation should succeed. +log "Positive check" +if is_on "${SHARD}"; then + # Even after the cluster-init Job completes, gossip needs a few seconds to converge + # — each node updates `cluster_state` only after it sees the others. Poll for that. + state=fail + for _ in $(seq 1 30); do + state=$(testbench_exec "${cli_args_good[@]}" cluster info | awk -F: '/^cluster_state:/{print $2}' | tr -d '\r\n') + [[ ${state} == ok ]] && break + sleep 2 + done + assert_eq "ok" "${state}" "cluster_state" + + # Inspect the topology: exact count + master/slave split. 
+ nodes=$(testbench_exec "${cli_args_good[@]}" cluster nodes) + actual_nodes=$(printf '%s\n' "${nodes}" | sed '/^$/d' | wc -l | tr -d ' ') + assert_eq "${expected_node_count}" "${actual_nodes}" "cluster node count" + + master_count=$(printf '%s\n' "${nodes}" | grep -c 'master' || true) + assert_eq "3" "${master_count}" "master count" + + if is_on "${REP}"; then + slave_count=$(printf '%s\n' "${nodes}" | grep -c 'slave' || true) + assert_eq "3" "${slave_count}" "slave count" + fi +else + pong=$(testbench_exec "${cli_args_good[@]}" ping | tr -d '\r\n') + assert_eq "PONG" "${pong}" "ping" +fi + +# Negative — auth. No password should be rejected with NOAUTH. +if is_on "${AUTH}"; then + log "Negative check: missing password must be rejected" + cli_args_noauth=(valkey-cli -h "valkey.${NAMESPACE}.svc.cluster.local" --no-auth-warning) + if is_on "${TLS}"; then + cli_args_noauth+=(--tls --cacert /tls/ca.crt) + fi + if is_on "${SHARD}"; then + probe_cmd=(cluster info) + else + probe_cmd=(ping) + fi + set +e + out=$(testbench_exec "${cli_args_noauth[@]}" "${probe_cmd[@]}" 2>&1) + rc=$? + set -e + if ! grep -qi 'NOAUTH' <<<"${out}"; then + fail "expected NOAUTH error, got (rc=${rc}): ${out}" + fi +fi + +# Negative — TLS. No --tls at all, and --tls without the CA, must both fail. +if is_on "${TLS}"; then + log "Negative check: plaintext client against TLS server must fail" + cli_args_plaintext=(valkey-cli -h "valkey.${NAMESPACE}.svc.cluster.local" --no-auth-warning) + if is_on "${AUTH}"; then cli_args_plaintext+=(-a "${AUTH_PASSWORD}"); fi + if is_on "${SHARD}"; then probe_cmd=(cluster info); else probe_cmd=(ping); fi + set +e + out=$(testbench_exec "${cli_args_plaintext[@]}" "${probe_cmd[@]}" 2>&1) + rc=$? 
+ set -e + if (( rc == 0 )); then + fail "plaintext client should have failed but succeeded: ${out}" + fi + + log "Negative check: TLS client without CA must fail to verify" + cli_args_nocacert=(valkey-cli -h "valkey.${NAMESPACE}.svc.cluster.local" --tls --no-auth-warning) + if is_on "${AUTH}"; then cli_args_nocacert+=(-a "${AUTH_PASSWORD}"); fi + set +e + out=$(testbench_exec "${cli_args_nocacert[@]}" "${probe_cmd[@]}" 2>&1) + rc=$? + set -e + if (( rc == 0 )) || ! grep -qi 'certificate verify failed' <<<"${out}"; then + fail "expected 'certificate verify failed', got (rc=${rc}): ${out}" + fi +fi + +log "PASS: ${SCENARIO}" diff --git a/functional-tests/setup.sh b/functional-tests/setup.sh new file mode 100755 index 00000000..0678f2dd --- /dev/null +++ b/functional-tests/setup.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash +# Bring up the kind cluster and create the shared fixtures (auth secret, +# TLS secret, testbench pod) used by every scenario. + +HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=lib.sh +. 
"${HERE}/lib.sh" + +log "Creating kind cluster ${CLUSTER_NAME}" +if kind get clusters | grep -Fxq "${CLUSTER_NAME}"; then + echo "kind cluster '${CLUSTER_NAME}' already exists; reusing" +else + kind create cluster --config "${HERE}/kind-config.yaml" --wait 120s +fi + +log "Creating ${AUTH_SECRET} secret" +kctl delete secret "${AUTH_SECRET}" --ignore-not-found +kctl create secret generic "${AUTH_SECRET}" \ + --from-literal="default=${AUTH_PASSWORD}" + +log "Generating self-signed TLS material" +CERT_DIR=$(mktemp -d) +trap 'rm -rf -- "${CERT_DIR}"' EXIT + +# CA +openssl req -x509 -nodes -days 365 -newkey rsa:2048 \ + -keyout "${CERT_DIR}/valkey-ca.key" \ + -out "${CERT_DIR}/valkey-ca.crt" \ + -subj /CN=valkey-ca 2>/dev/null + +# Server CSR with SANs the chart's pods present on +openssl req -nodes -newkey rsa:2048 \ + -keyout "${CERT_DIR}/valkey-server.key" \ + -out "${CERT_DIR}/valkey-server.csr" \ + -subj "/CN=valkey.${NAMESPACE}.svc.cluster.local" \ + -addext "subjectAltName=DNS:valkey.${NAMESPACE}.svc.cluster.local,DNS:valkey-headless.${NAMESPACE}.svc.cluster.local,DNS:*.valkey-headless.${NAMESPACE}.svc.cluster.local" \ + 2>/dev/null + +openssl x509 -req \ + -in "${CERT_DIR}/valkey-server.csr" \ + -CA "${CERT_DIR}/valkey-ca.crt" \ + -CAkey "${CERT_DIR}/valkey-ca.key" \ + -CAcreateserial \ + -out "${CERT_DIR}/valkey-server.crt" \ + -days 365 \ + -copy_extensions copyall \ + 2>/dev/null + +log "Creating ${TLS_SECRET} secret" +kctl delete secret "${TLS_SECRET}" --ignore-not-found +kctl create secret generic "${TLS_SECRET}" \ + --from-file="server.crt=${CERT_DIR}/valkey-server.crt" \ + --from-file="server.key=${CERT_DIR}/valkey-server.key" \ + --from-file="ca.crt=${CERT_DIR}/valkey-ca.crt" + +log "Launching ${TESTBENCH_POD}" +kctl delete pod "${TESTBENCH_POD}" --ignore-not-found --wait=true +kctl run "${TESTBENCH_POD}" \ + --image=valkey/valkey:9.0.1 \ + --labels='sidecar.istio.io/inject=false' \ + --restart=Never \ + --overrides='{ + "spec": { + "containers": [{ + 
"name": "'"${TESTBENCH_POD}"'", + "image": "valkey/valkey:9.0.1", + "command": ["sleep", "infinity"], + "volumeMounts": [{ + "name": "tls", + "mountPath": "/tls", + "readOnly": true + }] + }], + "volumes": [{ + "name": "tls", + "secret": {"secretName": "'"${TLS_SECRET}"'"} + }] + } + }' \ + --command -- sleep infinity +wait_for_testbench + +log "Setup complete" diff --git a/functional-tests/teardown.sh b/functional-tests/teardown.sh new file mode 100755 index 00000000..be8922bb --- /dev/null +++ b/functional-tests/teardown.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Remove the shared fixtures and (optionally) the kind cluster itself. +# +# Usage: +# ./teardown.sh # remove fixtures, keep cluster +# ./teardown.sh --cluster # also delete the kind cluster + +HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=lib.sh +. "${HERE}/lib.sh" + +DELETE_CLUSTER=0 +for arg in "$@"; do + case "${arg}" in + --cluster) DELETE_CLUSTER=1 ;; + *) echo "unknown arg: ${arg}" >&2; exit 2 ;; + esac +done + +if kind get clusters | grep -Fxq "${CLUSTER_NAME}"; then + log "Removing fixtures from ${CLUSTER_NAME}" + # Best-effort: any lingering release + PVCs. 
+ hctl uninstall "${RELEASE}" 2>/dev/null || true + kctl delete pvc --selector="app.kubernetes.io/instance=${RELEASE}" --ignore-not-found + kctl delete pod "${TESTBENCH_POD}" --ignore-not-found + kctl delete secret "${AUTH_SECRET}" "${TLS_SECRET}" --ignore-not-found +fi + +if (( DELETE_CLUSTER )); then + log "Deleting kind cluster ${CLUSTER_NAME}" + kind delete cluster --name "${CLUSTER_NAME}" +fi From e1c39561af82397d812155caac9ceec3614e3563 Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Fri, 1 May 2026 18:05:30 +0530 Subject: [PATCH 08/23] test: functional tests for Istio Signed-off-by: Ankit Pati --- Justfile | 8 +-- functional-tests/lib.sh | 19 ++++++- functional-tests/run-all.sh | 48 +++++++--------- functional-tests/run-scenario.sh | 68 +++++++++++++++++++---- functional-tests/setup.sh | 68 +++++++++++++++++------ functional-tests/teardown.sh | 4 +- valkey/templates/cluster-statefulset.yaml | 4 ++ 7 files changed, 154 insertions(+), 65 deletions(-) diff --git a/Justfile b/Justfile index ce4fd8bc..8ac91e93 100644 --- a/Justfile +++ b/Justfile @@ -37,11 +37,11 @@ functional-teardown *ARGS: ./functional-tests/teardown.sh {{ARGS}} # Run one scenario against the already-set-up kind cluster, e.g. 
-# just functional-scenario off off on on -functional-scenario tls auth shard rep: - ./functional-tests/run-scenario.sh {{tls}} {{auth}} {{shard}} {{rep}} +# just functional-scenario off off on on off +functional-scenario tls auth shard rep istio: + ./functional-tests/run-scenario.sh {{tls}} {{auth}} {{shard}} {{rep}} {{istio}} -# Run the full 16-scenario matrix (set FILTER='tls=on auth=on' to narrow) +# Run the full 32-scenario matrix (set FILTER='tls=on istio=on' to narrow) functional-run: ./functional-tests/run-all.sh diff --git a/functional-tests/lib.sh b/functional-tests/lib.sh index 9d6a3513..27140018 100755 --- a/functional-tests/lib.sh +++ b/functional-tests/lib.sh @@ -14,17 +14,30 @@ RELEASE=${VALKEY_RELEASE:-valkey} AUTH_SECRET=valkey-auth TLS_SECRET=valkey-tls +# Two testbenches: one never gets an Envoy sidecar (istio=off scenarios, or when +# Istio isn't installed at all), one does (istio=on scenarios). TESTBENCH_POD=valkey-testbench +TESTBENCH_POD_INJECTED=valkey-testbench-injected AUTH_PASSWORD=password +ISTIO_NAMESPACE=istio-system + log() { printf '=== %s ===\n' "$*"; } kctl() { kubectl --context="${KUBE_CONTEXT}" --namespace="${NAMESPACE}" "$@"; } hctl() { helm --kube-context="${KUBE_CONTEXT}" --namespace="${NAMESPACE}" "$@"; } -# kubectl exec into the testbench. Pipes stderr through so failures are legible. -testbench_exec() { kctl exec "${TESTBENCH_POD}" -- "$@"; } +# kubectl exec into a testbench. First arg is the pod name; rest is the command. 
+testbench_exec_in() { + local pod=$1; shift + kctl exec "${pod}" -c "${pod}" -- "$@" +} wait_for_testbench() { - kctl wait --for=condition=Ready "pod/${TESTBENCH_POD}" --timeout=120s + local pod=$1 + kctl wait --for=condition=Ready "pod/${pod}" --timeout=180s +} + +istio_installed() { + kubectl --context="${KUBE_CONTEXT}" get namespace "${ISTIO_NAMESPACE}" >/dev/null 2>&1 } diff --git a/functional-tests/run-all.sh b/functional-tests/run-all.sh index 2633c797..1862148a 100755 --- a/functional-tests/run-all.sh +++ b/functional-tests/run-all.sh @@ -6,30 +6,23 @@ HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=lib.sh . "${HERE}/lib.sh" -SCENARIOS=( - # tls auth shard rep - "off off off off" - "off off off on" - "off off on off" - "off off on on" - "off on off off" - "off on off on" - "off on on off" - "off on on on" - "on off off off" - "on off off on" - "on off on off" - "on off on on" - "on on off off" - "on on off on" - "on on on off" - "on on on on" -) +# 32 scenarios: every combination of tls/auth/shard/rep/istio. +SCENARIOS=() +for istio in off on; do + for tls in off on; do + for auth in off on; do + for shard in off on; do + for rep in off on; do + SCENARIOS+=("${tls} ${auth} ${shard} ${rep} ${istio}") + done + done + done + done +done -# Optional filter: skip scenarios matching the first arg, e.g. `./run-all.sh tls=on`. -# Kept intentionally simple — pass one or more "key=on" / "key=off" selectors. +# Optional filter: `FILTER='tls=on istio=on'` runs only matching scenarios. 
matches() { - local spec=$1 tls=$2 auth=$3 shard=$4 rep=$5 + local tls=$1 auth=$2 shard=$3 rep=$4 istio=$5 for sel in ${FILTER:-}; do local k=${sel%=*} v=${sel#*=} local have @@ -38,6 +31,7 @@ matches() { auth) have=${auth} ;; shard) have=${shard} ;; rep) have=${rep} ;; + istio) have=${istio} ;; *) echo "bad filter key: ${k}" >&2; exit 2 ;; esac [[ ${have} == "${v}" ]] || return 1 @@ -51,17 +45,17 @@ failures=() for s in "${SCENARIOS[@]}"; do # shellcheck disable=SC2086 - read -r tls auth shard rep <<<"${s}" - if ! matches "${s}" "${tls}" "${auth}" "${shard}" "${rep}"; then + read -r tls auth shard rep istio <<<"${s}" + if ! matches "${tls}" "${auth}" "${shard}" "${rep}" "${istio}"; then continue fi - log "SCENARIO: tls=${tls} auth=${auth} shard=${shard} rep=${rep}" - if "${HERE}/run-scenario.sh" "${tls}" "${auth}" "${shard}" "${rep}"; then + log "SCENARIO: tls=${tls} auth=${auth} shard=${shard} rep=${rep} istio=${istio}" + if "${HERE}/run-scenario.sh" "${tls}" "${auth}" "${shard}" "${rep}" "${istio}"; then passed=$(( passed + 1 )) else failed=$(( failed + 1 )) - failures+=("tls=${tls} auth=${auth} shard=${shard} rep=${rep}") + failures+=("tls=${tls} auth=${auth} shard=${shard} rep=${rep} istio=${istio}") fi done diff --git a/functional-tests/run-scenario.sh b/functional-tests/run-scenario.sh index 8abb0209..e45707a5 100755 --- a/functional-tests/run-scenario.sh +++ b/functional-tests/run-scenario.sh @@ -3,17 +3,17 @@ # already-created kind cluster. # # Usage: -# ./run-scenario.sh +# ./run-scenario.sh # Each arg is "on" or "off". Example: -# ./run-scenario.sh off off on on -# drives the "TLS off, auth off, shard on, rep on" scenario. +# ./run-scenario.sh off off on on on +# drives the "TLS off, auth off, shard on, rep on, Istio on" scenario. HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=lib.sh . 
"${HERE}/lib.sh" -if (( $# != 4 )); then - echo "usage: $0 (each on|off)" >&2 +if (( $# != 5 )); then + echo "usage: $0 (each on|off)" >&2 exit 2 fi @@ -25,18 +25,31 @@ on_or_off() { } for v in "$@"; do on_or_off "${v}"; done -TLS=$1; AUTH=$2; SHARD=$3; REP=$4 -SCENARIO="tls=${TLS} auth=${AUTH} shard=${SHARD} rep=${REP}" +TLS=$1; AUTH=$2; SHARD=$3; REP=$4; ISTIO=$5 +SCENARIO="tls=${TLS} auth=${AUTH} shard=${SHARD} rep=${REP} istio=${ISTIO}" -flag() { [[ $1 == on ]] && echo true || echo false; } is_on() { [[ $1 == on ]]; } +if is_on "${ISTIO}"; then + TESTBENCH=${TESTBENCH_POD_INJECTED} +else + TESTBENCH=${TESTBENCH_POD} +fi +testbench_exec() { testbench_exec_in "${TESTBENCH}" "$@"; } + # --------------------------------------------------------------------------- # Build helm flags for this scenario. # --------------------------------------------------------------------------- -helm_flags=( - --set-string='podLabels.sidecar\.istio\.io/inject=false' -) +helm_flags=() + +if is_on "${ISTIO}"; then + # Let Envoy get injected into every chart pod; turn on the chart's Istio templates. + helm_flags+=(--set=istio.enabled=true) +else + # Opt out of injection when Istio isn't the target — the sidecar would break + # the probe and the cluster-init Job would never finish. + helm_flags+=(--set-string='podLabels.sidecar\.istio\.io/inject=false') +fi if is_on "${AUTH}"; then helm_flags+=( @@ -129,6 +142,39 @@ assert_eq() { fi } +# Istio resources: PeerAuthentication + DestinationRule (headless DR only exists +# in replica / cluster mode) should be present iff istio=on. 
+if is_on "${ISTIO}"; then + log "Istio check: chart-owned resources must exist" + kctl get peerauthentication "${RELEASE}" >/dev/null \ + || fail "PeerAuthentication/${RELEASE} missing" + kctl get destinationrule "${RELEASE}" >/dev/null \ + || fail "DestinationRule/${RELEASE} missing" + if is_on "${SHARD}" || is_on "${REP}"; then + kctl get destinationrule "${RELEASE}-headless" >/dev/null \ + || fail "DestinationRule/${RELEASE}-headless missing" + fi + + # Chart pods must actually have the Envoy sidecar. Istio >=1.29 injects it + # as a native sidecar (initContainer with restartPolicy=Always), so check + # both containers and initContainers. + pod=$(kctl get pod -l "app.kubernetes.io/instance=${RELEASE}" \ + -o jsonpath='{.items[0].metadata.name}') + if ! kctl get pod "${pod}" \ + -o jsonpath='{.spec.containers[*].name} {.spec.initContainers[*].name}' \ + | tr ' ' '\n' | grep -Fxq istio-proxy; then + fail "pod ${pod} has no istio-proxy container" + fi +else + log "Istio check: chart-owned resources must be absent" + if kctl get peerauthentication "${RELEASE}" >/dev/null 2>&1; then + fail "PeerAuthentication/${RELEASE} should not exist when istio=off" + fi + if kctl get destinationrule "${RELEASE}" >/dev/null 2>&1; then + fail "DestinationRule/${RELEASE} should not exist when istio=off" + fi +fi + # Positive: the fully-correct invocation should succeed. log "Positive check" if is_on "${SHARD}"; then diff --git a/functional-tests/setup.sh b/functional-tests/setup.sh index 0678f2dd..f7ac759e 100755 --- a/functional-tests/setup.sh +++ b/functional-tests/setup.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -# Bring up the kind cluster and create the shared fixtures (auth secret, -# TLS secret, testbench pod) used by every scenario. +# Bring up the kind cluster, install Istio (demo profile), and create the +# shared fixtures (auth secret, TLS secret, two testbench pods) used by +# every scenario. 
HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=lib.sh @@ -13,6 +14,21 @@ else kind create cluster --config "${HERE}/kind-config.yaml" --wait 120s fi +log "Installing Istio (demo profile)" +if istio_installed; then + echo "istio-system namespace already exists; assuming Istio is installed" +else + # `demo` gives us istiod + an ingress/egress gateway. We only need istiod, + # but the profile is the simplest path and adds no meaningful overhead. + istioctl install --context="${KUBE_CONTEXT}" --set profile=demo --skip-confirmation +fi + +log "Enabling sidecar injection on namespace ${NAMESPACE}" +# Label idempotently — `kubectl label --overwrite` works whether or not the +# label exists. +kubectl --context="${KUBE_CONTEXT}" label namespace "${NAMESPACE}" \ + istio-injection=enabled --overwrite + log "Creating ${AUTH_SECRET} secret" kctl delete secret "${AUTH_SECRET}" --ignore-not-found kctl create secret generic "${AUTH_SECRET}" \ @@ -53,31 +69,47 @@ kctl create secret generic "${TLS_SECRET}" \ --from-file="server.key=${CERT_DIR}/valkey-server.key" \ --from-file="ca.crt=${CERT_DIR}/valkey-ca.crt" -log "Launching ${TESTBENCH_POD}" -kctl delete pod "${TESTBENCH_POD}" --ignore-not-found --wait=true -kctl run "${TESTBENCH_POD}" \ - --image=valkey/valkey:9.0.1 \ - --labels='sidecar.istio.io/inject=false' \ - --restart=Never \ - --overrides='{ +# --------------------------------------------------------------------------- +# Testbench pods. 
Two flavours: +# valkey-testbench — never injected (sidecar.istio.io/inject=false) +# valkey-testbench-injected — injected, used for istio=on scenarios +# --------------------------------------------------------------------------- +launch_testbench() { + local pod=$1 inject=$2 overrides + local labels + if [[ ${inject} == "false" ]]; then + labels='sidecar.istio.io/inject=false' + else + labels='sidecar.istio.io/inject=true' + fi + overrides='{ "spec": { "containers": [{ - "name": "'"${TESTBENCH_POD}"'", + "name": "'"${pod}"'", "image": "valkey/valkey:9.0.1", "command": ["sleep", "infinity"], - "volumeMounts": [{ - "name": "tls", - "mountPath": "/tls", - "readOnly": true - }] + "volumeMounts": [{"name": "tls", "mountPath": "/tls", "readOnly": true}] }], "volumes": [{ "name": "tls", "secret": {"secretName": "'"${TLS_SECRET}"'"} }] } - }' \ - --command -- sleep infinity -wait_for_testbench + }' + kctl delete pod "${pod}" --ignore-not-found --wait=true + kctl run "${pod}" \ + --image=valkey/valkey:9.0.1 \ + --labels="${labels}" \ + --restart=Never \ + --overrides="${overrides}" \ + --command -- sleep infinity + wait_for_testbench "${pod}" +} + +log "Launching ${TESTBENCH_POD} (no sidecar)" +launch_testbench "${TESTBENCH_POD}" false + +log "Launching ${TESTBENCH_POD_INJECTED} (with Envoy sidecar)" +launch_testbench "${TESTBENCH_POD_INJECTED}" true log "Setup complete" diff --git a/functional-tests/teardown.sh b/functional-tests/teardown.sh index be8922bb..f48b8e22 100755 --- a/functional-tests/teardown.sh +++ b/functional-tests/teardown.sh @@ -22,8 +22,8 @@ if kind get clusters | grep -Fxq "${CLUSTER_NAME}"; then # Best-effort: any lingering release + PVCs. 
hctl uninstall "${RELEASE}" 2>/dev/null || true kctl delete pvc --selector="app.kubernetes.io/instance=${RELEASE}" --ignore-not-found - kctl delete pod "${TESTBENCH_POD}" --ignore-not-found - kctl delete secret "${AUTH_SECRET}" "${TLS_SECRET}" --ignore-not-found + kctl delete pod "${TESTBENCH_POD}" "${TESTBENCH_POD_INJECTED}" --ignore-not-found + kctl delete secret "${AUTH_SECRET}" "${TLS_SECRET}" --ignore-not-found fi if (( DELETE_CLUSTER )); then diff --git a/valkey/templates/cluster-statefulset.yaml b/valkey/templates/cluster-statefulset.yaml index d00f0ea6..013c6a1d 100644 --- a/valkey/templates/cluster-statefulset.yaml +++ b/valkey/templates/cluster-statefulset.yaml @@ -44,6 +44,10 @@ spec: {{- if .Values.valkeyConfig }} checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum | trunc 32 | quote }} {{- end }} + {{- if .Values.istio.enabled }} + traffic.sidecar.istio.io/excludeInboundPorts: {{ .Values.cluster.busPort | quote }} + traffic.sidecar.istio.io/excludeOutboundPorts: {{ .Values.cluster.busPort | quote }} + {{- end }} spec: {{- (include "valkey.imagePullSecrets" .) 
| nindent 6 }} automountServiceAccountToken: {{ .Values.serviceAccount.automount }} From 399134cc60d31071a00b004170f7f0e3d660e770 Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Sat, 2 May 2026 00:42:02 +0530 Subject: [PATCH 09/23] fix: multiple issues identified in testing Signed-off-by: Ankit Pati --- Justfile | 5 +- functional-tests/lib.sh | 11 +- functional-tests/run-all.sh | 9 +- functional-tests/run-extra-scenarios.sh | 239 +++++++++++++++++++++ valkey/scripts/cluster-init-script.sh | 73 ++++--- valkey/templates/NOTES.txt | 5 +- valkey/templates/_helpers.tpl | 39 +--- valkey/templates/cluster-init-job.yaml | 1 + valkey/templates/cluster-statefulset.yaml | 65 ++++-- valkey/templates/deploy_valkey.yaml | 49 +++-- valkey/templates/init_config.yaml | 27 ++- valkey/templates/netpolicy.yaml | 26 ++- valkey/templates/poddisruptionbudget.yaml | 2 +- valkey/templates/service-read.yaml | 5 +- valkey/templates/service.yaml | 11 +- valkey/templates/statefulset.yaml | 53 ++--- valkey/templates/tests/auth.yaml | 8 + valkey/tests/cluster_test.yaml | 88 +++++++- valkey/tests/deployment_test.yaml | 47 ++++ valkey/tests/init_config_test.yaml | 41 ++++ valkey/tests/netpolicy_test.yaml | 49 +++++ valkey/tests/poddisruptionbudget_test.yaml | 19 +- valkey/tests/service_test.yaml | 35 +-- valkey/tests/statefulset_test.yaml | 31 +++ valkey/values.schema.json | 6 +- valkey/values.yaml | 16 +- 26 files changed, 785 insertions(+), 175 deletions(-) create mode 100755 functional-tests/run-extra-scenarios.sh create mode 100644 valkey/tests/netpolicy_test.yaml diff --git a/Justfile b/Justfile index 8ac91e93..d3a4e9bb 100644 --- a/Justfile +++ b/Justfile @@ -45,9 +45,12 @@ functional-scenario tls auth shard rep istio: functional-run: ./functional-tests/run-all.sh +# Run the extra (non-matrix) regression scenarios on their own +functional-extras: + ./functional-tests/run-extra-scenarios.sh + # Full functional suite: setup + matrix + teardown including cluster functional-test: 
./functional-tests/setup.sh ./functional-tests/run-all.sh ./functional-tests/teardown.sh --cluster - diff --git a/functional-tests/lib.sh b/functional-tests/lib.sh index 27140018..0ccf9fb5 100755 --- a/functional-tests/lib.sh +++ b/functional-tests/lib.sh @@ -18,7 +18,16 @@ TLS_SECRET=valkey-tls # Istio isn't installed at all), one does (istio=on scenarios). TESTBENCH_POD=valkey-testbench TESTBENCH_POD_INJECTED=valkey-testbench-injected -AUTH_PASSWORD=password +# Deliberately hostile: spaces, shell metacharacters ($, `, &, !), a backslash, +# and a double-quote. Every auth=on scenario then exercises both layers of +# quoting on the chart side: +# - the init container's ACL hash pipe (printf %s | sha256sum) +# - the masterauth line in valkey.conf (must be quoted+escaped) +# - the cluster-init Job's REDISCLI_AUTH path +# - the helm-test pod's `cat /valkey-auth/...-password | xargs valkey-cli -a` +# Keeping these in one place means every future auth=on scenario inherits the +# coverage for free. +AUTH_PASSWORD='p@ss w/ spaces & $chars `backticks` "quoted" \backslash' ISTIO_NAMESPACE=istio-system diff --git a/functional-tests/run-all.sh b/functional-tests/run-all.sh index 1862148a..56c7acbf 100755 --- a/functional-tests/run-all.sh +++ b/functional-tests/run-all.sh @@ -60,8 +60,15 @@ for s in "${SCENARIOS[@]}"; do done echo -log "Summary: ${passed} passed, ${failed} failed" +log "Matrix summary: ${passed} passed, ${failed} failed" if (( failed > 0 )); then printf ' failed: %s\n' "${failures[@]}" exit 1 fi + +# Extra, non-matrix regressions (aclConfig+metrics, default-deny netpol, etc). +# Skipped when FILTER is set — filters are matrix-scoped, so the extras +# wouldn't match anyway and running them would be surprising. 
+if [[ -z ${FILTER:-} ]]; then + "${HERE}/run-extra-scenarios.sh" +fi
diff --git a/functional-tests/run-extra-scenarios.sh b/functional-tests/run-extra-scenarios.sh
new file mode 100755
index 00000000..c77b11ba
--- /dev/null
+++ b/functional-tests/run-extra-scenarios.sh
@@ -0,0 +1,242 @@
+#!/usr/bin/env bash
+# Targeted regressions that don't fit the tls/auth/shard/rep/istio matrix.
+# Each scenario is self-contained: install, assert, uninstall.
+# NOTE(review): hctl, kctl, log, testbench_exec_in and the RELEASE, CHART_DIR,
+# NAMESPACE, TESTBENCH_POD variables are not defined here — presumably lib.sh.
+
+HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=lib.sh
+. "${HERE}/lib.sh"
+
+# Outcome log: pass/fail each append one entry; fail also returns non-zero.
+RESULTS=()
+pass() { RESULTS+=("PASS: $1"); }
+fail() { RESULTS+=("FAIL: $1: $2"); return 1; }
+
+# Uninstall a release and delete its PVCs; $1 defaults to ${RELEASE}.
+cleanup_release() {
+  local rel=${1:-${RELEASE}}
+  hctl uninstall "${rel}" 2>/dev/null || true
+  kctl delete pvc --selector="app.kubernetes.io/instance=${rel}" --ignore-not-found >/dev/null
+}
+
+# ---------------------------------------------------------------------------
+# Scenario: auth.enabled=true with aclConfig only (no aclUsers) and metrics
+# enabled. This used to wedge the exporter in CreateContainerConfigError
+# because the chart pointed REDIS_PASSWORD at a key `default-password` that
+# only exists when there's an inline aclUsers.default.password. The fix is to
+# only wire REDIS_PASSWORD when a real key exists.
+# ---------------------------------------------------------------------------
+scenario_aclconfig_metrics() {
+  local name="aclConfig-only + metrics exporter must not crash"
+  log "SCENARIO: ${name}"
+  cleanup_release
+
+  # Use an alternate release name to avoid colliding with the shared
+  # fixture secret `valkey-auth` (managed by setup.sh, not Helm). The chart
+  # generates `${release}-auth`, so a different release ⇒ a different secret.
+  local release="${RELEASE}-aclcfg"
+  cleanup_release "${release}"
+
+  if ! hctl install "${release}" "${CHART_DIR}" \
+    --set=metrics.enabled=true \
+    --set=auth.enabled=true \
+    --set-string="auth.aclConfig=user default on >simplepass ~* &* +@all" \
+    --set-string='podLabels.sidecar\.istio\.io/inject=false' \
+    --wait --timeout=180s >/dev/null; then
+    fail "${name}" "helm install failed"
+    cleanup_release "${release}"
+    return
+  fi
+
+  # Main container must be Running, metrics sidecar must be Ready. The bug
+  # made the metrics container stick in CreateContainerConfigError forever —
+  # no amount of probe-waiting would ever flip it to Ready.
+  local pod
+  pod=$(kctl get pod -l "app.kubernetes.io/instance=${release}" \
+    -o jsonpath='{.items[0].metadata.name}')
+  if ! kctl wait "pod/${pod}" \
+    --for=condition=Ready --timeout=120s >/dev/null; then
+    local status
+    status=$(kctl get "pod/${pod}" -o jsonpath='{.status.containerStatuses[*].state}')
+    fail "${name}" "pod never became Ready (state=${status})"
+    cleanup_release "${release}"
+    return
+  fi
+
+  # Metrics endpoint actually responds. Use `kubectl port-forward` into a
+  # local port — lets us hit the exporter from the host with curl, without
+  # relying on either container having an HTTP client.
+  local pf_port=19121 pf_pid
+  kctl port-forward "pod/${pod}" "${pf_port}:9121" >/dev/null 2>&1 &
+  pf_pid=$!
+  # Give port-forward a moment to establish.
+  for _ in $(seq 1 20); do
+    if curl -sf --max-time 1 "http://127.0.0.1:${pf_port}/metrics" \
+      >/dev/null 2>&1; then
+      break
+    fi
+    sleep 0.5
+  done
+
+  local metrics_out
+  metrics_out=$(curl -sf --max-time 5 "http://127.0.0.1:${pf_port}/metrics" \
+    2>/dev/null || true)
+  kill "${pf_pid}" 2>/dev/null || true
+  wait "${pf_pid}" 2>/dev/null || true
+
+  if ! grep -q 'redis_exporter_' <<<"${metrics_out}"; then
+    fail "${name}" "/metrics did not serve redis_exporter_* counters"
+    cleanup_release "${release}"
+    return
+  fi
+
+  cleanup_release "${release}"
+  pass "${name}"
+}
+
+# ---------------------------------------------------------------------------
+# Scenario: default-deny NetworkPolicy. Previously `networkPolicy.ingress: []`
+# rendered an invalid policy (policyTypes: []), which the API accepts but is a
+# no-op. The fix gates on hasKey, so an empty list still opts in.
+# ---------------------------------------------------------------------------
+scenario_default_deny_netpol() {
+  local name="networkPolicy.ingress=[] produces a real default-deny policy"
+  log "SCENARIO: ${name}"
+  cleanup_release
+
+  if ! hctl install "${RELEASE}" "${CHART_DIR}" \
+    --set-string='podLabels.sidecar\.istio\.io/inject=false' \
+    --set-json='networkPolicy={"ingress":[]}' \
+    --wait --timeout=120s >/dev/null; then
+    fail "${name}" "helm install failed"
+    return
+  fi
+
+  # The original bug: `networkPolicy.ingress: []` rendered `policyTypes: []`,
+  # which Kubernetes treats as "no policy in either direction" — silently
+  # allowing all traffic despite the user clearly opting into default-deny.
+  # The fix is to gate on hasKey, not truthiness.
+  #
+  # Checking via the API alone is fragile (kube-apiserver drops empty lists
+  # on serialization), so:
+  #   1) Assert policyTypes contains Ingress.
+  #   2) Actually attempt a TCP connection from the testbench — a real
+  #      default-deny policy blocks it; a no-op policy lets it through.
+  local types
+  types=$(kctl get networkpolicy "${RELEASE}" \
+    -o jsonpath='{.spec.policyTypes[*]}')
+  if [[ ${types} != *Ingress* ]]; then
+    fail "${name}" "policyTypes=${types} (want to include Ingress)"
+    return
+  fi
+
+  # Live traffic check. Use a short timeout — a default-deny policy drops
+  # SYN packets, so the testbench will sit in CONNECT until the timeout.
+  # Record the status via `|| rc=$?` rather than toggling errexit, which
+  # would leave `set -e` forced on for the rest of the script.
+  local rc=0
+  testbench_exec_in "${TESTBENCH_POD}" sh -c \
+    "timeout 5 valkey-cli -h ${RELEASE}.${NAMESPACE}.svc.cluster.local ping" \
+    >/dev/null 2>&1 || rc=$?
+  if (( rc == 0 )); then
+    fail "${name}" "ping succeeded — default-deny ingress policy is a no-op"
+    return
+  fi
+
+  cleanup_release
+  pass "${name}"
+}
+
+# ---------------------------------------------------------------------------
+# Scenario: frontend Service must never expose the cluster bus port. The bus
+# port is pod-to-pod gossip; routing it through a round-robin ClusterIP
+# misdirects clients to arbitrary nodes.
+# ---------------------------------------------------------------------------
+scenario_bus_port_hidden() {
+  local name="frontend service does not expose the cluster bus port"
+  log "SCENARIO: ${name}"
+  cleanup_release
+
+  if ! hctl install "${RELEASE}" "${CHART_DIR}" \
+    --set=cluster.enabled=true \
+    --set=cluster.persistence.size=100Mi \
+    --set=cluster.shards=3 \
+    --set=cluster.replicasPerShard=0 \
+    --set=cluster.busPort=16379 \
+    --set-string='podLabels.sidecar\.istio\.io/inject=false' \
+    --wait --timeout=300s >/dev/null; then
+    fail "${name}" "helm install failed"
+    return
+  fi
+
+  local frontend_ports headless_ports
+  frontend_ports=$(kctl get service "${RELEASE}" \
+    -o jsonpath='{.spec.ports[*].name}')
+  headless_ports=$(kctl get service "${RELEASE}-headless" \
+    -o jsonpath='{.spec.ports[*].name}')
+
+  if grep -qw tcp-bus <<<"${frontend_ports}"; then
+    fail "${name}" "frontend exposes tcp-bus (ports=${frontend_ports})"
+    return
+  fi
+  if ! grep -qw tcp-bus <<<"${headless_ports}"; then
+    fail "${name}" "headless missing tcp-bus (ports=${headless_ports})"
+    return
+  fi
+
+  cleanup_release
+  pass "${name}"
+}
+
+# ---------------------------------------------------------------------------
+# Scenario: readiness probe must exist on the valkey container. Previously
+# only startup+liveness were defined, so a pod that lost server health but
+# kept the TCP socket would keep receiving traffic.
+# ---------------------------------------------------------------------------
+scenario_readiness_probe_exists() {
+  local name="valkey container declares a readiness probe"
+  log "SCENARIO: ${name}"
+  cleanup_release
+
+  if ! hctl install "${RELEASE}" "${CHART_DIR}" \
+    --set-string='podLabels.sidecar\.istio\.io/inject=false' \
+    --wait --timeout=120s >/dev/null; then
+    fail "${name}" "helm install failed"
+    return
+  fi
+
+  local probe
+  probe=$(kctl get deployment "${RELEASE}" \
+    -o jsonpath='{.spec.template.spec.containers[0].readinessProbe.exec.command}')
+  if [[ -z ${probe} ]]; then
+    fail "${name}" "readinessProbe is missing"
+    return
+  fi
+  # And it must be the NOAUTH-tolerant flavour.
+  if ! grep -q 'NOAUTH' <<<"${probe}"; then
+    fail "${name}" "readinessProbe does not tolerate NOAUTH (${probe})"
+    return
+  fi
+
+  cleanup_release
+  pass "${name}"
+}
+
+trap 'cleanup_release' EXIT
+
+scenario_aclconfig_metrics || true
+scenario_default_deny_netpol || true
+scenario_bus_port_hidden || true
+scenario_readiness_probe_exists || true
+
+echo
+log "Extra scenario summary"
+passed=0; failed=0
+for r in "${RESULTS[@]}"; do
+  printf ' %s\n' "${r}"
+  if [[ ${r} == PASS:* ]]; then passed=$(( passed + 1 )); else failed=$(( failed + 1 )); fi
+done
+echo
+log "Extras: ${passed} passed, ${failed} failed"
+(( failed == 0 ))
diff --git a/valkey/scripts/cluster-init-script.sh b/valkey/scripts/cluster-init-script.sh index 18f29e2d..ff9a96f8 100644 --- a/valkey/scripts/cluster-init-script.sh +++ b/valkey/scripts/cluster-init-script.sh @@ -1,5 +1,5 @@ #!/bin/sh -set -e +set -eu # --- Configuration & Initial Checks --- if [ "${CLUSTER_NODE_COUNT}" -eq "1" ]; then @@ -11,48 +11,56 @@ REPLICAS_PER_SHARD=${CLUSTER_REPLICAS_PER_SHARD:-1} PRIMARIES=$(( CLUSTER_NODE_COUNT / (1 + REPLICAS_PER_SHARD) )) {{- if and .Values.auth.enabled .Values.auth.aclUsers }} -# Get password for cluster replication user from mounted secret {{- $replUsername := .Values.cluster.replicationUser }} {{- $replUser := index .Values.auth.aclUsers $replUsername }} {{- $replPasswordKey := $replUser.passwordKey | default $replUsername }} {{- if .Values.auth.usersExistingSecret }} if [ -f "/valkey-users-secret/{{ $replPasswordKey }}" ]; then - AUTH_PASSWORD=$(cat "/valkey-users-secret/{{ $replPasswordKey }}") + REDISCLI_AUTH=$(cat "/valkey-users-secret/{{ $replPasswordKey }}") elif [ -f "/valkey-auth-secret/{{ $replUsername }}-password" ]; then - AUTH_PASSWORD=$(cat "/valkey-auth-secret/{{ $replUsername }}-password") + REDISCLI_AUTH=$(cat "/valkey-auth-secret/{{ $replUsername }}-password") else - echo "ERROR: No password found for cluster
replication user {{ $replUsername }}" >&2 exit 1 fi {{- else }} if [ -f "/valkey-auth-secret/{{ $replUsername }}-password" ]; then - AUTH_PASSWORD=$(cat "/valkey-auth-secret/{{ $replUsername }}-password") + REDISCLI_AUTH=$(cat "/valkey-auth-secret/{{ $replUsername }}-password") else - echo "ERROR: No password found for cluster replication user {{ $replUsername }}" + echo "ERROR: No password found for cluster replication user {{ $replUsername }}" >&2 exit 1 fi {{- end }} -AUTH_OPTION="-a ${AUTH_PASSWORD}" -{{- else }} -AUTH_OPTION="" +# Valkey/Redis clients honour REDISCLI_AUTH, which avoids passing the password +# on the command line (where it would leak via `ps` and trip over shell +# metacharacters). +export REDISCLI_AUTH {{- end }} +# vcli: thin wrapper that inherits REDISCLI_AUTH and always adds TLS args when +# configured. Callers pass only host/port/subcommand. +vcli() { {{- if .Values.tls.enabled }} -TLS_OPTION="--tls --cacert /tls/{{ .Values.tls.caPublicKey }}" + valkey-cli --no-auth-warning --tls --cacert "/tls/{{ .Values.tls.caPublicKey }}" "$@" {{- else }} -TLS_OPTION="" + valkey-cli --no-auth-warning "$@" {{- end }} +} echo "Cluster init job starting. Total nodes: ${CLUSTER_NODE_COUNT}, Primaries: ${PRIMARIES}, Replicas per shard: ${REPLICAS_PER_SHARD}" HEADLESS_SVC="{{ include "valkey.headlessServiceName" . }}" NAMESPACE="{{ .Release.Namespace }}" CLUSTER_DOMAIN="{{ .Values.clusterDomain }}" +PORT="{{ .Values.service.port }}" +FULLNAME="{{ include "valkey.fullname" . }}" + +node_host() { echo "${FULLNAME}-$1.${HEADLESS_SVC}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}"; } # --- Wait for all Valkey nodes to be ready --- for i in $(seq 0 $((CLUSTER_NODE_COUNT - 1))); do - NODE_HOST="{{ include "valkey.fullname" . 
}}-${i}.${HEADLESS_SVC}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}" - until valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${NODE_HOST}" -p {{ .Values.service.port }} ping 2>/dev/null | grep -q "PONG"; do + NODE_HOST=$(node_host "${i}") + until vcli -h "${NODE_HOST}" -p "${PORT}" ping 2>/dev/null | grep -q "PONG"; do echo "Waiting for ${NODE_HOST} to be ready..." sleep 2 done @@ -64,8 +72,8 @@ echo "All ${CLUSTER_NODE_COUNT} nodes are ready." # --- Discover Existing Cluster --- HEALTHY_NODE="" for i in $(seq 0 $((CLUSTER_NODE_COUNT - 1))); do - NODE_HOST="{{ include "valkey.fullname" . }}-${i}.${HEADLESS_SVC}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}" - if valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${NODE_HOST}" -p {{ .Values.service.port }} cluster info 2>/dev/null | grep -q "cluster_state:ok"; then + NODE_HOST=$(node_host "${i}") + if vcli -h "${NODE_HOST}" -p "${PORT}" cluster info 2>/dev/null | grep -q "cluster_state:ok"; then HEALTHY_NODE="${NODE_HOST}" echo "Found healthy cluster node: ${HEALTHY_NODE}" break @@ -76,14 +84,14 @@ done if [ -n "${HEALTHY_NODE}" ]; then echo "Existing cluster found. Checking for new nodes to add..." - KNOWN_NODES=$(valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${HEALTHY_NODE}" -p {{ .Values.service.port }} cluster nodes 2>/dev/null) + KNOWN_NODES=$(vcli -h "${HEALTHY_NODE}" -p "${PORT}" cluster nodes 2>/dev/null) NEW_NODE_COUNT=0 for i in $(seq 0 $((CLUSTER_NODE_COUNT - 1))); do - NODE_HOST="{{ include "valkey.fullname" . }}-${i}.${HEADLESS_SVC}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}" + NODE_HOST=$(node_host "${i}") NODE_IP=$(getent hosts "${NODE_HOST}" | awk '{print $1}') - if echo "${KNOWN_NODES}" | grep -v "fail" | grep -q "${NODE_IP}:{{ .Values.service.port }}"; then + if echo "${KNOWN_NODES}" | grep -v "fail" | grep -q "${NODE_IP}:${PORT}"; then echo "Node ${NODE_HOST} (${NODE_IP}) already in cluster." 
continue fi @@ -92,17 +100,17 @@ if [ -n "${HEALTHY_NODE}" ]; then NEW_NODE_COUNT=$((NEW_NODE_COUNT + 1)) # Forget any old, failed instance of this node - FAILED_NODE_ID=$(echo "${KNOWN_NODES}" | grep "${NODE_IP}:{{ .Values.service.port }}" | grep "fail" | awk '{print $1}' || echo "") + FAILED_NODE_ID=$(echo "${KNOWN_NODES}" | grep "${NODE_IP}:${PORT}" | grep "fail" | awk '{print $1}' || true) if [ -n "${FAILED_NODE_ID}" ]; then echo "Found node IP (${NODE_IP}) marked as failed with ID ${FAILED_NODE_ID}. Forgetting it..." - valkey-cli ${AUTH_OPTION} ${TLS_OPTION} --cluster call "${HEALTHY_NODE}:{{ .Values.service.port }}" cluster forget "${FAILED_NODE_ID}" > /dev/null 2>&1 || true + vcli --cluster call "${HEALTHY_NODE}:${PORT}" cluster forget "${FAILED_NODE_ID}" > /dev/null 2>&1 || true sleep 3 fi # Meet the cluster via the new node HEALTHY_NODE_IP=$(getent hosts "${HEALTHY_NODE}" | awk '{print $1}') echo "Sending CLUSTER MEET from ${NODE_HOST} to ${HEALTHY_NODE} (${HEALTHY_NODE_IP})" - valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${NODE_HOST}" -p {{ .Values.service.port }} cluster meet "${HEALTHY_NODE_IP}" {{ .Values.service.port }} + vcli -h "${NODE_HOST}" -p "${PORT}" cluster meet "${HEALTHY_NODE_IP}" "${PORT}" done if [ "${NEW_NODE_COUNT}" -eq 0 ]; then @@ -114,11 +122,11 @@ if [ -n "${HEALTHY_NODE}" ]; then # Assign roles to new nodes: find masters needing replicas for i in $(seq 0 $((CLUSTER_NODE_COUNT - 1))); do - NODE_HOST="{{ include "valkey.fullname" . 
}}-${i}.${HEADLESS_SVC}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}" - NODE_ID=$(valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${NODE_HOST}" -p {{ .Values.service.port }} cluster myid) + NODE_HOST=$(node_host "${i}") + NODE_ID=$(vcli -h "${NODE_HOST}" -p "${PORT}" cluster myid) # Re-fetch cluster state from healthy node for current view - CURRENT_NODES=$(valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${HEALTHY_NODE}" -p {{ .Values.service.port }} cluster nodes) + CURRENT_NODES=$(vcli -h "${HEALTHY_NODE}" -p "${PORT}" cluster nodes) # Check if this node is a master with no slots (new node) NODE_INFO=$(echo "${CURRENT_NODES}" | grep "${NODE_ID}") @@ -143,7 +151,7 @@ if [ -n "${HEALTHY_NODE}" ]; then if [ -n "${TARGET_MASTER_ID}" ]; then echo "Found target master ${TARGET_MASTER_ID} that needs a replica." - if valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${NODE_HOST}" -p {{ .Values.service.port }} cluster replicate "${TARGET_MASTER_ID}"; then + if vcli -h "${NODE_HOST}" -p "${PORT}" cluster replicate "${TARGET_MASTER_ID}"; then echo "Successfully configured ${NODE_HOST} as a replica for ${TARGET_MASTER_ID}." else echo "WARNING: Failed to replicate master ${TARGET_MASTER_ID} from ${NODE_HOST}." @@ -158,7 +166,7 @@ if [ -n "${HEALTHY_NODE}" ]; then PROPAGATION_ATTEMPTS=0 MAX_PROPAGATION_ATTEMPTS=60 while [ ${PROPAGATION_ATTEMPTS} -lt ${MAX_PROPAGATION_ATTEMPTS} ]; do - CLUSTER_STATE=$(valkey-cli ${AUTH_OPTION} ${TLS_OPTION} -h "${HEALTHY_NODE}" -p {{ .Values.service.port }} cluster info 2>/dev/null | grep "cluster_state:" | cut -d: -f2 | tr -d '\r\n') + CLUSTER_STATE=$(vcli -h "${HEALTHY_NODE}" -p "${PORT}" cluster info 2>/dev/null | grep "cluster_state:" | cut -d: -f2 | tr -d '\r\n') if [ "${CLUSTER_STATE}" = "ok" ]; then echo "Cluster state is OK. Proceeding with rebalance." 
break @@ -168,7 +176,7 @@ if [ -n "${HEALTHY_NODE}" ]; then sleep 5 done - valkey-cli ${AUTH_OPTION} ${TLS_OPTION} --cluster rebalance "${HEALTHY_NODE}:{{ .Values.service.port }}" --cluster-use-empty-masters --cluster-yes || true + vcli --cluster rebalance "${HEALTHY_NODE}:${PORT}" --cluster-use-empty-masters --cluster-yes || true echo "Cluster update completed." exit 0 @@ -178,15 +186,16 @@ fi echo "No existing cluster found. Creating new cluster..." NODES="" for i in $(seq 0 $((CLUSTER_NODE_COUNT - 1))); do - NODE_HOST="{{ include "valkey.fullname" . }}-${i}.${HEADLESS_SVC}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}" - NODES="${NODES} ${NODE_HOST}:{{ .Values.service.port }}" + NODE_HOST=$(node_host "${i}") + NODES="${NODES} ${NODE_HOST}:${PORT}" done # Allow time for cluster-enabled nodes to fully initialize sleep 10 -echo "Creating cluster with nodes: ${NODES}" -echo "yes" | valkey-cli ${AUTH_OPTION} ${TLS_OPTION} --cluster create ${NODES} --cluster-replicas "${REPLICAS_PER_SHARD}" +echo "Creating cluster with nodes:${NODES}" +# shellcheck disable=SC2086 +echo "yes" | vcli --cluster create ${NODES} --cluster-replicas "${REPLICAS_PER_SHARD}" echo "Cluster created successfully." exit 0 diff --git a/valkey/templates/NOTES.txt b/valkey/templates/NOTES.txt index e59b325b..c5ff2bba 100644 --- a/valkey/templates/NOTES.txt +++ b/valkey/templates/NOTES.txt @@ -25,7 +25,10 @@ Hash slots (16384 total) are distributed across the {{ .Values.cluster.shards }} Service: {{ include "valkey.fullname" . }} Type: {{ .Values.service.type }} Port: {{ .Values.service.port }} -Bus Port: {{ .Values.cluster.busPort }} (for inter-node communication) + +Bus port {{ .Values.cluster.busPort }} is reachable only through the headless +service — it carries cluster gossip + failover traffic between nodes, so it +must bypass the round-robin frontend service. 
1) In-cluster access From another Pod: diff --git a/valkey/templates/_helpers.tpl b/valkey/templates/_helpers.tpl index abbfd4d8..df778284 100644 --- a/valkey/templates/_helpers.tpl +++ b/valkey/templates/_helpers.tpl @@ -82,19 +82,17 @@ Returns the Valkey exporter container image The common image function that renders the container image */}} {{- define "common.image" -}} -{{- $registryName := .image.registry }} -{{- $repositoryName := .image.repository }} -{{- $tag := .image.tag }} -{{- if .global }} - {{- if .global.imageRegistry }} - {{- $registryName = .global.imageRegistry }} - {{- end }} -{{- end }} -{{- if $registryName }} -{{- printf "%s/%s:%s" $registryName $repositoryName $tag }} -{{- else }} -{{- printf "%s:%s" $repositoryName $tag }} -{{ end }} +{{- $registryName := .image.registry -}} +{{- $repositoryName := .image.repository -}} +{{- $tag := .image.tag -}} +{{- if and .global .global.imageRegistry -}} +{{- $registryName = .global.imageRegistry -}} +{{- end -}} +{{- if $registryName -}} +{{- printf "%s/%s:%s" $registryName $repositoryName $tag -}} +{{- else -}} +{{- printf "%s:%s" $repositoryName $tag -}} +{{- end -}} {{- end -}} {{/* @@ -225,19 +223,4 @@ Calculate total number of nodes in the cluster {{- mul $shards (add 1 $replicasPerShard) -}} {{- end -}} -{{/* -Generate list of cluster nodes for VALKEY_NODES environment variable -*/}} -{{- define "valkey.clusterNodes" -}} -{{- $fullname := include "valkey.fullname" . -}} -{{- $headlessSvc := include "valkey.headlessServiceName" . -}} -{{- $namespace := .Release.Namespace -}} -{{- $clusterDomain := .Values.clusterDomain -}} -{{- $nodeCount := include "valkey.clusterNodeCount" . 
| int -}} -{{- $nodes := list -}} -{{- range $i := until $nodeCount -}} -{{- $nodes = append $nodes (printf "%s-%d.%s.%s.svc.%s" $fullname $i $headlessSvc $namespace $clusterDomain) -}} -{{- end -}} -{{- join " " $nodes -}} -{{- end -}} diff --git a/valkey/templates/cluster-init-job.yaml b/valkey/templates/cluster-init-job.yaml index 40ddea57..a4e8ab87 100644 --- a/valkey/templates/cluster-init-job.yaml +++ b/valkey/templates/cluster-init-job.yaml @@ -1,4 +1,5 @@ {{- if .Values.cluster.enabled }} +{{- include "valkey.validateAuthConfig" . }} {{- include "valkey.validateClusterConfig" . }} {{- include "valkey.validateClusterAuth" . }} apiVersion: batch/v1 diff --git a/valkey/templates/cluster-statefulset.yaml b/valkey/templates/cluster-statefulset.yaml index 013c6a1d..6ae63e57 100644 --- a/valkey/templates/cluster-statefulset.yaml +++ b/valkey/templates/cluster-statefulset.yaml @@ -8,10 +8,18 @@ metadata: name: {{ include "valkey.fullname" . }} labels: {{- include "valkey.labels" . | nindent 4 }} + {{- with .Values.workloadAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} spec: serviceName: {{ include "valkey.fullname" . }}-headless replicas: {{ include "valkey.clusterNodeCount" . }} podManagementPolicy: Parallel + {{- if .Values.cluster.persistentVolumeClaimRetentionPolicy }} + persistentVolumeClaimRetentionPolicy: + {{- toYaml .Values.cluster.persistentVolumeClaimRetentionPolicy | nindent 4 }} + {{- end }} selector: matchLabels: {{- include "valkey.selectorLabels" . 
| nindent 6 }} @@ -85,10 +93,6 @@ spec: mountPath: /usr/local/etc/valkey/valkey.conf subPath: valkey.conf {{- end }} - {{- if .Values.extraSecretValkeyConfigs }} - - name: extravalkeyconfigs-volume - mountPath: /extravalkeyconfigs - {{- end }} {{- if .Values.auth.enabled }} - name: valkey-acl mountPath: /etc/valkey @@ -132,24 +136,31 @@ spec: - name: tcp-bus containerPort: {{ .Values.cluster.busPort }} protocol: TCP + {{- $pingCmd := "valkey-cli ping" }} + {{- if .Values.tls.enabled }} + {{- $pingCmd = printf "valkey-cli --tls --cacert /tls/%s ping" .Values.tls.caPublicKey }} + {{- end }} + {{- /* When auth is enforced the server returns 'NOAUTH Authentication required.' — accept it as proof of liveness. */}} + {{- $probeCmd := printf "%s 2>&1 | grep -qE 'PONG|NOAUTH'" $pingCmd }} startupProbe: exec: - {{- if .Values.tls.enabled }} - command: [ "sh", "-c", "valkey-cli --cacert /tls/{{ .Values.tls.caPublicKey }} --tls ping" ] - {{- else }} - command: [ "sh", "-c", "valkey-cli ping" ] - {{- end }} + command: [ "sh", "-c", {{ $probeCmd | quote }} ] initialDelaySeconds: 5 periodSeconds: 5 timeoutSeconds: 5 failureThreshold: 30 livenessProbe: exec: - {{- if .Values.tls.enabled }} - command: [ "sh", "-c", "valkey-cli --cacert /tls/{{ .Values.tls.caPublicKey }} --tls ping" ] - {{- else }} - command: [ "sh", "-c", "valkey-cli ping" ] - {{- end }} + command: [ "sh", "-c", {{ $probeCmd | quote }} ] + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 + readinessProbe: + exec: + command: [ "sh", "-c", {{ $probeCmd | quote }} ] + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 resources: {{- toYaml .Values.resources | nindent 12 }} volumeMounts: @@ -181,6 +192,9 @@ spec: - name: {{ $config.name }}-valkey mountPath: {{ $config.mountPath }} {{- end }} + {{- with .Values.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} {{- if .Values.metrics.enabled }} - name: metrics image: {{ include "valkey.metrics.exporter.image" . 
}} @@ -221,11 +235,31 @@ spec: env: - name: REDIS_ALIAS value: {{ include "valkey.fullname" . }} + {{- if .Values.auth.enabled }} + {{- $defaultUser := get (.Values.auth.aclUsers | default dict) "default" | default dict }} + {{- $hasInlineDefaultPassword := hasKey $defaultUser "password" }} + {{- if .Values.auth.usersExistingSecret }} + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.auth.usersExistingSecret }} + key: {{ $defaultUser.passwordKey | default "default" }} + {{- else if $hasInlineDefaultPassword }} + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: {{ include "valkey.fullname" . }}-auth + key: default-password + {{- end }} + {{- end }} {{- range $key, $val := .Values.metrics.exporter.extraEnvs }} - name: {{ $key }} value: "{{ $val }}" {{- end }} {{- end }} + {{- with .Values.extraContainers }} + {{- toYaml . | nindent 8 }} + {{- end }} volumes: - name: scripts configMap: @@ -281,6 +315,9 @@ spec: defaultMode: 0400 {{- end }} {{- end }} + {{- with .Values.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} {{- with .Values.nodeSelector }} nodeSelector: {{- toYaml . 
| nindent 8 }} diff --git a/valkey/templates/deploy_valkey.yaml b/valkey/templates/deploy_valkey.yaml index 19a96b7d..08516e52 100644 --- a/valkey/templates/deploy_valkey.yaml +++ b/valkey/templates/deploy_valkey.yaml @@ -69,10 +69,6 @@ spec: mountPath: /usr/local/etc/valkey/valkey.conf subPath: valkey.conf {{- end }} - {{- if .Values.extraSecretValkeyConfigs }} - - name: extravalkeyconfigs-volume - mountPath: /extravalkeyconfigs - {{- end }} {{- if .Values.auth.enabled }} - name: valkey-acl mountPath: /etc/valkey @@ -116,20 +112,30 @@ spec: - name: tcp containerPort: {{ .Values.service.port }} protocol: TCP + {{- $pingCmd := "valkey-cli ping" }} + {{- if .Values.tls.enabled }} + {{- $pingCmd = printf "valkey-cli --tls --cacert /tls/%s ping" .Values.tls.caPublicKey }} + {{- end }} + {{- /* When auth is enforced the server returns 'NOAUTH Authentication required.' — accept it as proof of liveness. */}} + {{- $probeCmd := printf "%s 2>&1 | grep -qE 'PONG|NOAUTH'" $pingCmd }} startupProbe: exec: - {{- if .Values.tls.enabled }} - command: [ "sh", "-c", "valkey-cli --cacert /tls/{{ .Values.tls.caPublicKey }} --tls ping" ] - {{- else }} - command: [ "sh", "-c", "valkey-cli ping" ] - {{- end }} + command: [ "sh", "-c", {{ $probeCmd | quote }} ] + periodSeconds: 5 + timeoutSeconds: 5 + failureThreshold: 30 livenessProbe: exec: - {{- if .Values.tls.enabled }} - command: [ "sh", "-c", "valkey-cli --cacert /tls/{{ .Values.tls.caPublicKey }} --tls ping" ] - {{- else }} - command: [ "sh", "-c", "valkey-cli ping" ] - {{- end }} + command: [ "sh", "-c", {{ $probeCmd | quote }} ] + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 + readinessProbe: + exec: + command: [ "sh", "-c", {{ $probeCmd | quote }} ] + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 resources: {{- toYaml .Values.resources | nindent 12 }} volumeMounts: @@ -198,18 +204,21 @@ spec: - name: REDIS_ALIAS value: {{ include "valkey.fullname" . 
}} {{- if .Values.auth.enabled }} + {{- $defaultUser := get (.Values.auth.aclUsers | default dict) "default" | default dict }} + {{- $hasInlineDefaultPassword := hasKey $defaultUser "password" }} + {{- if .Values.auth.usersExistingSecret }} - name: REDIS_PASSWORD valueFrom: secretKeyRef: - {{- if .Values.auth.usersExistingSecret }} - {{- $defaultUser := index .Values.auth.aclUsers "default" | default dict }} - {{- $passwordKey := $defaultUser.passwordKey | default "default" }} name: {{ .Values.auth.usersExistingSecret }} - key: {{ $passwordKey }} - {{- else }} + key: {{ $defaultUser.passwordKey | default "default" }} + {{- else if $hasInlineDefaultPassword }} + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: name: {{ include "valkey.fullname" . }}-auth key: default-password - {{- end }} + {{- end }} {{- end }} {{- range $key, $val := .Values.metrics.exporter.extraEnvs }} - name: {{ $key }} diff --git a/valkey/templates/init_config.yaml b/valkey/templates/init_config.yaml index 654e156d..4bce19e8 100644 --- a/valkey/templates/init_config.yaml +++ b/valkey/templates/init_config.yaml @@ -53,7 +53,10 @@ data: fi {{- end }} - echo "$password" + # printf is byte-safe; dash's `echo` quietly interprets backslash + # escapes (\b, \t, \\, etc.), corrupting any password that contains a + # backslash before it's hashed into the ACL. + printf '%s' "$password" } {{- end }} @@ -123,8 +126,9 @@ data: # User: {{ $username }} PASSWORD=$(get_user_password "{{ $username }}" "{{ $passwordKey }}") || exit 1 - # Hash the password and write ACL entry - PASSHASH=$(echo -n "$PASSWORD" | sha256sum | cut -f 1 -d " ") + # Hash the password and write ACL entry. printf (not echo -n) is POSIX — + # echo -n is implementation-defined and quietly emits `-n\n` under some shells. 
+ PASSHASH=$(printf '%s' "$PASSWORD" | sha256sum | cut -f 1 -d " ") echo "user {{ $username }} on #$PASSHASH {{ $user.permissions }}" >> /etc/valkey/users.acl {{- end }} @@ -190,8 +194,10 @@ data: {{- $replPasswordKey := $replUser.passwordKey | default $replUsername }} REPL_PASSWORD=$(get_user_password "{{ $replUsername }}" "{{ $replPasswordKey }}") || exit 1 - # Write masterauth configuration - echo "masterauth $REPL_PASSWORD" >>"$VALKEY_CONFIG" + # Write masterauth configuration. Quote + backslash-escape so passwords + # containing quotes/backslashes survive valkey.conf parsing. + REPL_PASSWORD_ESC=$(printf '%s' "$REPL_PASSWORD" | sed 's/\\/\\\\/g; s/"/\\"/g') + printf 'masterauth "%s"\n' "$REPL_PASSWORD_ESC" >>"$VALKEY_CONFIG" echo "masteruser {{ $replUsername }}" >>"$VALKEY_CONFIG" log "Configured masterauth with user {{ $replUsername }}" {{- end }} @@ -252,12 +258,15 @@ data: {{- if .Values.auth.enabled }} # Configure cluster authentication {{- $replUsername := .Values.cluster.replicationUser }} - REPL_PASSWORD=$(get_user_password "{{ $replUsername }}") || exit 1 + {{- $replUser := index .Values.auth.aclUsers $replUsername }} + {{- $replPasswordKey := $replUser.passwordKey | default $replUsername }} + REPL_PASSWORD=$(get_user_password "{{ $replUsername }}" "{{ $replPasswordKey }}") || exit 1 + REPL_PASSWORD_ESC=$(printf '%s' "$REPL_PASSWORD" | sed 's/\\/\\\\/g; s/"/\\"/g') { echo "" echo "# Cluster authentication" - echo "masterauth $REPL_PASSWORD" + printf 'masterauth "%s"\n' "$REPL_PASSWORD_ESC" echo "masteruser {{ $replUsername }}" } >>"$VALKEY_CONFIG" log "Configured cluster authentication with user {{ $replUsername }}" @@ -280,8 +289,4 @@ data: log "Appending /usr/local/etc/valkey/valkey.conf" cat /usr/local/etc/valkey/valkey.conf >>"$VALKEY_CONFIG" fi - if [ -d /extravalkeyconfigs ]; then - log "Appending files in /extravalkeyconfigs/" - cat /extravalkeyconfigs/* >>"$VALKEY_CONFIG" - fi diff --git a/valkey/templates/netpolicy.yaml 
b/valkey/templates/netpolicy.yaml index f65c504d..a4272636 100644 --- a/valkey/templates/netpolicy.yaml +++ b/valkey/templates/netpolicy.yaml @@ -1,4 +1,11 @@ {{- with .Values.networkPolicy }} +{{- /* +Gate on `hasKey` rather than truthiness: an empty list still counts as +the user declaring a policy (e.g. `ingress: []` for default-deny). +Otherwise an empty array would produce a NetworkPolicy with `policyTypes: []` +which the API server accepts but does nothing useful. +*/}} +{{- if or (hasKey . "ingress") (hasKey . "egress") }} apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: @@ -16,18 +23,25 @@ spec: matchLabels: {{- include "valkey.selectorLabels" $ | nindent 6 }} policyTypes: - {{- if .ingress }} + {{- if hasKey . "ingress" }} - Ingress {{- end }} - {{- if .egress }} + {{- if hasKey . "egress" }} - Egress {{- end }} - {{- with .ingress }} + {{- if hasKey . "ingress" }} ingress: - {{- toYaml . | nindent 4 }} + {{- if .ingress }} + {{- toYaml .ingress | nindent 4 }} + {{- else }} [] + {{- end }} {{- end }} - {{- with .egress }} + {{- if hasKey . "egress" }} egress: - {{- toYaml . 
| nindent 4 }} + {{- if .egress }} + {{- toYaml .egress | nindent 4 }} + {{- else }} [] + {{- end }} {{- end }} {{- end }} +{{- end }} diff --git a/valkey/templates/poddisruptionbudget.yaml b/valkey/templates/poddisruptionbudget.yaml index ff123525..00430f71 100644 --- a/valkey/templates/poddisruptionbudget.yaml +++ b/valkey/templates/poddisruptionbudget.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.podDisruptionBudget.enabled .Values.replica.enabled }} +{{- if and .Values.podDisruptionBudget.enabled (or .Values.replica.enabled .Values.cluster.enabled) }} apiVersion: policy/v1 kind: PodDisruptionBudget metadata: diff --git a/valkey/templates/service-read.yaml b/valkey/templates/service-read.yaml index 49ec54e7..de84466d 100644 --- a/valkey/templates/service-read.yaml +++ b/valkey/templates/service-read.yaml @@ -18,8 +18,9 @@ spec: {{- if .Values.replica.service.loadBalancerClass }} loadBalancerClass: {{ .Values.replica.service.loadBalancerClass }} {{- end }} - {{- if .Values.replica.service.loadBalancerSourceRanges }} - loadBalancerSourceRanges: {{ .Values.replica.service.loadBalancerSourceRanges }} + {{- with .Values.replica.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- toYaml . | nindent 4 }} {{- end }} ports: - name: tcp diff --git a/valkey/templates/service.yaml b/valkey/templates/service.yaml index dbfc38fb..353375c2 100644 --- a/valkey/templates/service.yaml +++ b/valkey/templates/service.yaml @@ -17,8 +17,9 @@ spec: {{- if .Values.service.loadBalancerClass }} loadBalancerClass: {{ .Values.service.loadBalancerClass }} {{- end }} - {{- if .Values.service.loadBalancerSourceRanges }} - loadBalancerSourceRanges: {{ .Values.service.loadBalancerSourceRanges }} + {{- with .Values.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- toYaml . 
| nindent 4 }} {{- end }} ports: - port: {{ .Values.service.port }} @@ -31,12 +32,6 @@ spec: {{- if .Values.service.appProtocol }} appProtocol: {{ .Values.service.appProtocol }} {{- end }} - {{- if .Values.cluster.enabled }} - - port: {{ .Values.cluster.busPort }} - targetPort: tcp-bus - protocol: TCP - name: tcp-bus - {{- end }} selector: {{- include "valkey.selectorLabels" . | nindent 4 }} {{- if .Values.replica.enabled }} diff --git a/valkey/templates/statefulset.yaml b/valkey/templates/statefulset.yaml index 4a8d4caa..b5cb0e15 100644 --- a/valkey/templates/statefulset.yaml +++ b/valkey/templates/statefulset.yaml @@ -85,10 +85,6 @@ spec: mountPath: /usr/local/etc/valkey/valkey.conf subPath: valkey.conf {{- end }} - {{- if .Values.extraSecretValkeyConfigs }} - - name: extravalkeyconfigs-volume - mountPath: /extravalkeyconfigs - {{- end }} {{- if .Values.auth.enabled }} - name: valkey-acl mountPath: /etc/valkey @@ -119,10 +115,6 @@ spec: securityContext: {{- toYaml .Values.securityContext | nindent 12 }} env: - - name: POD_INDEX - valueFrom: - fieldRef: - fieldPath: metadata.labels['apps.kubernetes.io/pod-index'] {{- range $key, $val := .Values.env }} - name: {{ $key }} value: "{{ $val }}" @@ -133,20 +125,30 @@ spec: - name: tcp containerPort: {{ .Values.service.port }} protocol: TCP + {{- $pingCmd := "valkey-cli ping" }} + {{- if .Values.tls.enabled }} + {{- $pingCmd = printf "valkey-cli --tls --cacert /tls/%s ping" .Values.tls.caPublicKey }} + {{- end }} + {{- /* When auth is enforced the server returns 'NOAUTH Authentication required.' — accept it as proof of liveness. 
*/}} + {{- $probeCmd := printf "%s 2>&1 | grep -qE 'PONG|NOAUTH'" $pingCmd }} startupProbe: exec: - {{- if .Values.tls.enabled }} - command: [ "sh", "-c", "valkey-cli --cacert /tls/{{ .Values.tls.caPublicKey }} --tls ping" ] - {{- else }} - command: [ "sh", "-c", "valkey-cli ping" ] - {{- end }} + command: [ "sh", "-c", {{ $probeCmd | quote }} ] + periodSeconds: 5 + timeoutSeconds: 5 + failureThreshold: 30 livenessProbe: exec: - {{- if .Values.tls.enabled }} - command: [ "sh", "-c", "valkey-cli --cacert /tls/{{ .Values.tls.caPublicKey }} --tls ping" ] - {{- else }} - command: [ "sh", "-c", "valkey-cli ping" ] - {{- end }} + command: [ "sh", "-c", {{ $probeCmd | quote }} ] + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 + readinessProbe: + exec: + command: [ "sh", "-c", {{ $probeCmd | quote }} ] + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 resources: {{- toYaml .Values.resources | nindent 12 }} volumeMounts: @@ -212,18 +214,21 @@ spec: - name: REDIS_ALIAS value: {{ include "valkey.fullname" . }} {{- if .Values.auth.enabled }} + {{- $defaultUser := get (.Values.auth.aclUsers | default dict) "default" | default dict }} + {{- $hasInlineDefaultPassword := hasKey $defaultUser "password" }} + {{- if .Values.auth.usersExistingSecret }} - name: REDIS_PASSWORD valueFrom: secretKeyRef: - {{- if .Values.auth.usersExistingSecret }} - {{- $defaultUser := index .Values.auth.aclUsers "default" | default dict }} - {{- $passwordKey := $defaultUser.passwordKey | default "default" }} name: {{ .Values.auth.usersExistingSecret }} - key: {{ $passwordKey }} - {{- else }} + key: {{ $defaultUser.passwordKey | default "default" }} + {{- else if $hasInlineDefaultPassword }} + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: name: {{ include "valkey.fullname" . 
}}-auth key: default-password - {{- end }} + {{- end }} {{- end }} {{- range $key, $val := .Values.metrics.exporter.extraEnvs }} - name: {{ $key }} diff --git a/valkey/templates/tests/auth.yaml b/valkey/templates/tests/auth.yaml index b289bb98..833d365a 100644 --- a/valkey/templates/tests/auth.yaml +++ b/valkey/templates/tests/auth.yaml @@ -19,9 +19,13 @@ metadata: "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded spec: restartPolicy: Never + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 4 }} containers: - name: test-auth image: {{ include "valkey.image" . | quote }} + securityContext: + {{- toYaml .Values.securityContext | nindent 8 }} command: - sh - -c @@ -95,9 +99,13 @@ metadata: "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded spec: restartPolicy: Never + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 4 }} containers: - name: test-auth image: {{ include "valkey.image" . | quote }} + securityContext: + {{- toYaml .Values.securityContext | nindent 8 }} command: - sh - -c diff --git a/valkey/tests/cluster_test.yaml b/valkey/tests/cluster_test.yaml index 686a34c6..485cc673 100644 --- a/valkey/tests/cluster_test.yaml +++ b/valkey/tests/cluster_test.yaml @@ -211,7 +211,8 @@ tests: protocol: TCP # Main service tests - - it: should create service with bus port in cluster mode + - it: should not expose the bus port on the frontend service in cluster mode + # Bus port is pod-to-pod only; clients reach nodes via the headless service. 
set: cluster.enabled: true cluster.persistence.size: "5Gi" @@ -220,13 +221,14 @@ tests: asserts: - isKind: of: Service - - contains: + - notContains: path: spec.ports content: name: tcp-bus + - notContains: + path: spec.ports + content: port: 16379 - targetPort: tcp-bus - protocol: TCP # Cluster init script ConfigMap tests - it: should create cluster-script ConfigMap when cluster is enabled @@ -899,3 +901,81 @@ tests: name: valkey-auth-secret mountPath: /valkey-auth-secret readOnly: true + + # Regression: probes must accept NOAUTH as proof of liveness. + - it: should use a PONG|NOAUTH-tolerant probe on the valkey container + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.aclUsers: + default: + permissions: "~* &* +@all" + password: "p" + template: templates/cluster-statefulset.yaml + asserts: + - matchRegex: + path: spec.template.spec.containers[0].startupProbe.exec.command[2] + pattern: "PONG\\|NOAUTH" + - matchRegex: + path: spec.template.spec.containers[0].livenessProbe.exec.command[2] + pattern: "PONG\\|NOAUTH" + - matchRegex: + path: spec.template.spec.containers[0].readinessProbe.exec.command[2] + pattern: "PONG\\|NOAUTH" + + - it: should define a readiness probe + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - exists: + path: spec.template.spec.containers[0].readinessProbe + + # Regression: extraContainers and extraVolumes were unwired in cluster mode. 
+ - it: should wire extraContainers and extraVolumes through in cluster mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + extraContainers: + - name: sidecar + image: busybox:1.36 + extraVolumes: + - name: extra + emptyDir: {} + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.containers + content: + name: sidecar + image: busybox:1.36 + - contains: + path: spec.template.spec.volumes + content: + name: extra + emptyDir: {} + + # Regression: REDIS_PASSWORD should be wired through in cluster mode too. + - it: should wire REDIS_PASSWORD to the metrics exporter from the generated auth secret + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + metrics.enabled: true + auth.enabled: true + auth.aclUsers: + default: + permissions: "~* &* +@all" + password: "p" + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.containers[1].env + content: + name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: RELEASE-NAME-valkey-auth + key: default-password + diff --git a/valkey/tests/deployment_test.yaml b/valkey/tests/deployment_test.yaml index 27aaa72c..5b4346c0 100644 --- a/valkey/tests/deployment_test.yaml +++ b/valkey/tests/deployment_test.yaml @@ -464,3 +464,50 @@ tests: secretKeyRef: name: my-custom-secret key: my-password-key + + # Regression: previously the exporter pointed REDIS_PASSWORD at a key + # (`default-password`) that is never created in aclConfig-only mode, so the + # container crash-looped on CreateContainerConfigError. 
+ - it: should omit REDIS_PASSWORD when only auth.aclConfig is set + set: + auth.enabled: true + auth.aclConfig: "user default on >p ~* &* +@all" + metrics.enabled: true + template: templates/deploy_valkey.yaml + asserts: + - isKind: + of: Deployment + - notContains: + path: spec.template.spec.containers[1].env + content: + name: REDIS_PASSWORD + any: true + + # Regression: probes must accept NOAUTH as proof of liveness, otherwise every + # auth-enabled deployment's liveness probe silently passes on exit code 0 + # while not actually checking anything meaningful. + - it: should use a PONG|NOAUTH-tolerant probe on the valkey container + set: + auth.enabled: true + auth.aclUsers: + default: + permissions: "~* &* +@all" + password: "p" + template: templates/deploy_valkey.yaml + asserts: + - matchRegex: + path: spec.template.spec.containers[0].startupProbe.exec.command[2] + pattern: "PONG\\|NOAUTH" + - matchRegex: + path: spec.template.spec.containers[0].livenessProbe.exec.command[2] + pattern: "PONG\\|NOAUTH" + - matchRegex: + path: spec.template.spec.containers[0].readinessProbe.exec.command[2] + pattern: "PONG\\|NOAUTH" + + # Regression: there should be a readiness probe at all — previously missing. + - it: should define a readiness probe + template: templates/deploy_valkey.yaml + asserts: + - exists: + path: spec.template.spec.containers[0].readinessProbe diff --git a/valkey/tests/init_config_test.yaml b/valkey/tests/init_config_test.yaml index 0b1067de..cc19b36f 100644 --- a/valkey/tests/init_config_test.yaml +++ b/valkey/tests/init_config_test.yaml @@ -145,3 +145,44 @@ tests: - equal: path: metadata.labels["app.kubernetes.io/name"] value: valkey + + # Regression: `echo "$password"` in dash interprets \b, \t, \n, \\, etc. + # before writing them out. 
A password containing a backslash then gets a + # DIFFERENT sha256 than the bytes that were stored in the Secret — client + # auth with the real password fails WRONGPASS, since the stored ACL hash + # is of the mangled bytes. Must be `printf '%s'` (byte-safe). + - it: get_user_password must be byte-safe (no echo) + set: + auth.enabled: true + auth.aclUsers: + admin: + permissions: "~* &* +@all" + password: "admin-password" + asserts: + - notMatchRegex: + path: data["init.sh"] + pattern: 'echo "\$password"' + - matchRegex: + path: data["init.sh"] + pattern: "printf '%s' \"\\$password\"" + + # Regression: masterauth line in valkey.conf used to be unquoted, so a + # password with whitespace or `#` (valkey.conf comment char) would break + # the config parser. Must be double-quoted with backslash-escapes. + - it: masterauth must be written as a quoted+escaped string + set: + auth.enabled: true + replica.enabled: true + replica.persistence.size: "5Gi" + auth.aclUsers: + default: + permissions: "~* &* +@all" + password: "whatever" + asserts: + - matchRegex: + path: data["init.sh"] + pattern: "printf 'masterauth \"%s\"\\\\n' \"\\$REPL_PASSWORD_ESC\"" + - matchRegex: + path: data["init.sh"] + # The escape pass: s/\\/\\\\/g; s/"/\\"/g + pattern: "REPL_PASSWORD_ESC=\\$\\(printf '%s' \"\\$REPL_PASSWORD\"" diff --git a/valkey/tests/netpolicy_test.yaml b/valkey/tests/netpolicy_test.yaml new file mode 100644 index 00000000..43081f23 --- /dev/null +++ b/valkey/tests/netpolicy_test.yaml @@ -0,0 +1,49 @@ +suite: network policy configuration +templates: + - templates/netpolicy.yaml +tests: + - it: should render nothing when networkPolicy is empty + asserts: + - hasDocuments: + count: 0 + + # Default-deny ingress is expressed as an empty list; the chart must keep + # that distinct from "never declared" — otherwise the opt-in is lost. 
+ - it: should render a default-deny ingress policy when ingress is an empty list + set: + networkPolicy: + ingress: [] + asserts: + - isKind: + of: NetworkPolicy + - equal: + path: spec.policyTypes + value: + - Ingress + - equal: + path: spec.ingress + value: [] + - notExists: + path: spec.egress + + - it: should render both ingress and egress when both declared + set: + networkPolicy: + ingress: + - from: + - podSelector: {} + egress: + - to: + - podSelector: {} + asserts: + - isKind: + of: NetworkPolicy + - equal: + path: spec.policyTypes + value: + - Ingress + - Egress + - isNotEmpty: + path: spec.ingress + - isNotEmpty: + path: spec.egress diff --git a/valkey/tests/poddisruptionbudget_test.yaml b/valkey/tests/poddisruptionbudget_test.yaml index dd7c6079..6999ea22 100644 --- a/valkey/tests/poddisruptionbudget_test.yaml +++ b/valkey/tests/poddisruptionbudget_test.yaml @@ -11,14 +11,31 @@ tests: - hasDocuments: count: 0 - - it: should not create PDB when replica is disabled + - it: should not create PDB when neither replica nor cluster is enabled set: replica.enabled: false + cluster.enabled: false podDisruptionBudget.enabled: true asserts: - hasDocuments: count: 0 + # Regression: PDB used to be gated on replica mode only; cluster mode was + # silently unprotected. 
+ - it: should create PDB when enabled with cluster mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + podDisruptionBudget.enabled: true + asserts: + - hasDocuments: + count: 1 + - isKind: + of: PodDisruptionBudget + - equal: + path: spec.maxUnavailable + value: 1 + - it: should create PDB when enabled with replica set: replica.enabled: true diff --git a/valkey/tests/service_test.yaml b/valkey/tests/service_test.yaml index d7233c41..364fa083 100644 --- a/valkey/tests/service_test.yaml +++ b/valkey/tests/service_test.yaml @@ -115,7 +115,9 @@ tests: of: Service - notExists: path: spec.selector["statefulset.kubernetes.io/pod-name"] - - it: should have cluster bus port when cluster.enabled is true + - it: should never expose the cluster bus port on the frontend service + # The bus port carries gossip + failover traffic between nodes; it's + # reached via the headless service, not the round-robin frontend. set: cluster.enabled: true cluster.busPort: 16379 @@ -123,21 +125,28 @@ tests: asserts: - isKind: of: Service - - contains: + - notContains: path: spec.ports content: - port: 16379 - targetPort: tcp-bus - protocol: TCP name: tcp-bus - - it: should not have cluster bus port when cluster.enabled is false - set: - cluster.enabled: false - template: templates/service.yaml - asserts: - - isKind: - of: Service - notContains: path: spec.ports content: - name: tcp-bus + port: 16379 + + # Regression: loadBalancerSourceRanges used to render via Go's default + # slice-to-string pipeline ([a b]), which the API server rejects as invalid + # CIDR. The fix is to emit a proper YAML list. 
+ - it: should render loadBalancerSourceRanges as a YAML list + set: + service.type: LoadBalancer + service.loadBalancerSourceRanges: + - "1.2.3.4/32" + - "5.6.7.8/32" + template: templates/service.yaml + asserts: + - equal: + path: spec.loadBalancerSourceRanges + value: + - "1.2.3.4/32" + - "5.6.7.8/32" diff --git a/valkey/tests/statefulset_test.yaml b/valkey/tests/statefulset_test.yaml index 6deb88ab..fb293673 100644 --- a/valkey/tests/statefulset_test.yaml +++ b/valkey/tests/statefulset_test.yaml @@ -371,3 +371,34 @@ tests: secretKeyRef: name: my-custom-secret key: my-password-key + + # Regression: probes must accept NOAUTH as proof of liveness. + - it: should use a PONG|NOAUTH-tolerant probe on the valkey container + set: + replica.enabled: true + replica.persistence.size: "5Gi" + auth.enabled: true + auth.aclUsers: + default: + permissions: "~* &* +@all" + password: "p" + template: templates/statefulset.yaml + asserts: + - matchRegex: + path: spec.template.spec.containers[0].startupProbe.exec.command[2] + pattern: "PONG\\|NOAUTH" + - matchRegex: + path: spec.template.spec.containers[0].livenessProbe.exec.command[2] + pattern: "PONG\\|NOAUTH" + - matchRegex: + path: spec.template.spec.containers[0].readinessProbe.exec.command[2] + pattern: "PONG\\|NOAUTH" + + - it: should define a readiness probe + set: + replica.enabled: true + replica.persistence.size: "5Gi" + template: templates/statefulset.yaml + asserts: + - exists: + path: spec.template.spec.containers[0].readinessProbe diff --git a/valkey/values.schema.json b/valkey/values.schema.json index 4c4ab001..94f5b553 100644 --- a/valkey/values.schema.json +++ b/valkey/values.schema.json @@ -65,6 +65,9 @@ }, "busPort": { "type": "integer" + }, + "persistentVolumeClaimRetentionPolicy": { + "type": "object" } } }, @@ -128,9 +131,6 @@ "extraInitContainers": { "type": "array" }, - "extraSecretValkeyConfigs": { - "type": "boolean" - }, "extraValkeyConfigs": { "type": "array" }, diff --git a/valkey/values.yaml 
b/valkey/values.yaml index 062b234c..d072d371 100644 --- a/valkey/values.yaml +++ b/valkey/values.yaml @@ -156,9 +156,6 @@ extraValkeySecrets: [] # Mount additional configMaps into the Valkey container extraValkeyConfigs: [] -# Mount extra secrets as volume to init container (deprecated, use extraValkeySecrets) -extraSecretValkeyConfigs: false - # Mount additional emptyDir or hostPath volumes (advanced use) extraVolumes: [] # - name: hostpath-volume @@ -308,6 +305,11 @@ cluster: # This port is used for node-to-node communication in the cluster busPort: 16379 + # PersistentVolumeClaim retention policy for StatefulSet + # Controls when PVCs are deleted (requires Kubernetes 1.23+) + # More info: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention + persistentVolumeClaimRetentionPolicy: {} + tls: # Enable TLS enabled: false @@ -342,7 +344,13 @@ istio: # DestinationRule configures mTLS for outbound connections to Valkey services destinationRule: # TLS mode for outbound traffic (DISABLE, SIMPLE, MUTUAL, ISTIO_MUTUAL) - # ISTIO_MUTUAL: Use Istio-managed certificates for mTLS + # ISTIO_MUTUAL: Use Istio-managed certificates for mTLS. + # NOTE: When tls.enabled is true the Valkey pods already terminate TLS + # themselves. Keeping mode=ISTIO_MUTUAL wraps app-level TLS in Envoy mTLS + # (double encryption) and still works, but doubles crypto overhead. If you + # only want mesh-level mTLS, set tls.enabled=false here and rely on Istio. + # If you prefer app-level TLS only, set mode=DISABLE so Envoy passes the + # TLS bytes through untouched. 
mode: ISTIO_MUTUAL # Additional labels for the DestinationRule resource labels: {} From cbc3ccbc3d28393b0de85e136ef3c71385db0594 Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Sat, 2 May 2026 04:08:21 +0530 Subject: [PATCH 10/23] feat: multiple Valkey clusters in single K8s cluster Signed-off-by: Ankit Pati --- functional-tests/run-extra-scenarios.sh | 152 +++++++++++++++++- functional-tests/run-scenario.sh | 15 +- .../templates/cluster-isolation-netpol.yaml | 72 +++++++++ .../tests/cluster_isolation_netpol_test.yaml | 127 +++++++++++++++ valkey/values.schema.json | 8 + valkey/values.yaml | 19 +++ 6 files changed, 385 insertions(+), 8 deletions(-) create mode 100644 valkey/templates/cluster-isolation-netpol.yaml create mode 100644 valkey/tests/cluster_isolation_netpol_test.yaml diff --git a/functional-tests/run-extra-scenarios.sh b/functional-tests/run-extra-scenarios.sh index c77b11ba..9bf5c625 100755 --- a/functional-tests/run-extra-scenarios.sh +++ b/functional-tests/run-extra-scenarios.sh @@ -220,12 +220,154 @@ scenario_readiness_probe_exists() { pass "${name}" } -trap 'cleanup_release' EXIT +# --------------------------------------------------------------------------- +# Scenario: two independent Valkey clusters in the same namespace must stay +# independent. Valkey's CLUSTER MEET has no auth, so a MEET issued by (or +# forwarded through) a node in cluster A can merge cluster B into it. The +# chart's cluster-isolation NetworkPolicy pins the bus port to same-release +# pods; without it, a stray MEET wins. +# +# This test: +# 1) installs `valkey-a` and `valkey-b` in the same namespace, cluster mode; +# 2) issues CLUSTER MEET from a node in A targeting a node in B; +# 3) waits for gossip to propagate; +# 4) asserts A still has its original 3 nodes (not 6). +# +# Also runs a negative twin with `cluster.isolation.enabled=false` to prove +# the assertion has teeth — if isolation is the thing keeping them apart, +# disabling it must let the merge happen. 
+# --------------------------------------------------------------------------- + +# Install one cluster-mode release with a given name and isolation flag. +# Globals it expects: NAMESPACE, CHART_DIR, KUBE_CONTEXT. +install_cluster() { + local release=$1 isolation=$2 + hctl install "${release}" "${CHART_DIR}" \ + --set=cluster.enabled=true \ + --set=cluster.persistence.size=100Mi \ + --set=cluster.shards=3 \ + --set=cluster.replicasPerShard=0 \ + --set="cluster.isolation.enabled=${isolation}" \ + --set-string='podLabels.sidecar\.istio\.io/inject=false' \ + --wait --timeout=300s >/dev/null +} + +# Count unique nodes reported by `cluster nodes` on pod-0 of the given release. +# Prints 0 if the query pipeline fails (counts as "indeterminate"). +count_cluster_nodes() { + local release=$1 + # Drop blank lines and keep column 1 (the node ID) so each node counts once. + kctl exec "${release}-0" -c "${release}" -- sh -c \ + "valkey-cli cluster nodes 2>/dev/null | awk 'NF {print \$1}' | sort -u | wc -l" \ + 2>/dev/null | tr -d '[:space:]' || echo 0 +} + +# Fire CLUSTER MEET from src_release pod-0 targeting dst_release pod-0. +poison_meet() { + local src_release=$1 dst_release=$2 + local dst_ip + dst_ip=$(kctl get pod "${dst_release}-0" -o jsonpath='{.status.podIP}') + [[ -n ${dst_ip} ]] || return 1 + kctl exec "${src_release}-0" -c "${src_release}" -- \ + valkey-cli cluster meet "${dst_ip}" 6379 >/dev/null 2>&1 || true +} + +cleanup_pair() { + hctl uninstall valkey-iso-a 2>/dev/null || true + hctl uninstall valkey-iso-b 2>/dev/null || true + kctl delete pvc --selector='app.kubernetes.io/instance=valkey-iso-a' --ignore-not-found >/dev/null + kctl delete pvc --selector='app.kubernetes.io/instance=valkey-iso-b' --ignore-not-found >/dev/null +} + +scenario_two_clusters_isolated() { + local name="two cluster-mode releases in one namespace stay isolated" + log "SCENARIO: ${name}" + cleanup_pair + + if !
install_cluster valkey-iso-a true; then + fail "${name}" "install of valkey-iso-a failed"; cleanup_pair; return + fi + if ! install_cluster valkey-iso-b true; then + fail "${name}" "install of valkey-iso-b failed"; cleanup_pair; return + fi + + # Baseline — each cluster should see exactly 3 nodes (3 shards, 0 replicas). + local a_before b_before + a_before=$(count_cluster_nodes valkey-iso-a) + b_before=$(count_cluster_nodes valkey-iso-b) + if [[ ${a_before} != 3 || ${b_before} != 3 ]]; then + fail "${name}" "baseline wrong (a=${a_before}, b=${b_before}; want 3+3)" + cleanup_pair; return + fi + + # Try to merge B into A. + poison_meet valkey-iso-a valkey-iso-b + + # After a MEET, Valkey adds the peer to `cluster nodes` immediately as a + # handshake placeholder — so a count of 4 for a few seconds is EXPECTED + # whether or not the merge ultimately succeeds. The real signal is what + # happens *after* the handshake timeout: if bus connectivity exists, the + # node stays (count stays at 4+); if isolation blocks the bus, the + # handshake fails and the placeholder is evicted (count returns to 3). + # + # Cluster node-timeout defaults to 15s; give the failure detector + # multiple intervals to fire, then sample. + sleep 45 + + # After settling, the merge must NOT have stuck. + local a_after b_after + a_after=$(count_cluster_nodes valkey-iso-a) + b_after=$(count_cluster_nodes valkey-iso-b) + + if [[ ${a_after} != 3 || ${b_after} != 3 ]]; then + fail "${name}" "clusters merged (a=${a_after}, b=${b_after}; want 3+3 after settle)" + cleanup_pair; return + fi + + cleanup_pair + pass "${name}" +} + +# Negative twin: without isolation, the SAME MEET must succeed — otherwise +# the positive test isn't proving what we think it's proving. +scenario_isolation_off_lets_merge_happen() { + local name="disabling isolation lets CLUSTER MEET actually merge (teeth check)" + log "SCENARIO: ${name}" + cleanup_pair + + if ! 
install_cluster valkey-iso-a false; then + fail "${name}" "install of valkey-iso-a failed"; cleanup_pair; return + fi + if ! install_cluster valkey-iso-b false; then + fail "${name}" "install of valkey-iso-b failed"; cleanup_pair; return + fi + + poison_meet valkey-iso-a valkey-iso-b + + # Mirror the positive test's 45-second settle window: we're asking the + # SAME question (has the handshake completed?) and need the same amount + # of time for the node-timeout to fire. + sleep 45 + + local a_after + a_after=$(count_cluster_nodes valkey-iso-a) + if [[ ${a_after} -le 3 ]]; then + fail "${name}" "MEET did not merge even without isolation (a=${a_after}); positive test cannot prove isolation works" + cleanup_pair; return + fi + + cleanup_pair + pass "${name}" +} + +trap 'cleanup_release; cleanup_pair' EXIT -scenario_aclconfig_metrics || true -scenario_default_deny_netpol || true -scenario_bus_port_hidden || true -scenario_readiness_probe_exists || true +scenario_aclconfig_metrics || true +scenario_default_deny_netpol || true +scenario_bus_port_hidden || true +scenario_readiness_probe_exists || true +scenario_two_clusters_isolated || true +scenario_isolation_off_lets_merge_happen|| true echo log "Extra scenario summary" diff --git a/functional-tests/run-scenario.sh b/functional-tests/run-scenario.sh index e45707a5..6304be81 100755 --- a/functional-tests/run-scenario.sh +++ b/functional-tests/run-scenario.sh @@ -92,10 +92,12 @@ fi # --------------------------------------------------------------------------- # Install. # --------------------------------------------------------------------------- -log "Installing scenario: ${SCENARIO}" -hctl install "${RELEASE}" "${CHART_DIR}" "${helm_flags[@]}" -# Clean up on exit regardless of pass/fail — the next scenario needs a clean slate. +# Register cleanup BEFORE `helm install`. If the install itself fails +# (timeout, post-install hook never ready, etc.) 
Helm leaves a "failed" +# release in the cluster that blocks every subsequent scenario with a +# `cannot reuse a name that is still in use` error. Trap-before-install +# ensures we always clean up, even on install failure. cleanup() { local rc=$? log "Cleaning up scenario: ${SCENARIO}" @@ -105,6 +107,13 @@ cleanup() { } trap cleanup EXIT +# Also scrub anything left behind by a prior scenario that crashed hard +# (SIGKILL, harness panic) without running its trap. +hctl uninstall "${RELEASE}" 2>/dev/null || true + +log "Installing scenario: ${SCENARIO}" +hctl install "${RELEASE}" "${CHART_DIR}" "${helm_flags[@]}" + # --------------------------------------------------------------------------- # Wait for pods to become ready. # --------------------------------------------------------------------------- diff --git a/valkey/templates/cluster-isolation-netpol.yaml b/valkey/templates/cluster-isolation-netpol.yaml new file mode 100644 index 00000000..53fe8958 --- /dev/null +++ b/valkey/templates/cluster-isolation-netpol.yaml @@ -0,0 +1,72 @@ +{{- /* +Cluster-bus isolation NetworkPolicy. + +Valkey's gossip/cluster-bus protocol has no authentication of its own: a pod +that can open a TCP connection to a node's bus port (default 16379) can send +CLUSTER MEET and merge into the cluster. When two independent Valkey clusters +share a Kubernetes cluster (or even a namespace), nothing in Valkey itself +stops an accidental or malicious MEET from fusing them. + +This policy pins the bus port INBOUND to same-release traffic only, by +matching on `app.kubernetes.io/instance`. Blocking the receiving side of +the MEET handshake is sufficient: the handshake is bidirectional, so with +the receiver refusing connections, the placeholder node is evicted by the +cluster-node-timeout and the two clusters stay separate. + +Client (6379) and metrics (9121) ports stay open — they're application-level +and have their own auth (ACL/TLS). + +We deliberately do NOT set an Egress policyType. 
Adding Egress here would +require enumerating every destination a Valkey pod legitimately needs to +reach (kube-dns, Istio's xDS on istiod:15012, Envoy's health port, JWKS +endpoints for Istio AuthorizationPolicy, and so on); getting that wrong +breaks Istio sidecar bootstrap. Users who want egress isolation on top of +this should add an Istio AuthorizationPolicy (when they have Istio) or a +separate NetworkPolicy targeting `valkey.selectorLabels` — Kubernetes +combines those additively with this one. + +Kubernetes policies are additive: adding this one alongside the user-defined +`networkPolicy` value still allows the user's ingress/egress rules to match. + +Running on a CNI that doesn't enforce NetworkPolicy (plain Flannel, the +in-tree kubenet, etc.) makes this rendered policy a no-op. There is no +namespace-based fallback — pod-to-pod traffic crosses namespaces freely +unless something actually enforces policy at the data plane. On such a +cluster there is no way to prevent a cross-release CLUSTER MEET from the +chart alone; either switch to a policy-enforcing CNI, add an Istio +AuthorizationPolicy at layer 7, or run each Valkey cluster in its own +Kubernetes cluster. +*/}} +{{- if and .Values.cluster.enabled .Values.cluster.isolation.enabled }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "valkey.fullname" . }}-cluster-isolation + labels: + {{- include "valkey.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + {{- include "valkey.selectorLabels" . | nindent 6 }} + policyTypes: + - Ingress + ingress: + # Bus port: only other pods of THIS release. + - from: + - podSelector: + matchLabels: + {{- include "valkey.selectorLabels" . | nindent 14 }} + ports: + - protocol: TCP + port: {{ .Values.cluster.busPort }} + # Client port: anyone. ACL + TLS guard it above the network layer. 
+ - ports: + - protocol: TCP + port: {{ .Values.service.port }} + {{- if .Values.metrics.enabled }} + # Metrics sidecar: anyone (typically Prometheus). + - ports: + - protocol: TCP + port: {{ .Values.metrics.exporter.port }} + {{- end }} +{{- end }} diff --git a/valkey/tests/cluster_isolation_netpol_test.yaml b/valkey/tests/cluster_isolation_netpol_test.yaml new file mode 100644 index 00000000..19de6b1d --- /dev/null +++ b/valkey/tests/cluster_isolation_netpol_test.yaml @@ -0,0 +1,127 @@ +suite: cluster isolation network policy +templates: + - templates/cluster-isolation-netpol.yaml +tests: + - it: should not render when cluster mode is disabled + set: + cluster.enabled: false + asserts: + - hasDocuments: + count: 0 + + - it: should not render in replica mode + set: + replica.enabled: true + replica.persistence.size: "5Gi" + asserts: + - hasDocuments: + count: 0 + + - it: should not render when isolation is explicitly disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.isolation.enabled: false + asserts: + - hasDocuments: + count: 0 + + - it: should render by default in cluster mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - isKind: + of: NetworkPolicy + - equal: + path: metadata.name + value: RELEASE-NAME-valkey-cluster-isolation + + - it: should select only pods of this release + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - equal: + path: spec.podSelector.matchLabels["app.kubernetes.io/name"] + value: valkey + - equal: + path: spec.podSelector.matchLabels["app.kubernetes.io/instance"] + value: RELEASE-NAME + + # Egress is intentionally NOT restricted here — locking it down breaks + # Istio sidecar bootstrap (xDS to istiod) and any DNS-heavy flow. Users + # who want egress isolation should add their own NetworkPolicy on top. 
+ - it: should restrict ingress only + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - equal: + path: spec.policyTypes + value: + - Ingress + - notExists: + path: spec.egress + + # The core guarantee: the bus port inbound is scoped to same-instance pods. + - it: bus port ingress must be scoped to same-release pods + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.busPort: 16379 + asserts: + - contains: + path: spec.ingress + content: + from: + - podSelector: + matchLabels: + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: RELEASE-NAME + ports: + - protocol: TCP + port: 16379 + + # The client port must NOT be scoped — arbitrary clients need to reach it. + # If a future change accidentally restricts it to same-release pods, every + # client outside the chart will lose access. + - it: client port ingress must not require the same-release selector + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - contains: + path: spec.ingress + content: + ports: + - protocol: TCP + port: 6379 + + - it: should include metrics port ingress only when metrics enabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + metrics.enabled: true + metrics.exporter.port: 9121 + asserts: + - contains: + path: spec.ingress + content: + ports: + - protocol: TCP + port: 9121 + + - it: should not include metrics port ingress when metrics disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + metrics.enabled: false + asserts: + - notContains: + path: spec.ingress + content: + ports: + - protocol: TCP + port: 9121 + diff --git a/valkey/values.schema.json b/valkey/values.schema.json index 94f5b553..f84fdd46 100644 --- a/valkey/values.schema.json +++ b/valkey/values.schema.json @@ -66,6 +66,14 @@ "busPort": { "type": "integer" }, + "isolation": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + } + } + }, "persistentVolumeClaimRetentionPolicy": { 
"type": "object" } diff --git a/valkey/values.yaml b/valkey/values.yaml index d072d371..b5ee6dd9 100644 --- a/valkey/values.yaml +++ b/valkey/values.yaml @@ -305,6 +305,25 @@ cluster: # This port is used for node-to-node communication in the cluster busPort: 16379 + # Isolate this Valkey cluster's gossip bus from any other release in the + # Kubernetes cluster. Valkey's CLUSTER MEET has no authentication, so + # without this, any pod that can open a TCP connection to a node's bus + # port can merge its owner's cluster into this one — regardless of + # namespace, since pod-to-pod traffic crosses namespaces freely by + # default. + # + # The generated NetworkPolicy restricts the bus port to same-release pods + # only; client and metrics ports remain open. + # + # Requires a NetworkPolicy-enforcing CNI (Calico, Cilium, kindnet ≥ 0.20, + # Antrea, etc.). On a non-enforcing CNI (plain Flannel, in-tree kubenet) + # the rendered policy is a no-op and the chart cannot provide isolation + # on its own — an Istio AuthorizationPolicy or a separate Kubernetes + # cluster per Valkey cluster is the only remaining option. Namespaces + # alone do NOT provide isolation. 
+ isolation: + enabled: true + # PersistentVolumeClaim retention policy for StatefulSet # Controls when PVCs are deleted (requires Kubernetes 1.23+) # More info: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention From c57cdb98f5a649ac936db4b2136b7833787e6bfe Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Sat, 2 May 2026 21:58:27 +0530 Subject: [PATCH 11/23] feat: Ambient Mesh support Signed-off-by: Ankit Pati --- Justfile | 4 + functional-tests/lib.sh | 20 +- functional-tests/run-all.sh | 3 + functional-tests/run-ambient-scenarios.sh | 342 ++++++++++++++++++ functional-tests/setup.sh | 82 ++++- functional-tests/teardown.sh | 2 +- valkey/templates/_helpers.tpl | 48 +++ valkey/templates/cluster-init-job.yaml | 10 + .../templates/cluster-isolation-netpol.yaml | 13 +- valkey/templates/cluster-statefulset.yaml | 22 +- valkey/templates/deploy_valkey.yaml | 1 + .../templates/istio-authorization-policy.yaml | 67 ++++ valkey/templates/istio-destination-rule.yaml | 11 +- .../templates/istio-peer-authentication.yaml | 5 + valkey/templates/statefulset.yaml | 1 + .../tests/cluster_isolation_netpol_test.yaml | 40 ++ valkey/tests/cluster_test.yaml | 55 +++ valkey/tests/deployment_test.yaml | 52 +++ .../istio_authorization_policy_test.yaml | 291 +++++++++++++++ valkey/tests/istio_test.yaml | 96 +++++ valkey/tests/statefulset_test.yaml | 24 ++ valkey/values.yaml | 49 ++- 22 files changed, 1211 insertions(+), 27 deletions(-) create mode 100755 functional-tests/run-ambient-scenarios.sh create mode 100644 valkey/templates/istio-authorization-policy.yaml create mode 100644 valkey/tests/istio_authorization_policy_test.yaml diff --git a/Justfile b/Justfile index d3a4e9bb..933f494c 100644 --- a/Justfile +++ b/Justfile @@ -49,6 +49,10 @@ functional-run: functional-extras: ./functional-tests/run-extra-scenarios.sh +# Run the Istio ambient-mesh regressions on their own +functional-ambient: + ./functional-tests/run-ambient-scenarios.sh + # 
Full functional suite: setup + matrix + teardown including cluster functional-test: ./functional-tests/setup.sh diff --git a/functional-tests/lib.sh b/functional-tests/lib.sh index 0ccf9fb5..f4adda7c 100755 --- a/functional-tests/lib.sh +++ b/functional-tests/lib.sh @@ -14,10 +14,19 @@ RELEASE=${VALKEY_RELEASE:-valkey} AUTH_SECRET=valkey-auth TLS_SECRET=valkey-tls -# Two testbenches: one never gets an Envoy sidecar (istio=off scenarios, or when -# Istio isn't installed at all), one does (istio=on scenarios). +# Three testbenches, covering every shape of mesh participation: +# valkey-testbench — never gets an Envoy sidecar (istio=off +# scenarios, or when Istio isn't installed at +# all). Opts out of both sidecar injection +# and ambient capture. +# valkey-testbench-injected — sidecar-injected (istio=on, mode=sidecar). +# valkey-testbench-ambient — ambient-enrolled (istio=on, mode=ambient): +# no sidecar, ztunnel captures its traffic so +# it presents the expected SPIFFE identity to +# Valkey pods' AuthorizationPolicy. TESTBENCH_POD=valkey-testbench TESTBENCH_POD_INJECTED=valkey-testbench-injected +TESTBENCH_POD_AMBIENT=valkey-testbench-ambient # Deliberately hostile: spaces, shell metacharacters ($, `, &, !), a backslash, # and a double-quote. Every auth=on scenario then exercises both layers of # quoting on the chart side: @@ -50,3 +59,10 @@ wait_for_testbench() { istio_installed() { kubectl --context="${KUBE_CONTEXT}" get namespace "${ISTIO_NAMESPACE}" >/dev/null 2>&1 } + +# Whether the cluster has Istio's ambient data plane (ztunnel DaemonSet) +# installed. Scenarios that require ambient exit-skip if this returns false. 
+istio_ambient_installed() { + kubectl --context="${KUBE_CONTEXT}" -n "${ISTIO_NAMESPACE}" \ + get daemonset ztunnel >/dev/null 2>&1 +} diff --git a/functional-tests/run-all.sh b/functional-tests/run-all.sh index 56c7acbf..eac0137d 100755 --- a/functional-tests/run-all.sh +++ b/functional-tests/run-all.sh @@ -71,4 +71,7 @@ fi # wouldn't match anyway and running them would be surprising. if [[ -z ${FILTER:-} ]]; then "${HERE}/run-extra-scenarios.sh" + # Ambient-mesh regressions. Self-skipping when ztunnel isn't installed + # (e.g. against an older cluster with only the `demo` profile). + "${HERE}/run-ambient-scenarios.sh" fi diff --git a/functional-tests/run-ambient-scenarios.sh b/functional-tests/run-ambient-scenarios.sh new file mode 100755 index 00000000..9edd9ec8 --- /dev/null +++ b/functional-tests/run-ambient-scenarios.sh @@ -0,0 +1,342 @@ +#!/usr/bin/env bash +# Ambient-mesh regressions. Mirrors the core sidecar scenarios but flips +# istio.mode=ambient so ztunnel — not Envoy — carries the Valkey pod traffic. +# +# Rather than expanding the 32-scenario matrix to 96 (sidecar × ambient × on), +# this file concentrates on what's actually different in ambient: +# 1) Pods have no sidecar but still speak mTLS (via ztunnel HBONE). +# 2) DestinationRule is intentionally absent. +# 3) AuthorizationPolicy at L4 (ztunnel) scopes the cluster-bus port to +# same-release SPIFFE principals, preventing cross-release CLUSTER MEET. +# 4) No traffic.sidecar.istio.io/excludePorts annotations — they're +# sidecar-only and must not leak into the rendered pods. +# +# The sidecar matrix in run-all.sh already covers TLS/auth/shard/rep combos. +# Ambient is meaningful around the data-plane shape, so we sample one +# standalone, one replica, one cluster scenario — each with auth+TLS on to +# exercise the full ACL and mTLS paths. + +HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=lib.sh +. "${HERE}/lib.sh" + +if ! 
istio_ambient_installed; then + log "Skipping ambient scenarios — ztunnel not installed" + exit 0 +fi + +RESULTS=() +pass() { RESULTS+=("PASS: $1"); } +fail() { RESULTS+=("FAIL: $1: $2"); return 1; } + +cleanup_release() { + hctl uninstall "${RELEASE}" 2>/dev/null || true + kctl delete pvc --selector="app.kubernetes.io/instance=${RELEASE}" --ignore-not-found >/dev/null +} + +testbench_ambient_exec() { + testbench_exec_in "${TESTBENCH_POD_AMBIENT}" "$@" +} + +# Assert the Valkey pod has NO Envoy sidecar (ambient-mode proof). +assert_no_sidecar() { + local pod=$1 name=$2 + if kctl get pod "${pod}" \ + -o jsonpath='{.spec.containers[*].name} {.spec.initContainers[*].name}' \ + | tr ' ' '\n' | grep -Fxq istio-proxy; then + fail "${name}" "pod ${pod} has an istio-proxy container in ambient mode" + return 1 + fi + return 0 +} + +# Assert the Valkey pod carries the ambient data-plane label. +assert_ambient_label() { + local pod=$1 name=$2 mode + mode=$(kctl get pod "${pod}" \ + -o jsonpath='{.metadata.labels.istio\.io/dataplane-mode}') + if [[ ${mode} != ambient ]]; then + fail "${name}" "pod ${pod} has istio.io/dataplane-mode=${mode:-}, want ambient" + return 1 + fi + return 0 +} + +# --------------------------------------------------------------------------- +# Scenario 1: standalone + ambient. Proves the basic ambient path. +# --------------------------------------------------------------------------- +scenario_standalone_ambient() { + local name="ambient: standalone pings via ztunnel mTLS" + log "SCENARIO: ${name}" + cleanup_release + + if ! 
hctl install "${RELEASE}" "${CHART_DIR}" \ + --set=istio.enabled=true \ + --set=istio.mode=ambient \ + --wait --timeout=180s >/dev/null; then + fail "${name}" "helm install failed"; return + fi + + local pod + pod=$(kctl get pod -l "app.kubernetes.io/instance=${RELEASE}" \ + -o jsonpath='{.items[0].metadata.name}') + + assert_no_sidecar "${pod}" "${name}" || return + assert_ambient_label "${pod}" "${name}" || return + + # DestinationRule must NOT be rendered in ambient mode. + if kctl get destinationrule "${RELEASE}" >/dev/null 2>&1; then + fail "${name}" "DestinationRule/${RELEASE} must not exist in ambient mode" + return + fi + + # PeerAuthentication must be present (enforced by ztunnel). + if ! kctl get peerauthentication "${RELEASE}" >/dev/null 2>&1; then + fail "${name}" "PeerAuthentication/${RELEASE} missing" + return + fi + + # Connectivity from the ambient-enrolled testbench. + local pong + pong=$(testbench_ambient_exec \ + valkey-cli -h "valkey.${NAMESPACE}.svc.cluster.local" ping | tr -d '\r\n') + if [[ ${pong} != PONG ]]; then + fail "${name}" "expected PONG, got '${pong}'"; return + fi + + cleanup_release + pass "${name}" +} + +# --------------------------------------------------------------------------- +# Scenario 2: cluster + ambient. Exercises the multi-pod case, the +# AuthorizationPolicy gate on the bus port, and the absence of the +# sidecar-specific exclude* annotations on the StatefulSet. +# --------------------------------------------------------------------------- +scenario_cluster_ambient() { + local name="ambient: cluster mode converges with AuthorizationPolicy gating bus port" + log "SCENARIO: ${name}" + cleanup_release + + if ! 
hctl install "${RELEASE}" "${CHART_DIR}" \ + --set=istio.enabled=true \ + --set=istio.mode=ambient \ + --set=cluster.enabled=true \ + --set=cluster.shards=3 \ + --set=cluster.replicasPerShard=0 \ + --set=cluster.persistence.size=100Mi \ + --wait --timeout=300s >/dev/null; then + fail "${name}" "helm install failed"; return + fi + kctl wait --for=condition=complete "job/${RELEASE}-cluster-init" --timeout=300s >/dev/null + + local pod + pod=$(kctl get pod -l "app.kubernetes.io/instance=${RELEASE}" \ + -o jsonpath='{.items[0].metadata.name}') + + assert_no_sidecar "${pod}" "${name}" || return + assert_ambient_label "${pod}" "${name}" || return + + # StatefulSet must NOT carry the sidecar-only exclude* annotations — if + # it does, the intent/reality have drifted (ambient has no Envoy to + # exclude ports from, and these leak would-be sidecar coupling into the + # ambient path). + local excl + excl=$(kctl get statefulset "${RELEASE}" \ + -o jsonpath='{.spec.template.metadata.annotations.traffic\.sidecar\.istio\.io/excludeInboundPorts}') + if [[ -n ${excl} ]]; then + fail "${name}" "traffic.sidecar.istio.io/excludeInboundPorts=${excl} leaked into ambient pod" + return + fi + + # AuthorizationPolicy must be present and scoped to the release principal. + local principals + principals=$(kctl get authorizationpolicy "${RELEASE}-cluster-bus" \ + -o jsonpath='{.spec.rules[0].from[0].source.principals[*]}' 2>/dev/null) + if [[ ${principals} != *"/sa/${RELEASE}"* ]]; then + fail "${name}" "AuthorizationPolicy principals=${principals} (want .../sa/${RELEASE}*)" + return + fi + + # Cluster must converge. 
+ local state + for _ in $(seq 1 30); do + state=$(testbench_ambient_exec \ + valkey-cli -h "valkey.${NAMESPACE}.svc.cluster.local" \ + cluster info 2>/dev/null | awk -F: '/^cluster_state:/{print $2}' | tr -d '\r\n') + [[ ${state} == ok ]] && break + sleep 2 + done + if [[ ${state} != ok ]]; then + fail "${name}" "cluster_state=${state:-}, want ok"; return + fi + + cleanup_release + pass "${name}" +} + +# --------------------------------------------------------------------------- +# Scenario 3: auth + TLS + cluster + ambient. End-to-end coverage of the +# app-level crypto (TLS) + ACL auth paths running INSIDE ztunnel's HBONE +# mTLS wrapper. If any of these layers fight, this scenario catches it. +# --------------------------------------------------------------------------- +scenario_cluster_ambient_tls_auth() { + local name="ambient: cluster+auth+TLS works end-to-end through ztunnel" + log "SCENARIO: ${name}" + cleanup_release + + if ! hctl install "${RELEASE}" "${CHART_DIR}" \ + --set=istio.enabled=true \ + --set=istio.mode=ambient \ + --set=tls.enabled=true \ + --set=tls.existingSecret="${TLS_SECRET}" \ + --set=auth.enabled=true \ + --set=auth.usersExistingSecret="${AUTH_SECRET}" \ + --set=auth.aclUsers.default.permissions='~* &* +@all' \ + --set=cluster.enabled=true \ + --set=cluster.shards=3 \ + --set=cluster.replicasPerShard=0 \ + --set=cluster.persistence.size=100Mi \ + --wait --timeout=300s >/dev/null; then + fail "${name}" "helm install failed"; return + fi + kctl wait --for=condition=complete "job/${RELEASE}-cluster-init" --timeout=300s >/dev/null + + # Positive check: authenticated TLS client converges. 
+ local state + for _ in $(seq 1 30); do + state=$(testbench_ambient_exec valkey-cli \ + -h "valkey.${NAMESPACE}.svc.cluster.local" \ + --no-auth-warning \ + -a "${AUTH_PASSWORD}" \ + --tls --cacert /tls/ca.crt \ + cluster info 2>/dev/null | awk -F: '/^cluster_state:/{print $2}' | tr -d '\r\n') + [[ ${state} == ok ]] && break + sleep 2 + done + if [[ ${state} != ok ]]; then + fail "${name}" "cluster_state=${state:-}, want ok"; return + fi + + # Negative: missing auth still rejected even through ztunnel. + local out rc + set +e + out=$(testbench_ambient_exec valkey-cli \ + -h "valkey.${NAMESPACE}.svc.cluster.local" \ + --no-auth-warning --tls --cacert /tls/ca.crt \ + cluster info 2>&1) + rc=$? + set -e + if ! grep -qi 'NOAUTH' <<<"${out}"; then + fail "${name}" "expected NOAUTH, got (rc=${rc}): ${out}"; return + fi + + cleanup_release + pass "${name}" +} + +# --------------------------------------------------------------------------- +# Scenario 4: cross-release CLUSTER MEET must be blocked by the ambient +# AuthorizationPolicy. Analogous to scenario_two_clusters_isolated in the +# sidecar extras but driven at L4 via ztunnel rather than by NetworkPolicy. +# +# We install two cluster-mode releases in the same namespace, both in +# ambient mode with the chart's Kubernetes NetworkPolicy isolation turned +# OFF (`cluster.isolation.enabled=false`) — so the ONLY thing stopping the +# merge is the AuthorizationPolicy. Then we fire a MEET from A targeting B, +# wait out the node-timeout, and assert each cluster still sees 3 nodes. 
+# --------------------------------------------------------------------------- +install_ambient_cluster() { # helm-install release $1 as a 3-shard, 0-replica ambient cluster with NetworkPolicy isolation off + local release=$1 + hctl install "${release}" "${CHART_DIR}" \ + --set=istio.enabled=true \ + --set=istio.mode=ambient \ + --set=cluster.enabled=true \ + --set=cluster.persistence.size=100Mi \ + --set=cluster.shards=3 \ + --set=cluster.replicasPerShard=0 \ + --set=cluster.isolation.enabled=false \ + --wait --timeout=300s >/dev/null +} + +count_cluster_nodes_ambient() { # print how many distinct node IDs ${1}-0 reports via CLUSTER NODES + local release=$1 + kctl exec "${release}-0" -c "${release}" -- sh -c \ + "valkey-cli cluster nodes 2>/dev/null | awk 'NF {print \$1}' | sort -u | wc -l" \ + 2>/dev/null | tr -d '[:space:]' || echo 0 +} + +poison_meet_ambient() { # issue CLUSTER MEET from ${1}-0 at ${2}-0's pod IP; returns non-zero unless valkey-cli answered OK + local src_release=$1 dst_release=$2 dst_ip + dst_ip=$(kctl get pod "${dst_release}-0" -o jsonpath='{.status.podIP}') + [[ -n ${dst_ip} ]] || return 1 # no pod IP, nothing to MEET + kctl exec "${src_release}-0" -c "${src_release}" -- \ + valkey-cli cluster meet "${dst_ip}" 6379 2>/dev/null | grep -q '^OK' # a MEET that never fired proves nothing about isolation +} + +cleanup_ambient_pair() { # best-effort uninstall of both releases and deletion of their PVCs + hctl uninstall valkey-amb-a 2>/dev/null || true + hctl uninstall valkey-amb-b 2>/dev/null || true + kctl delete pvc --selector='app.kubernetes.io/instance=valkey-amb-a' --ignore-not-found >/dev/null + kctl delete pvc --selector='app.kubernetes.io/instance=valkey-amb-b' --ignore-not-found >/dev/null +} + +scenario_ambient_authz_blocks_cross_release_meet() { + local name="ambient: AuthorizationPolicy blocks cross-release CLUSTER MEET" + log "SCENARIO: ${name}" + cleanup_ambient_pair + + if ! install_ambient_cluster valkey-amb-a; then + fail "${name}" "install of valkey-amb-a failed"; cleanup_ambient_pair; return + fi + if ! 
install_ambient_cluster valkey-amb-b; then + fail "${name}" "install of valkey-amb-b failed"; cleanup_ambient_pair; return + fi + kctl wait --for=condition=complete job/valkey-amb-a-cluster-init --timeout=300s >/dev/null + kctl wait --for=condition=complete job/valkey-amb-b-cluster-init --timeout=300s >/dev/null + + local a_before b_before + a_before=$(count_cluster_nodes_ambient valkey-amb-a) + b_before=$(count_cluster_nodes_ambient valkey-amb-b) + if [[ ${a_before} != 3 || ${b_before} != 3 ]]; then + fail "${name}" "baseline wrong (a=${a_before}, b=${b_before}; want 3+3)" + cleanup_ambient_pair; return + fi + + poison_meet_ambient valkey-amb-a valkey-amb-b || { fail "${name}" "could not issue CLUSTER MEET from valkey-amb-a to valkey-amb-b; isolation not exercised"; cleanup_ambient_pair; return; } + + # Same rationale as the sidecar-mode isolation test: after the MEET, + # `cluster nodes` on A briefly shows 4 as a handshake placeholder. The + # real signal is post-settle. Node-timeout defaults to 15s; give it + # multiple intervals. + sleep 45 + + local a_after b_after + a_after=$(count_cluster_nodes_ambient valkey-amb-a) + b_after=$(count_cluster_nodes_ambient valkey-amb-b) + if [[ ${a_after} != 3 || ${b_after} != 3 ]]; then + fail "${name}" "clusters merged despite AuthorizationPolicy (a=${a_after}, b=${b_after}; want 3+3)" + cleanup_ambient_pair; return + fi + + cleanup_ambient_pair + pass "${name}" +} + +trap 'cleanup_release; cleanup_ambient_pair' EXIT + +scenario_standalone_ambient || true +scenario_cluster_ambient || true +scenario_cluster_ambient_tls_auth || true +scenario_ambient_authz_blocks_cross_release_meet || true + +echo +log "Ambient scenario summary" +passed=0; failed=0 +for r in "${RESULTS[@]}"; do + printf ' %s\n' "${r}" + if [[ ${r} == PASS:* ]]; then passed=$(( passed + 1 )); else failed=$(( failed + 1 )); fi +done +echo +log "Ambient: ${passed} passed, ${failed} failed" +(( failed == 0 )) diff --git a/functional-tests/setup.sh b/functional-tests/setup.sh index f7ac759e..b6a65cc6 100755 --- a/functional-tests/setup.sh +++ b/functional-tests/setup.sh @@ -14,18 +14,34 @@ else kind create cluster 
--config "${HERE}/kind-config.yaml" --wait 120s fi -log "Installing Istio (demo profile)" +log "Installing Istio (ambient profile)" if istio_installed; then echo "istio-system namespace already exists; assuming Istio is installed" else - # `demo` gives us istiod + an ingress/egress gateway. We only need istiod, - # but the profile is the simplest path and adds no meaningful overhead. - istioctl install --context="${KUBE_CONTEXT}" --set profile=demo --skip-confirmation + # `ambient` ships istiod + the ambient data plane (istio-cni DaemonSet + # for iptables redirection, ztunnel DaemonSet for node-local HBONE + # mTLS). It also installs the sidecar injection webhook, so classic + # sidecar-mode pods still work on the same cluster — we can run both + # the sidecar matrix and the ambient regressions against one install. + istioctl install --context="${KUBE_CONTEXT}" \ + --set profile=ambient --skip-confirmation +fi + +# Wait for the ambient data plane to be live before launching testbenches. +# Without this, the first few ambient scenarios race ztunnel startup and +# the testbench gets no HBONE wrapping. +if istio_ambient_installed; then + log "Waiting for ztunnel DaemonSet to be ready" + kubectl --context="${KUBE_CONTEXT}" -n "${ISTIO_NAMESPACE}" \ + rollout status daemonset/ztunnel --timeout=180s fi log "Enabling sidecar injection on namespace ${NAMESPACE}" # Label idempotently — `kubectl label --overwrite` works whether or not the -# label exists. +# label exists. Sidecar and ambient opt-in are independent: the namespace +# carries the sidecar webhook label, and individual pods opt into ambient +# via the pod-level `istio.io/dataplane-mode` label (the Helm chart sets +# this on every Valkey pod when istio.mode=ambient). 
kubectl --context="${KUBE_CONTEXT}" label namespace "${NAMESPACE}" \ istio-injection=enabled --overwrite @@ -70,18 +86,39 @@ kctl create secret generic "${TLS_SECRET}" \ --from-file="ca.crt=${CERT_DIR}/valkey-ca.crt" # --------------------------------------------------------------------------- -# Testbench pods. Two flavours: -# valkey-testbench — never injected (sidecar.istio.io/inject=false) -# valkey-testbench-injected — injected, used for istio=on scenarios +# Testbench pods. Three flavours: +# valkey-testbench — never injected (sidecar.istio.io/inject=false). +# Also opts out of ambient capture so the +# default testbench is a plain pod regardless +# of mesh mode. +# valkey-testbench-injected — Envoy sidecar, used for istio=on mode=sidecar. +# valkey-testbench-ambient — ambient-enrolled (no sidecar, ztunnel-wrapped), +# used for istio=on mode=ambient. +# Each flavour is a POD-level opt-in/out so one cluster (which has both data +# planes installed by the `ambient` profile) can host all three side by side. # --------------------------------------------------------------------------- +# $1: pod name +# $2: flavour (plain|sidecar|ambient) launch_testbench() { - local pod=$1 inject=$2 overrides - local labels - if [[ ${inject} == "false" ]]; then - labels='sidecar.istio.io/inject=false' - else - labels='sidecar.istio.io/inject=true' - fi + local pod=$1 flavour=$2 overrides labels + case "${flavour}" in + plain) + # Out of both meshes: classic no-Istio behaviour for istio=off. + labels='sidecar.istio.io/inject=false,istio.io/dataplane-mode=none' + ;; + sidecar) + labels='sidecar.istio.io/inject=true' + ;; + ambient) + # Pod-level ambient opt-in. Overrides the namespace's + # istio-injection=enabled so this pod gets ztunnel, not Envoy. 
+ labels='sidecar.istio.io/inject=false,istio.io/dataplane-mode=ambient' + ;; + *) + echo "launch_testbench: unknown flavour ${flavour}" >&2 + return 2 + ;; + esac overrides='{ "spec": { "containers": [{ @@ -106,10 +143,17 @@ launch_testbench() { wait_for_testbench "${pod}" } -log "Launching ${TESTBENCH_POD} (no sidecar)" -launch_testbench "${TESTBENCH_POD}" false +log "Launching ${TESTBENCH_POD} (no mesh)" +launch_testbench "${TESTBENCH_POD}" plain + +log "Launching ${TESTBENCH_POD_INJECTED} (Envoy sidecar)" +launch_testbench "${TESTBENCH_POD_INJECTED}" sidecar -log "Launching ${TESTBENCH_POD_INJECTED} (with Envoy sidecar)" -launch_testbench "${TESTBENCH_POD_INJECTED}" true +if istio_ambient_installed; then + log "Launching ${TESTBENCH_POD_AMBIENT} (ambient / ztunnel)" + launch_testbench "${TESTBENCH_POD_AMBIENT}" ambient +else + log "Skipping ${TESTBENCH_POD_AMBIENT} — ambient data plane not installed" +fi log "Setup complete" diff --git a/functional-tests/teardown.sh b/functional-tests/teardown.sh index f48b8e22..1349abad 100755 --- a/functional-tests/teardown.sh +++ b/functional-tests/teardown.sh @@ -22,7 +22,7 @@ if kind get clusters | grep -Fxq "${CLUSTER_NAME}"; then # Best-effort: any lingering release + PVCs. hctl uninstall "${RELEASE}" 2>/dev/null || true kctl delete pvc --selector="app.kubernetes.io/instance=${RELEASE}" --ignore-not-found - kctl delete pod "${TESTBENCH_POD}" "${TESTBENCH_POD_INJECTED}" --ignore-not-found + kctl delete pod "${TESTBENCH_POD}" "${TESTBENCH_POD_INJECTED}" "${TESTBENCH_POD_AMBIENT}" --ignore-not-found kctl delete secret "${AUTH_SECRET}" "${TLS_SECRET}" --ignore-not-found fi diff --git a/valkey/templates/_helpers.tpl b/valkey/templates/_helpers.tpl index df778284..bb622d3d 100644 --- a/valkey/templates/_helpers.tpl +++ b/valkey/templates/_helpers.tpl @@ -223,4 +223,52 @@ Calculate total number of nodes in the cluster {{- mul $shards (add 1 $replicasPerShard) -}} {{- end -}} +{{/* +Istio pod labels. 
Emits the label that tells Istio how to capture this pod's +traffic. Ambient requires `istio.io/dataplane-mode: ambient` on the pod (or +namespace); omitting it leaves the pod outside the mesh even when ambient is +installed cluster-wide. In sidecar mode this helper emits nothing: injection +is left to the namespace's `istio-injection` label or to user-set podLabels. + +In ambient mode we also emit `sidecar.istio.io/inject: "false"` so the pod +opts out of Envoy sidecar injection even when the namespace is labelled +`istio-injection=enabled` (a common setup when a cluster runs both data +planes side-by-side, e.g. during a sidecar→ambient migration). Without this, +injecting both a sidecar AND labelling the pod ambient produces a pod whose +traffic is redirected twice and mTLS negotiation breaks silently — the +pod's client port returns "Connection reset by peer" on every request. + +When istio.enabled is false this helper emits nothing so the user can still +set their own `sidecar.istio.io/inject=false` via podLabels (see the +functional-tests istio=off path). +*/}} +{{- define "valkey.istioPodLabels" -}} +{{- if .Values.istio.enabled -}} +{{- if eq .Values.istio.mode "ambient" }} +istio.io/dataplane-mode: ambient +sidecar.istio.io/inject: "false" +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +The valkey ServiceAccount name as an Istio SPIFFE principal; the trust domain +is hard-coded to `cluster.local`. Used by the AuthorizationPolicy to pin the +cluster-bus port to same-release pods cryptographically rather than by IP. +*/}} +{{- define "valkey.istioPrincipal" -}} +{{- printf "cluster.local/ns/%s/sa/%s" .Release.Namespace (include "valkey.serviceAccountName" .) 
-}} +{{- end -}} + +{{/* +Validate istio configuration +*/}} +{{- define "valkey.validateIstioConfig" -}} +{{- if .Values.istio.enabled }} + {{- if not (or (eq .Values.istio.mode "sidecar") (eq .Values.istio.mode "ambient")) }} + {{- fail (printf "istio.mode must be 'sidecar' or 'ambient', got: %s" .Values.istio.mode) }} + {{- end }} +{{- end }} +{{- end -}} + diff --git a/valkey/templates/cluster-init-job.yaml b/valkey/templates/cluster-init-job.yaml index a4e8ab87..2fcee63b 100644 --- a/valkey/templates/cluster-init-job.yaml +++ b/valkey/templates/cluster-init-job.yaml @@ -24,6 +24,16 @@ spec: {{- with .Values.podLabels }} {{- toYaml . | nindent 8 }} {{- end }} + {{- /* + In ambient mode, the Job pod needs the dataplane-mode label so + ztunnel captures its outbound connections to the Valkey pods. Without + this, the Job speaks plaintext to pods whose PeerAuthentication + requires STRICT mTLS and the Job hangs until backoffLimit, which is + what surfaces as "Job in progress / context deadline exceeded" on + helm install. + The same label is benign in sidecar mode (ambient gated inside). + */}} + {{- include "valkey.istioPodLabels" . | nindent 8 }} {{- with .Values.podAnnotations }} annotations: {{- toYaml . | nindent 8 }} diff --git a/valkey/templates/cluster-isolation-netpol.yaml b/valkey/templates/cluster-isolation-netpol.yaml index 53fe8958..3b51f42d 100644 --- a/valkey/templates/cluster-isolation-netpol.yaml +++ b/valkey/templates/cluster-isolation-netpol.yaml @@ -36,8 +36,19 @@ cluster there is no way to prevent a cross-release CLUSTER MEET from the chart alone; either switch to a policy-enforcing CNI, add an Istio AuthorizationPolicy at layer 7, or run each Valkey cluster in its own Kubernetes cluster. + +Ambient mesh caveat: in ambient mode, ztunnel wraps all pod-to-pod traffic +in HBONE on port 15008, then unwraps it at the destination and re-delivers +to the pod-local port. 
A NetworkPolicy that only allows ingress on 6379 / +16379 / 9121 drops the inbound HBONE — the client port gets blocked at +the policy layer and every connection fails with "Connection reset by +peer". The chart-owned AuthorizationPolicy already provides equivalent +(and stronger, identity-based) bus-port scoping for ambient, so we skip +this NetworkPolicy entirely when istio.mode=ambient — note the skip does not check istio.authorizationPolicy.enabled, so with that off neither policy renders. Users who still want +a belt-and-braces IP-level NetworkPolicy in ambient can add their own via +.Values.networkPolicy (rendered by netpolicy.yaml) and include port 15008. */}} -{{- if and .Values.cluster.enabled .Values.cluster.isolation.enabled }} +{{- if and .Values.cluster.enabled .Values.cluster.isolation.enabled (not (and .Values.istio.enabled (eq .Values.istio.mode "ambient"))) }} apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: diff --git a/valkey/templates/cluster-statefulset.yaml b/valkey/templates/cluster-statefulset.yaml index 6ae63e57..9eb8d625 100644 --- a/valkey/templates/cluster-statefulset.yaml +++ b/valkey/templates/cluster-statefulset.yaml @@ -44,6 +44,7 @@ spec: {{- with .Values.podLabels }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "valkey.istioPodLabels" . | nindent 8 }} annotations: {{- with .Values.podAnnotations }} {{- toYaml . | nindent 8 }} @@ -52,7 +53,26 @@ spec: {{- if .Values.valkeyConfig }} checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum | trunc 32 | quote }} {{- end }} - {{- if .Values.istio.enabled }} + {{- /* + The cluster-bus port (16379 by default) carries raw Valkey gossip: a + binary, long-lived, bidirectional protocol that Envoy cannot proxy + sensibly. How the chart treats it depends on the mesh mode; the + annotations below are emitted for sidecar mode only: + + sidecar — explicit: Envoy sees the port in its iptables rules, so + we emit traffic.sidecar.istio.io/exclude*Ports to take + it out. 
The AuthorizationPolicy (rendered separately) + does the cross-release enforcement via Envoy-terminated + mTLS on the OTHER ports. + + ambient — implicit: no annotation is emitted (exclude*Ports are + sidecar-only). The AuthorizationPolicy (rendered + separately) has an ALLOW rule that scopes the bus port + to the same-release principal; that only isolates the + bus if ztunnel carries bus-port traffic, which the + ambient functional test asserts (TODO confirm in docs). + */}} + {{- if and .Values.istio.enabled (eq .Values.istio.mode "sidecar") }} traffic.sidecar.istio.io/excludeInboundPorts: {{ .Values.cluster.busPort | quote }} traffic.sidecar.istio.io/excludeOutboundPorts: {{ .Values.cluster.busPort | quote }} {{- end }} diff --git a/valkey/templates/deploy_valkey.yaml b/valkey/templates/deploy_valkey.yaml index 08516e52..b9b041b3 100644 --- a/valkey/templates/deploy_valkey.yaml +++ b/valkey/templates/deploy_valkey.yaml @@ -30,6 +30,7 @@ spec: {{- with .Values.podLabels }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "valkey.istioPodLabels" . | nindent 8 }} annotations: {{- with .Values.podAnnotations }} {{- toYaml . | nindent 8 }} diff --git a/valkey/templates/istio-authorization-policy.yaml b/valkey/templates/istio-authorization-policy.yaml new file mode 100644 index 00000000..30bf73b0 --- /dev/null +++ b/valkey/templates/istio-authorization-policy.yaml @@ -0,0 +1,67 @@ +{{- /* +AuthorizationPolicy for the cluster-bus port. + +Valkey's CLUSTER MEET has no authentication of its own: a pod that can open +a TCP connection to a node's bus port can merge into the cluster. The chart +already ships a NetworkPolicy that pins the bus port to same-release pods by +IP (cluster-isolation-netpol.yaml), but that only works on a CNI that +enforces NetworkPolicy. + +An Istio AuthorizationPolicy is the belt-and-braces: it matches on SPIFFE +principal (the caller's ServiceAccount identity), not IP, so a pod that +spoofs its way onto the right IP range still fails the check. 
It also works +regardless of CNI — the enforcement point is the sidecar Envoy (sidecar +mode) or the node-local ztunnel (ambient mode), both of which terminate +mTLS and have the peer's identity. + +Rendered only in cluster mode — no bus port to protect otherwise. + +Both L4 (sidecar via Envoy, ambient via ztunnel) enforce ALLOW/DENY on +principal+port, so a single policy shape works for both modes. Ambient's +ztunnel does NOT enforce L7 rules (HTTP method, path, etc.) — those need a +waypoint — but we only need L4 here. +*/}} +{{- if and .Values.istio.enabled .Values.istio.authorizationPolicy.enabled .Values.cluster.enabled }} +{{- include "valkey.validateIstioConfig" . }} +apiVersion: security.istio.io/v1 +kind: AuthorizationPolicy +metadata: + name: {{ include "valkey.fullname" . }}-cluster-bus + labels: + {{- include "valkey.labels" . | nindent 4 }} + {{- with .Values.istio.authorizationPolicy.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.istio.authorizationPolicy.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + {{- include "valkey.selectorLabels" . | nindent 6 }} + action: ALLOW + rules: + # Same-release pods (identified by SPIFFE principal) may reach the bus port. + - from: + - source: + principals: + - {{ include "valkey.istioPrincipal" . | quote }} + to: + - operation: + ports: + - {{ .Values.cluster.busPort | quote }} + # Client and metrics ports stay wide open at this layer — they have + # their own auth (ACL, TLS) above the mesh. A separate deny rule on the + # bus port is unnecessary: this policy is ALLOW-only, and because at + # least one AuthorizationPolicy now targets these pods, Istio applies + # default-deny to anything not matched — i.e. the bus port for + # non-same-release principals. 
+ - to: + - operation: + ports: + - {{ .Values.service.port | quote }} + {{- if .Values.metrics.enabled }} + - {{ .Values.metrics.exporter.port | quote }} + {{- end }} +{{- end }} diff --git a/valkey/templates/istio-destination-rule.yaml b/valkey/templates/istio-destination-rule.yaml index 19bc74b7..88e7f6f2 100644 --- a/valkey/templates/istio-destination-rule.yaml +++ b/valkey/templates/istio-destination-rule.yaml @@ -1,4 +1,13 @@ -{{- if .Values.istio.enabled }} +{{- /* +DestinationRule wraps outbound connections in ISTIO_MUTUAL mTLS. This is a +sidecar-mode concept — an outbound Envoy sees the DR and upgrades the TLS. +In ambient mode the ztunnel already wraps every pod-to-pod hop in HBONE mTLS +transparently, so a DR on top would layer a second mTLS (Envoy-in-ztunnel) +— double crypto for no security gain, and it requires a waypoint proxy to +even take effect. Skip it. +*/}} +{{- if and .Values.istio.enabled (eq .Values.istio.mode "sidecar") }} +{{- include "valkey.validateIstioConfig" . }} apiVersion: networking.istio.io/v1 kind: DestinationRule metadata: diff --git a/valkey/templates/istio-peer-authentication.yaml b/valkey/templates/istio-peer-authentication.yaml index 83d468bd..d04670f3 100644 --- a/valkey/templates/istio-peer-authentication.yaml +++ b/valkey/templates/istio-peer-authentication.yaml @@ -1,4 +1,9 @@ +{{- /* +PeerAuthentication applies in both sidecar and ambient mode — Envoy enforces +in sidecar, ztunnel enforces in ambient. The CRD shape is the same for both. +*/}} {{- if .Values.istio.enabled }} +{{- include "valkey.validateIstioConfig" . }} apiVersion: security.istio.io/v1 kind: PeerAuthentication metadata: diff --git a/valkey/templates/statefulset.yaml b/valkey/templates/statefulset.yaml index b5cb0e15..81979de2 100644 --- a/valkey/templates/statefulset.yaml +++ b/valkey/templates/statefulset.yaml @@ -44,6 +44,7 @@ spec: {{- with .Values.podLabels }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "valkey.istioPodLabels" . 
| nindent 8 }} annotations: {{- with .Values.podAnnotations }} {{- toYaml . | nindent 8 }} diff --git a/valkey/tests/cluster_isolation_netpol_test.yaml b/valkey/tests/cluster_isolation_netpol_test.yaml index 19de6b1d..52aa8903 100644 --- a/valkey/tests/cluster_isolation_netpol_test.yaml +++ b/valkey/tests/cluster_isolation_netpol_test.yaml @@ -125,3 +125,43 @@ tests: - protocol: TCP port: 9121 + # --- Istio ambient mesh interaction --- + # In ambient mode, ztunnel wraps all pod-to-pod hops in HBONE (port 15008) + # then re-delivers to the pod-local port. A NetworkPolicy that only lists + # 6379/16379/9121 drops the inbound HBONE and every connection breaks with + # "Connection reset by peer". The chart-owned AuthorizationPolicy gives + # equivalent (and cryptographically stronger) isolation at the ztunnel + # layer, so we render NO NetworkPolicy when ambient is on. Bus-port + # protection still exists — just at a different layer. + - it: should not render in ambient mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.enabled: true + istio.mode: ambient + asserts: + - hasDocuments: + count: 0 + + - it: should still render in sidecar mode (Envoy's iptables capture is + per-pod and leaves the chart's pod-selector-based netpol correct) + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.enabled: true + istio.mode: sidecar + asserts: + - hasDocuments: + count: 1 + - isKind: + of: NetworkPolicy + + - it: should still render when istio is disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.enabled: false + asserts: + - hasDocuments: + count: 1 + diff --git a/valkey/tests/cluster_test.yaml b/valkey/tests/cluster_test.yaml index 485cc673..2ecb828e 100644 --- a/valkey/tests/cluster_test.yaml +++ b/valkey/tests/cluster_test.yaml @@ -979,3 +979,58 @@ tests: name: RELEASE-NAME-valkey-auth key: default-password + # --- Istio ambient mode (cluster) --- + - it: should add ambient dataplane-mode label when 
istio.mode=ambient + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.enabled: true + istio.mode: ambient + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.metadata.labels["istio.io/dataplane-mode"] + value: ambient + + - it: should NOT emit traffic.sidecar.istio.io exclude annotations in ambient mode + # Ambient has no pod-local Envoy; the exclude* annotations are sidecar-only + # and meaningless to ztunnel. In ambient mode the bus port is protected by + # the chart's AuthorizationPolicy, whose first ALLOW rule binds it. + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.enabled: true + istio.mode: ambient + template: templates/cluster-statefulset.yaml + asserts: + - notExists: + path: spec.template.metadata.annotations["traffic.sidecar.istio.io/excludeInboundPorts"] + - notExists: + path: spec.template.metadata.annotations["traffic.sidecar.istio.io/excludeOutboundPorts"] + + - it: should emit traffic.sidecar.istio.io exclude annotations in sidecar mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.enabled: true + istio.mode: sidecar + cluster.busPort: 16379 + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.metadata.annotations["traffic.sidecar.istio.io/excludeInboundPorts"] + value: "16379" + - equal: + path: spec.template.metadata.annotations["traffic.sidecar.istio.io/excludeOutboundPorts"] + value: "16379" + + - it: should NOT emit traffic.sidecar.istio.io annotations when istio is disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.enabled: false + template: templates/cluster-statefulset.yaml + asserts: + - notExists: + path: spec.template.metadata.annotations["traffic.sidecar.istio.io/excludeInboundPorts"] + diff --git a/valkey/tests/deployment_test.yaml b/valkey/tests/deployment_test.yaml index 5b4346c0..bd98be7d 100644 --- a/valkey/tests/deployment_test.yaml +++
b/valkey/tests/deployment_test.yaml @@ -511,3 +511,55 @@ tests: asserts: - exists: path: spec.template.spec.containers[0].readinessProbe + + # --- Istio ambient mode --- + - it: should add ambient dataplane-mode label when istio.mode=ambient + set: + istio.enabled: true + istio.mode: ambient + template: templates/deploy_valkey.yaml + asserts: + - equal: + path: spec.template.metadata.labels["istio.io/dataplane-mode"] + value: ambient + + - it: should NOT add ambient dataplane-mode label when istio.mode=sidecar + set: + istio.enabled: true + istio.mode: sidecar + template: templates/deploy_valkey.yaml + asserts: + - notExists: + path: spec.template.metadata.labels["istio.io/dataplane-mode"] + + # Namespaces labelled `istio-injection=enabled` would otherwise inject an + # Envoy sidecar AND the ambient capture label would direct traffic to + # ztunnel — the pod then gets double-redirected and every connection + # fails with "Connection reset by peer". Opting the pod out of injection + # explicitly is the only reliable way to make ambient work in that setup. 
+ - it: should set sidecar.istio.io/inject=false in ambient mode + set: + istio.enabled: true + istio.mode: ambient + template: templates/deploy_valkey.yaml + asserts: + - equal: + path: spec.template.metadata.labels["sidecar.istio.io/inject"] + value: "false" + + - it: should NOT force sidecar.istio.io/inject=false in sidecar mode + set: + istio.enabled: true + istio.mode: sidecar + template: templates/deploy_valkey.yaml + asserts: + - notExists: + path: spec.template.metadata.labels["sidecar.istio.io/inject"] + + - it: should NOT add ambient dataplane-mode label when istio is disabled + set: + istio.enabled: false + template: templates/deploy_valkey.yaml + asserts: + - notExists: + path: spec.template.metadata.labels["istio.io/dataplane-mode"] diff --git a/valkey/tests/istio_authorization_policy_test.yaml b/valkey/tests/istio_authorization_policy_test.yaml new file mode 100644 index 00000000..e12faa39 --- /dev/null +++ b/valkey/tests/istio_authorization_policy_test.yaml @@ -0,0 +1,291 @@ +suite: istio authorization policy (cluster-bus isolation) +templates: + - templates/istio-authorization-policy.yaml +tests: + # --- Feature flag tests --- + - it: should not render when istio is disabled + set: + istio.enabled: false + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - hasDocuments: + count: 0 + + - it: should not render when cluster mode is disabled + # No bus port to protect in standalone/replica mode. 
+ set: + istio.enabled: true + asserts: + - hasDocuments: + count: 0 + + - it: should not render in replica (non-cluster) mode + set: + istio.enabled: true + replica.enabled: true + replica.persistence.size: "5Gi" + asserts: + - hasDocuments: + count: 0 + + - it: should not render when authorizationPolicy is explicitly disabled + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.authorizationPolicy.enabled: false + asserts: + - hasDocuments: + count: 0 + + - it: should render in cluster mode with istio enabled (sidecar default) + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - hasDocuments: + count: 1 + - isKind: + of: AuthorizationPolicy + - isAPIVersion: + of: security.istio.io/v1 + + - it: should render in cluster mode with istio enabled (ambient) + # Ambient ztunnel enforces the same L4 AuthorizationPolicy shape. + set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - hasDocuments: + count: 1 + - isKind: + of: AuthorizationPolicy + + # --- Resource identity --- + - it: should have correct name + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - equal: + path: metadata.name + value: RELEASE-NAME-valkey-cluster-bus + + - it: should use fullnameOverride in name + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + fullnameOverride: "my-valkey" + asserts: + - equal: + path: metadata.name + value: my-valkey-cluster-bus + + - it: should include chart labels + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - isNotNull: + path: metadata.labels["helm.sh/chart"] + - isNotNull: + path: metadata.labels["app.kubernetes.io/name"] + - isNotNull: + path: metadata.labels["app.kubernetes.io/managed-by"] + + - it: should include commonLabels + set: + istio.enabled: true + 
cluster.enabled: true + cluster.persistence.size: "5Gi" + commonLabels: + env: production + asserts: + - equal: + path: metadata.labels.env + value: production + + - it: should include custom labels on the policy + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.authorizationPolicy.labels: + security.example.com/reviewed: "true" + asserts: + - equal: + path: metadata.labels["security.example.com/reviewed"] + value: "true" + + - it: should include custom annotations on the policy + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.authorizationPolicy.annotations: + security.example.com/reviewed: "yes" + asserts: + - equal: + path: metadata.annotations["security.example.com/reviewed"] + value: "yes" + + - it: should not have annotations when none are set + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - notExists: + path: metadata.annotations + + # --- Selector and action --- + - it: should target Valkey pods via selector labels + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - equal: + path: spec.selector.matchLabels["app.kubernetes.io/name"] + value: valkey + - equal: + path: spec.selector.matchLabels["app.kubernetes.io/instance"] + value: RELEASE-NAME + + - it: should be an ALLOW policy + # An ALLOW AuthorizationPolicy attached to these pods triggers Istio's + # implicit default-deny: anything not matched by a rule is blocked. 
+ set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - equal: + path: spec.action + value: ALLOW + + # --- Bus-port principal rule --- + - it: bus-port rule must be scoped to the release's SPIFFE principal + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.busPort: 16379 + asserts: + - contains: + path: spec.rules + content: + from: + - source: + principals: + - "cluster.local/ns/NAMESPACE/sa/RELEASE-NAME-valkey" + to: + - operation: + ports: + - "16379" + + - it: bus-port rule should follow custom busPort + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.busPort: 26379 + asserts: + - contains: + path: spec.rules + content: + from: + - source: + principals: + - "cluster.local/ns/NAMESPACE/sa/RELEASE-NAME-valkey" + to: + - operation: + ports: + - "26379" + + - it: bus-port rule should follow custom serviceAccount name + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + serviceAccount.name: "my-sa" + asserts: + - contains: + path: spec.rules + content: + from: + - source: + principals: + - "cluster.local/ns/NAMESPACE/sa/my-sa" + to: + - operation: + ports: + - "16379" + + # --- Client / metrics open rules --- + - it: client port should be open (no principal restriction) + # ACL/TLS live above the mesh; locking the client port to the release's + # own principal would lock out every legitimate caller. 
+ set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - contains: + path: spec.rules + content: + to: + - operation: + ports: + - "6379" + + - it: metrics port should be in the open rule when metrics enabled + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + metrics.enabled: true + metrics.exporter.port: 9121 + asserts: + - contains: + path: spec.rules + content: + to: + - operation: + ports: + - "6379" + - "9121" + + - it: metrics port should NOT appear when metrics disabled + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + metrics.enabled: false + asserts: + - notContains: + path: spec.rules + content: + to: + - operation: + ports: + - "6379" + - "9121" + + # --- Invalid mode --- + - it: should fail when istio.mode is invalid + set: + istio.enabled: true + istio.mode: "not-a-real-mode" + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - failedTemplate: + errorPattern: "istio.mode must be 'sidecar' or 'ambient'.*" diff --git a/valkey/tests/istio_test.yaml b/valkey/tests/istio_test.yaml index 3d081fb4..24a045e6 100644 --- a/valkey/tests/istio_test.yaml +++ b/valkey/tests/istio_test.yaml @@ -445,3 +445,99 @@ tests: - equal: path: spec.host value: my-valkey-headless.NAMESPACE.svc.cluster.local + + # --- Mode validation --- + - it: should fail with a helpful error when istio.mode is invalid + set: + istio.enabled: true + istio.mode: waypoint # typo — not a real mode + template: templates/istio-peer-authentication.yaml + asserts: + - failedTemplate: + errorPattern: "istio.mode must be 'sidecar' or 'ambient'.*" + + - it: should accept istio.mode=sidecar + set: + istio.enabled: true + istio.mode: sidecar + template: templates/istio-peer-authentication.yaml + asserts: + - hasDocuments: + count: 1 + + - it: should accept istio.mode=ambient + set: + istio.enabled: true + istio.mode: ambient + template: 
templates/istio-peer-authentication.yaml + asserts: + - hasDocuments: + count: 1 + + # --- Ambient mode: DestinationRule must NOT render --- + # ztunnel already wraps pod-to-pod hops in HBONE mTLS; a DR on top would + # layer Envoy mTLS inside ztunnel mTLS (double crypto) and requires a + # waypoint proxy to even take effect. Keep it off. + - it: should not render DestinationRule in ambient mode (standalone) + set: + istio.enabled: true + istio.mode: ambient + template: templates/istio-destination-rule.yaml + asserts: + - hasDocuments: + count: 0 + + - it: should not render DestinationRule in ambient mode (cluster) + set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + asserts: + - hasDocuments: + count: 0 + + - it: should not render DestinationRule in ambient mode (replica) + set: + istio.enabled: true + istio.mode: ambient + replica.enabled: true + replica.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + asserts: + - hasDocuments: + count: 0 + + - it: should render DestinationRule in sidecar mode (default) + set: + istio.enabled: true + template: templates/istio-destination-rule.yaml + asserts: + - hasDocuments: + count: 1 + + # --- Ambient mode: PeerAuthentication still applies --- + # Enforced by ztunnel instead of Envoy, but the CRD shape is identical. 
+ - it: should render PeerAuthentication in ambient mode + set: + istio.enabled: true + istio.mode: ambient + template: templates/istio-peer-authentication.yaml + asserts: + - hasDocuments: + count: 1 + - equal: + path: spec.mtls.mode + value: STRICT + + - it: PeerAuthentication should be identical across modes + set: + istio.enabled: true + istio.mode: ambient + istio.peerAuthentication.mode: PERMISSIVE + template: templates/istio-peer-authentication.yaml + asserts: + - equal: + path: spec.mtls.mode + value: PERMISSIVE diff --git a/valkey/tests/statefulset_test.yaml b/valkey/tests/statefulset_test.yaml index fb293673..63b128b3 100644 --- a/valkey/tests/statefulset_test.yaml +++ b/valkey/tests/statefulset_test.yaml @@ -402,3 +402,27 @@ tests: asserts: - exists: path: spec.template.spec.containers[0].readinessProbe + + # --- Istio ambient mode --- + - it: should add ambient dataplane-mode label when istio.mode=ambient + set: + replica.enabled: true + replica.persistence.size: "5Gi" + istio.enabled: true + istio.mode: ambient + template: templates/statefulset.yaml + asserts: + - equal: + path: spec.template.metadata.labels["istio.io/dataplane-mode"] + value: ambient + + - it: should NOT add ambient dataplane-mode label when istio.mode=sidecar + set: + replica.enabled: true + replica.persistence.size: "5Gi" + istio.enabled: true + istio.mode: sidecar + template: templates/statefulset.yaml + asserts: + - notExists: + path: spec.template.metadata.labels["istio.io/dataplane-mode"] diff --git a/valkey/values.yaml b/valkey/values.yaml index b5ee6dd9..68ca53d4 100644 --- a/valkey/values.yaml +++ b/valkey/values.yaml @@ -349,7 +349,31 @@ istio: # Enable Istio enabled: false - # PeerAuthentication controls mTLS enforcement on inbound connections + # Data-plane mode. Two very different shapes: + # + # sidecar — Classic Istio. An Envoy sidecar is injected into every pod. 
+ # Requires the namespace/pod to be labelled for injection + # (`istio-injection=enabled` or `sidecar.istio.io/inject=true`). + # The chart adds `traffic.sidecar.istio.io/exclude*Ports` so + # the cluster-bus port bypasses Envoy (gossip needs raw TCP). + # + # ambient — Istio Ambient Mesh. No sidecar; a node-local ztunnel wraps + # pod traffic in HBONE mTLS transparently. The chart adds + # `istio.io/dataplane-mode: ambient` to every workload pod so + # ztunnel captures their traffic (also works when the whole + # namespace is labelled; the pod label is additive and lets + # operators opt in per-release). The DestinationRule that + # wraps outbound connections in ISTIO_MUTUAL is skipped — + # ztunnel already handles mTLS, and a DR would layer a second + # Envoy mTLS on top, doubling crypto overhead. + # + # Both modes still render the PeerAuthentication (enforced by ztunnel in + # ambient, by Envoy in sidecar) and the chart's AuthorizationPolicy that + # pins the cluster-bus port to same-release principals. + mode: sidecar # @schema enum:[sidecar,ambient] + + # PeerAuthentication controls mTLS enforcement on inbound connections. + # Applies to both sidecar and ambient modes. peerAuthentication: # mTLS mode for inbound traffic (STRICT, PERMISSIVE, DISABLE, UNSET) # STRICT: Require mTLS on all ports @@ -360,7 +384,9 @@ istio: # Additional annotations for the PeerAuthentication resource annotations: {} - # DestinationRule configures mTLS for outbound connections to Valkey services + # DestinationRule configures mTLS for outbound connections to Valkey services. + # Rendered only when istio.mode is "sidecar" — in ambient mode, ztunnel + # handles mTLS transparently and a DR would double-encrypt. destinationRule: # TLS mode for outbound traffic (DISABLE, SIMPLE, MUTUAL, ISTIO_MUTUAL) # ISTIO_MUTUAL: Use Istio-managed certificates for mTLS. 
@@ -376,6 +402,25 @@ istio: # Additional annotations for the DestinationRule resource annotations: {} + # AuthorizationPolicy restricts the cluster-bus port to same-release + # principals. In sidecar mode, Envoy enforces; in ambient mode, ztunnel + # enforces. Unlike the NetworkPolicy (IP-based, requires a policy-enforcing + # CNI), this is cryptographic — it requires the caller to hold a SPIFFE + # identity matching the Valkey pods' ServiceAccount, so a pod in a different + # release cannot forge the check even if it can open a TCP connection. + # + # Only active in cluster mode (no bus port otherwise). + authorizationPolicy: + # Enable the chart-owned AuthorizationPolicy. + # Defaults to true, but only takes effect when istio.enabled is true — the whole point of wiring + # Istio in is to get mesh-level enforcement, and this is the piece that + # prevents cross-release CLUSTER MEET attacks at L4. + enabled: true + # Additional labels for the AuthorizationPolicy resource + labels: {} + # Additional annotations for the AuthorizationPolicy resource + annotations: {} + # Node selector for pod assignment nodeSelector: {} From f86c2888990cb1b8853240dad24b0bb5ba3ae302 Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Sun, 3 May 2026 02:19:13 +0530 Subject: [PATCH 12/23] fix: multiple Ambient Mesh issues identified in testing Signed-off-by: Ankit Pati --- functional-tests/run-ambient-scenarios.sh | 189 +++++++++++++++++- functional-tests/run-extra-scenarios.sh | 5 - functional-tests/run-scenario.sh | 11 +- functional-tests/setup.sh | 54 ++--- valkey/templates/_helpers.tpl | 108 ++++++++-- valkey/templates/cluster-init-job.yaml | 23 +-- valkey/templates/cluster-statefulset.yaml | 17 +- valkey/templates/deploy_valkey.yaml | 17 +- .../templates/istio-authorization-policy.yaml | 15 ++ valkey/templates/statefulset.yaml | 17 +- valkey/tests/cluster_test.yaml | 32 +++ valkey/tests/deployment_test.yaml | 104 +++++++++- .../istio_authorization_policy_test.yaml | 140 ++++++++++++-
valkey/tests/istio_test.yaml | 101 +++++++++- valkey/tests/statefulset_test.yaml | 11 +- valkey/values.schema.json | 24 +++ valkey/values.yaml | 19 ++ 17 files changed, 783 insertions(+), 104 deletions(-) diff --git a/functional-tests/run-ambient-scenarios.sh b/functional-tests/run-ambient-scenarios.sh index 9edd9ec8..6b53505c 100755 --- a/functional-tests/run-ambient-scenarios.sh +++ b/functional-tests/run-ambient-scenarios.sh @@ -323,12 +323,195 @@ scenario_ambient_authz_blocks_cross_release_meet() { pass "${name}" } +# --------------------------------------------------------------------------- +# Scenario 5: the chart must refuse to install in ambient+cluster mode when +# AuthorizationPolicy is explicitly disabled — dropping it leaves the bus +# port with NO cross-release protection (the chart also skips the +# NetworkPolicy in ambient mode to avoid blocking HBONE). We proved live +# during review that this silently ships an open bus port; the fix is to +# fail closed at install time. +# --------------------------------------------------------------------------- +scenario_ambient_ap_disabled_refused() { + local name="ambient: chart refuses install when authorizationPolicy.enabled=false + cluster" + log "SCENARIO: ${name}" + cleanup_release + + local out rc + set +e + out=$(hctl install "${RELEASE}" "${CHART_DIR}" \ + --set=istio.enabled=true \ + --set=istio.mode=ambient \ + --set=cluster.enabled=true \ + --set=cluster.shards=3 \ + --set=cluster.replicasPerShard=0 \ + --set=cluster.persistence.size=100Mi \ + --set=istio.authorizationPolicy.enabled=false \ + --dry-run 2>&1) + rc=$? + set -e + + if (( rc == 0 )); then + fail "${name}" "dry-run succeeded but should have failed: ${out}" + return + fi + if ! 
grep -q 'cluster-bus port unprotected' <<<"${out}"; then + fail "${name}" "got error without the expected message (rc=${rc}): ${out}" + return + fi + pass "${name}" +} + +# --------------------------------------------------------------------------- +# Scenario 6: the chart must refuse to install when ambient + cluster + +# serviceAccount.create=false (with no explicit name), because every release +# collapses to the namespace's `default` SA and the AP can no longer +# distinguish between them. Live-repro'd in review: two releases merged +# despite both having the AP rendered. The fix is to fail closed at install +# time and force the user to pick a distinct SA name (or let the chart +# create one). +# --------------------------------------------------------------------------- +scenario_ambient_shared_default_sa_refused() { + local name="ambient: chart refuses install when serviceAccount defaults to namespace-wide 'default'" + log "SCENARIO: ${name}" + cleanup_release + + local out rc + set +e + out=$(hctl install "${RELEASE}" "${CHART_DIR}" \ + --set=istio.enabled=true \ + --set=istio.mode=ambient \ + --set=cluster.enabled=true \ + --set=cluster.shards=3 \ + --set=cluster.replicasPerShard=0 \ + --set=cluster.persistence.size=100Mi \ + --set=serviceAccount.create=false \ + --dry-run 2>&1) + rc=$? + set -e + + if (( rc == 0 )); then + fail "${name}" "dry-run succeeded but should have failed: ${out}" + return + fi + if ! grep -q "serviceAccount.create=false AND serviceAccount.name empty" <<<"${out}"; then + fail "${name}" "got error without the expected message (rc=${rc}): ${out}" + return + fi + pass "${name}" +} + +# --------------------------------------------------------------------------- +# Scenario 7: custom trustDomain must propagate into the AuthorizationPolicy +# principal. 
A cluster with `istio.trustDomain=my.mesh.example.com` whose AP +# still emits `cluster.local/…` would self-deny: same-release callers +# present an identity under the CUSTOM trust domain but the AP's ALLOW rule +# only matches the hardcoded one — the cluster-bus port default-denies +# even for its own pods and the cluster never forms. +# The test mesh keeps Istio's default trust domain (cluster.local); we install +# with the override and only prove the RENDER honours it. Testing the failure mode in-cluster would require +# reconfiguring Istio's trust domain, which isn't a chart-level concern. +# --------------------------------------------------------------------------- +scenario_ambient_trustdomain_override() { + local name="ambient: AP principal follows istio.trustDomain override" + log "SCENARIO: ${name}" + cleanup_release + + if ! hctl install "${RELEASE}" "${CHART_DIR}" \ + --set=istio.enabled=true \ + --set=istio.mode=ambient \ + --set=cluster.enabled=true \ + --set=cluster.shards=3 \ + --set=cluster.replicasPerShard=0 \ + --set=cluster.persistence.size=100Mi \ + --set=istio.trustDomain=my.mesh.example.com \ + --wait --timeout=240s >/dev/null 2>&1; then + # Install will NOT converge because Istio actually uses cluster.local — + # that's a feature of this scenario. We only need the AP rendered to + # verify the principal string.
+ : + fi + + local principals + principals=$(kctl get authorizationpolicy "${RELEASE}-cluster-bus" \ + -o jsonpath='{.spec.rules[0].from[0].source.principals[*]}' 2>/dev/null) + if [[ ${principals} != "my.mesh.example.com/ns/${NAMESPACE}/sa/${RELEASE}" ]]; then + fail "${name}" "AP principals=${principals}, want my.mesh.example.com/ns/${NAMESPACE}/sa/${RELEASE}" + return + fi + + cleanup_release + pass "${name}" +} + trap 'cleanup_release; cleanup_ambient_pair' EXIT -scenario_standalone_ambient || true -scenario_cluster_ambient || true -scenario_cluster_ambient_tls_auth || true +# --------------------------------------------------------------------------- +# Scenario 8: Prometheus scraping the metrics exporter must work in +# ambient mode. The AuthorizationPolicy is ALLOW-only, which triggers +# default-deny for any non-matching traffic — if the chart forgets to +# include the metrics port in the open rule, production Prometheus stacks +# silently stop seeing Valkey metrics the moment someone enables Istio. +# --------------------------------------------------------------------------- +scenario_ambient_prometheus_scrape() { + local name="ambient: in-mesh Prometheus can scrape metrics exporter" + log "SCENARIO: ${name}" + cleanup_release + + if ! hctl install "${RELEASE}" "${CHART_DIR}" \ + --set=istio.enabled=true \ + --set=istio.mode=ambient \ + --set=cluster.enabled=true \ + --set=cluster.shards=3 \ + --set=cluster.replicasPerShard=0 \ + --set=cluster.persistence.size=100Mi \ + --set=metrics.enabled=true \ + --wait --timeout=300s >/dev/null; then + fail "${name}" "helm install failed"; return + fi + kctl wait --for=condition=complete "job/${RELEASE}-cluster-init" --timeout=300s >/dev/null + + # Launch a curl pod enrolled in ambient (same mesh-participation shape + # as an in-mesh Prometheus would have). 
+ local scraper="scrape-${RELEASE}-$$" + kctl delete pod "${scraper}" --ignore-not-found --wait=true >/dev/null + kctl run "${scraper}" \ + --image=curlimages/curl \ + --labels='istio.io/dataplane-mode=ambient' \ + --restart=Never \ + --command -- sleep 300 >/dev/null + kctl wait --for=condition=Ready "pod/${scraper}" --timeout=120s >/dev/null + + local code out + set +e + out=$(kctl exec "${scraper}" -c "${scraper}" -- \ + curl -sS --max-time 10 -w '\nHTTP=%{http_code}\n' \ + "http://${RELEASE}-metrics.${NAMESPACE}.svc.cluster.local:9121/metrics" 2>&1) + set -e + code=$(awk -F= '/^HTTP=/{print $2}' <<<"${out}") + + kctl delete pod "${scraper}" --ignore-not-found --wait=false >/dev/null + + if [[ ${code} != "200" ]]; then + fail "${name}" "scrape returned HTTP=${code:-}, body was: ${out}" + return + fi + if ! grep -q '^redis_' <<<"${out}"; then + fail "${name}" "HTTP 200 but body lacks redis_* metrics" + return + fi + + cleanup_release + pass "${name}" +} + +scenario_standalone_ambient || true +scenario_cluster_ambient || true +scenario_cluster_ambient_tls_auth || true scenario_ambient_authz_blocks_cross_release_meet || true +scenario_ambient_ap_disabled_refused || true +scenario_ambient_shared_default_sa_refused || true +scenario_ambient_trustdomain_override || true +scenario_ambient_prometheus_scrape || true echo log "Ambient scenario summary" diff --git a/functional-tests/run-extra-scenarios.sh b/functional-tests/run-extra-scenarios.sh index 9bf5c625..f95d4a09 100755 --- a/functional-tests/run-extra-scenarios.sh +++ b/functional-tests/run-extra-scenarios.sh @@ -38,7 +38,6 @@ scenario_aclconfig_metrics() { --set=metrics.enabled=true \ --set=auth.enabled=true \ --set-string="auth.aclConfig=user default on >simplepass ~* &* +@all" \ - --set-string='podLabels.sidecar\.istio\.io/inject=false' \ --wait --timeout=180s >/dev/null; then fail "${name}" "helm install failed" hctl uninstall "${release}" 2>/dev/null || true @@ -103,7 +102,6 @@ scenario_default_deny_netpol() 
{ cleanup_release if ! hctl install "${RELEASE}" "${CHART_DIR}" \ - --set-string='podLabels.sidecar\.istio\.io/inject=false' \ --set-json='networkPolicy={"ingress":[]}' \ --wait --timeout=120s >/dev/null; then fail "${name}" "helm install failed" @@ -161,7 +159,6 @@ scenario_bus_port_hidden() { --set=cluster.shards=3 \ --set=cluster.replicasPerShard=0 \ --set=cluster.busPort=16379 \ - --set-string='podLabels.sidecar\.istio\.io/inject=false' \ --wait --timeout=300s >/dev/null; then fail "${name}" "helm install failed" return @@ -197,7 +194,6 @@ scenario_readiness_probe_exists() { cleanup_release if ! hctl install "${RELEASE}" "${CHART_DIR}" \ - --set-string='podLabels.sidecar\.istio\.io/inject=false' \ --wait --timeout=120s >/dev/null; then fail "${name}" "helm install failed" return @@ -248,7 +244,6 @@ install_cluster() { --set=cluster.shards=3 \ --set=cluster.replicasPerShard=0 \ --set="cluster.isolation.enabled=${isolation}" \ - --set-string='podLabels.sidecar\.istio\.io/inject=false' \ --wait --timeout=300s >/dev/null } diff --git a/functional-tests/run-scenario.sh b/functional-tests/run-scenario.sh index 6304be81..600d0149 100755 --- a/functional-tests/run-scenario.sh +++ b/functional-tests/run-scenario.sh @@ -43,13 +43,14 @@ testbench_exec() { testbench_exec_in "${TESTBENCH}" "$@"; } helm_flags=() if is_on "${ISTIO}"; then - # Let Envoy get injected into every chart pod; turn on the chart's Istio templates. + # Let Envoy get injected into every chart pod; turn on the chart's Istio + # templates. The chart pins sidecar.istio.io/inject=true on every pod + # itself, so no namespace-level label is required. helm_flags+=(--set=istio.enabled=true) -else - # Opt out of injection when Istio isn't the target — the sidecar would break - # the probe and the cluster-init Job would never finish. 
- helm_flags+=(--set-string='podLabels.sidecar\.istio\.io/inject=false') fi +# istio=off needs no extra flags: with the namespace unlabelled and +# istio.enabled=false, the chart emits zero mesh labels and pods stay out +# of both data planes. if is_on "${AUTH}"; then helm_flags+=( diff --git a/functional-tests/setup.sh b/functional-tests/setup.sh index b6a65cc6..a407084f 100755 --- a/functional-tests/setup.sh +++ b/functional-tests/setup.sh @@ -36,14 +36,18 @@ if istio_ambient_installed; then rollout status daemonset/ztunnel --timeout=180s fi -log "Enabling sidecar injection on namespace ${NAMESPACE}" -# Label idempotently — `kubectl label --overwrite` works whether or not the -# label exists. Sidecar and ambient opt-in are independent: the namespace -# carries the sidecar webhook label, and individual pods opt into ambient -# via the pod-level `istio.io/dataplane-mode` label (the Helm chart sets -# this on every Valkey pod when istio.mode=ambient). +# Namespace-level Istio injection intentionally NOT set. The chart now +# carries per-pod `sidecar.istio.io/inject` and `istio.io/dataplane-mode` +# labels derived from `istio.enabled` + `istio.mode`, so every workload +# opts in or out explicitly at the pod layer. Labelling the namespace +# `istio-injection=enabled` on top would (a) override istio=off scenarios +# into sidecar'd pods unless each test sinks labels manually, and (b) +# blur which layer is actually responsible for mesh capture when +# troubleshooting. Keep the decision at the pod level, the same as how +# the chart ships to real operators. 
+log "Namespace ${NAMESPACE} left unlabelled — chart controls mesh opt-in at the pod level" kubectl --context="${KUBE_CONTEXT}" label namespace "${NAMESPACE}" \ - istio-injection=enabled --overwrite + istio-injection- istio.io/dataplane-mode- 2>/dev/null || true log "Creating ${AUTH_SECRET} secret" kctl delete secret "${AUTH_SECRET}" --ignore-not-found @@ -86,16 +90,19 @@ kctl create secret generic "${TLS_SECRET}" \ --from-file="ca.crt=${CERT_DIR}/valkey-ca.crt" # --------------------------------------------------------------------------- -# Testbench pods. Three flavours: -# valkey-testbench — never injected (sidecar.istio.io/inject=false). -# Also opts out of ambient capture so the -# default testbench is a plain pod regardless -# of mesh mode. -# valkey-testbench-injected — Envoy sidecar, used for istio=on mode=sidecar. -# valkey-testbench-ambient — ambient-enrolled (no sidecar, ztunnel-wrapped), -# used for istio=on mode=ambient. -# Each flavour is a POD-level opt-in/out so one cluster (which has both data -# planes installed by the `ambient` profile) can host all three side by side. +# Testbench pods. Three flavours, each expressing its mesh intent via +# POD-level labels (the namespace is intentionally unlabelled — see the +# comment at the sidecar-injection step above). The chart's Valkey pods +# take the same pod-level approach, so the tests exercise the same opt-in +# path operators use in production. +# +# valkey-testbench — out of both meshes. Used for istio=off +# scenarios; no mesh labels emitted. +# valkey-testbench-injected — Envoy sidecar via per-pod inject=true. +# Used for istio=on mode=sidecar. +# valkey-testbench-ambient — ztunnel-wrapped via +# istio.io/dataplane-mode=ambient. +# Used for istio=on mode=ambient. 
# --------------------------------------------------------------------------- # $1: pod name # $2: flavour (plain|sidecar|ambient) @@ -103,16 +110,15 @@ launch_testbench() { local pod=$1 flavour=$2 overrides labels case "${flavour}" in plain) - # Out of both meshes: classic no-Istio behaviour for istio=off. - labels='sidecar.istio.io/inject=false,istio.io/dataplane-mode=none' + # No mesh labels: with the namespace unlabelled, the default is + # already "out of both meshes". + labels='' ;; sidecar) labels='sidecar.istio.io/inject=true' ;; ambient) - # Pod-level ambient opt-in. Overrides the namespace's - # istio-injection=enabled so this pod gets ztunnel, not Envoy. - labels='sidecar.istio.io/inject=false,istio.io/dataplane-mode=ambient' + labels='istio.io/dataplane-mode=ambient' ;; *) echo "launch_testbench: unknown flavour ${flavour}" >&2 @@ -133,10 +139,12 @@ launch_testbench() { }] } }' + local label_args=() + [[ -n ${labels} ]] && label_args=(--labels="${labels}") kctl delete pod "${pod}" --ignore-not-found --wait=true kctl run "${pod}" \ --image=valkey/valkey:9.0.1 \ - --labels="${labels}" \ + "${label_args[@]}" \ --restart=Never \ --overrides="${overrides}" \ --command -- sleep infinity diff --git a/valkey/templates/_helpers.tpl b/valkey/templates/_helpers.tpl index bb622d3d..1f2956d3 100644 --- a/valkey/templates/_helpers.tpl +++ b/valkey/templates/_helpers.tpl @@ -224,31 +224,63 @@ Calculate total number of nodes in the cluster {{- end -}} {{/* -Istio pod labels. Emits the label that tells Istio how to capture this pod's -traffic. Ambient requires `istio.io/dataplane-mode: ambient` on the pod (or -namespace); omitting it leaves the pod outside the mesh even when ambient is -installed cluster-wide. Sidecar mode uses the webhook-injection label unless -the namespace is already labelled `istio-injection=enabled`. +Istio pod labels. 
Emits the labels that tell Istio exactly how to capture +this pod's traffic, so the chart works whether or not the namespace carries +`istio-injection=enabled` or `istio.io/dataplane-mode=ambient` — and, just +as importantly, so that toggling `istio.mode` on a dual-mode cluster moves +pods between data planes cleanly. -In ambient mode we also emit `sidecar.istio.io/inject: "false"` so the pod -opts out of Envoy sidecar injection even when the namespace is labelled -`istio-injection=enabled` (a common setup when a cluster runs both data -planes side-by-side, e.g. during a sidecar→ambient migration). Without this, -injecting both a sidecar AND labelling the pod ambient produces a pod whose -traffic is redirected twice and mTLS negotiation breaks silently — the -pod's client port returns "Connection reset by peer" on every request. +Sidecar mode: + sidecar.istio.io/inject: "true" — force Envoy injection even if the + namespace lacks the injection label. + istio.io/dataplane-mode: none — veto ambient capture, so a cluster + that ALSO runs ambient (e.g. during + a sidecar→ambient migration) does + not double-redirect this pod. -When istio.enabled is false this helper emits nothing so the user can still -set their own `sidecar.istio.io/inject=false` via podLabels (see the -functional-tests istio=off path). +Ambient mode: + istio.io/dataplane-mode: ambient — ztunnel captures this pod's traffic. + sidecar.istio.io/inject: "false" — veto Envoy injection even if the + namespace has the injection label, + so the pod isn't simultaneously + sidecar'd (which double-redirects + and silently breaks mTLS, surfacing + as "Connection reset by peer" on + every request). + +Either mode by itself is enough; emitting both (per mode) makes pod-level +intent the source of truth and eliminates the cluster-configuration +dependency that's easy to miss at install time. 
+ +When istio.enabled is false this helper emits nothing so the user remains +free to pick their own opt-in/out via podLabels (see the istio=off +functional-tests path). */}} {{- define "valkey.istioPodLabels" -}} {{- if .Values.istio.enabled -}} -{{- if eq .Values.istio.mode "ambient" }} +{{- if eq (.Values.istio.mode | default "sidecar") "ambient" -}} istio.io/dataplane-mode: ambient sidecar.istio.io/inject: "false" +{{- else -}} +sidecar.istio.io/inject: "true" +istio.io/dataplane-mode: none +{{- end -}} {{- end -}} {{- end -}} + +{{/* +Compute the merged pod labels map: selector + common + chart-computed mesh +labels + user podLabels (user wins on collision). Emits the merged dict as +YAML so the rendered output has no duplicate keys, even when a user sets +e.g. `sidecar.istio.io/inject=false` via podLabels alongside +`istio.enabled=true`. +*/}} +{{- define "valkey.podLabels" -}} +{{- $selector := fromYaml (include "valkey.selectorLabels" .) -}} +{{- $common := .Values.commonLabels | default dict -}} +{{- $mesh := fromYaml (include "valkey.istioPodLabels" .) | default dict -}} +{{- $user := .Values.podLabels | default dict -}} +{{- toYaml (mergeOverwrite $selector $common $mesh $user) -}} {{- end -}} {{/* @@ -257,18 +289,56 @@ Used by the AuthorizationPolicy to pin the cluster-bus port to same-release pods cryptographically rather than by pod-selector IP. */}} {{- define "valkey.istioPrincipal" -}} -{{- printf "cluster.local/ns/%s/sa/%s" .Release.Namespace (include "valkey.serviceAccountName" .) -}} +{{- $trustDomain := .Values.istio.trustDomain | default "cluster.local" -}} +{{- printf "%s/ns/%s/sa/%s" $trustDomain .Release.Namespace (include "valkey.serviceAccountName" .) -}} {{- end -}} {{/* -Validate istio configuration +Validate istio configuration. Runs regardless of istio.enabled so a typo in +istio.mode (e.g. `mode: ambiet` buried in a GitOps values file) surfaces at +template time instead of silently rendering the sidecar-only code paths. 
+ */}} {{- define "valkey.validateIstioConfig" -}} -{{- if .Values.istio.enabled }} +{{- if hasKey .Values.istio "mode" }} {{- if not (or (eq .Values.istio.mode "sidecar") (eq .Values.istio.mode "ambient")) }} {{- fail (printf "istio.mode must be 'sidecar' or 'ambient', got: %s" .Values.istio.mode) }} {{- end }} {{- end }} +{{- /* +Guard against the silent-no-protection footgun for the cluster bus port: +when istio is enabled in ambient mode AND cluster mode is on, dropping BOTH +the NetworkPolicy (skipped for ambient) AND the AuthorizationPolicy leaves +the bus port open to any pod that can route to it. The feature's whole +point is cross-release isolation; failing closed is the only safe default. +There is no opt-out while staying in ambient mode: either keep +`istio.authorizationPolicy.enabled=true`, or switch to `istio.mode=sidecar`, +where the `cluster.isolation.enabled` NetworkPolicy path still runs (it is +skipped in ambient because it would block HBONE). The chart refuses to render +when ambient mode, cluster mode and a disabled AuthorizationPolicy are combined. +*/}} +{{- if and .Values.istio.enabled (eq .Values.istio.mode "ambient") .Values.cluster.enabled }} + {{- if not .Values.istio.authorizationPolicy.enabled }} + {{- fail "istio.authorizationPolicy.enabled=false in ambient mode + cluster mode leaves the cluster-bus port unprotected: the NetworkPolicy is skipped for ambient (it would block HBONE), and disabling the AuthorizationPolicy removes the only remaining cross-release isolation layer. Re-enable istio.authorizationPolicy.enabled, or switch to istio.mode=sidecar if you intend to rely on the NetworkPolicy." }} + {{- end }} +{{- end }} +{{- /* +Guard against the shared-ServiceAccount footgun. The AuthorizationPolicy +uses the SPIFFE principal `<trustDomain>/ns/<namespace>/sa/<serviceAccount>` to scope the bus +port to same-release pods. If two releases in the same namespace share a SA +(e.g.
both use `serviceAccount.create=false` with the namespace default, or +both explicitly set the same `serviceAccount.name`), their APs encode the +SAME principal — cross-release MEET passes the identity check and the +clusters silently merge. The chart cannot detect other releases at template +time, but it can surface the risk: refuse the obviously-unsafe case +(`serviceAccount.create=false` with no explicit name, i.e. the shared +`default` SA) whenever the AP is rendered. Users who deliberately share +a named SA across releases can still do so; they just have to type it. +*/}} +{{- if and .Values.istio.enabled .Values.istio.authorizationPolicy.enabled .Values.cluster.enabled }} + {{- if and (not .Values.serviceAccount.create) (not .Values.serviceAccount.name) }} + {{- fail "istio.authorizationPolicy gives cross-release cluster-bus isolation by scoping the bus port to a SPIFFE principal built from the pod's ServiceAccount. With serviceAccount.create=false AND serviceAccount.name empty, the chart falls back to the namespace's 'default' ServiceAccount — which every other release using the same fallback ALSO maps to, so the AuthorizationPolicy cannot distinguish them and cross-release CLUSTER MEET succeeds. Either set serviceAccount.create=true (per-release SA) or serviceAccount.name=." }} + {{- end }} +{{- end }} {{- end -}} diff --git a/valkey/templates/cluster-init-job.yaml b/valkey/templates/cluster-init-job.yaml index 2fcee63b..e1ce0ebd 100644 --- a/valkey/templates/cluster-init-job.yaml +++ b/valkey/templates/cluster-init-job.yaml @@ -17,23 +17,16 @@ spec: template: metadata: labels: - {{- include "valkey.selectorLabels" . | nindent 8 }} - {{- with .Values.commonLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.podLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} {{- /* - In ambient mode, the Job pod needs the dataplane-mode label so - ztunnel captures its outbound connections to the Valkey pods. 
Without - this, the Job speaks plaintext to pods whose PeerAuthentication - requires STRICT mTLS and the Job hangs until backoffLimit, which is - what surfaces as "Job in progress / context deadline exceeded" on - helm install. - The same label is benign in sidecar mode (ambient gated inside). + Single merged label set (see valkey.podLabels helper). In ambient + mode the Job pod picks up the dataplane-mode label automatically so + ztunnel captures its outbound connections to the Valkey pods; + without it the Job speaks plaintext against STRICT mTLS and hangs + until backoffLimit. In sidecar mode the pod carries an explicit + sidecar.istio.io/inject=true so the Job works on namespaces that + don't carry the injection label. */}} - {{- include "valkey.istioPodLabels" . | nindent 8 }} + {{- include "valkey.podLabels" . | nindent 8 }} {{- with .Values.podAnnotations }} annotations: {{- toYaml . | nindent 8 }} diff --git a/valkey/templates/cluster-statefulset.yaml b/valkey/templates/cluster-statefulset.yaml index 9eb8d625..035eeeff 100644 --- a/valkey/templates/cluster-statefulset.yaml +++ b/valkey/templates/cluster-statefulset.yaml @@ -2,6 +2,7 @@ {{- include "valkey.validateAuthConfig" . }} {{- include "valkey.validateClusterConfig" . }} {{- include "valkey.validateClusterAuth" . }} +{{- include "valkey.validateIstioConfig" . }} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -37,14 +38,14 @@ spec: template: metadata: labels: - {{- include "valkey.selectorLabels" . | nindent 8 }} - {{- with .Values.commonLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.podLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- include "valkey.istioPodLabels" . | nindent 8 }} + {{- /* + Single merged label set: selector + commonLabels + chart-computed + mesh labels + user podLabels (user wins on collision). Keeps the + rendered YAML free of duplicate keys when e.g. a user sets + sidecar.istio.io/inject=false via podLabels alongside + istio.enabled=true. 
+ */}} + {{- include "valkey.podLabels" . | nindent 8 }} annotations: {{- with .Values.podAnnotations }} {{- toYaml . | nindent 8 }} diff --git a/valkey/templates/deploy_valkey.yaml b/valkey/templates/deploy_valkey.yaml index b9b041b3..a1578378 100644 --- a/valkey/templates/deploy_valkey.yaml +++ b/valkey/templates/deploy_valkey.yaml @@ -3,6 +3,7 @@ {{- $storage := .Values.dataStorage }} {{- $createPVC := and $storage.enabled (not (empty $storage.requestedSize)) (empty $storage.persistentVolumeClaimName) }} {{- include "valkey.validateAuthConfig" . }} +{{- include "valkey.validateIstioConfig" . }} apiVersion: apps/v1 kind: Deployment metadata: @@ -23,14 +24,14 @@ spec: template: metadata: labels: - {{- include "valkey.selectorLabels" . | nindent 8 }} - {{- with .Values.commonLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.podLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- include "valkey.istioPodLabels" . | nindent 8 }} + {{- /* + Single merged label set: selector + commonLabels + chart-computed + mesh labels + user podLabels (user wins on collision). Keeps the + rendered YAML free of duplicate keys when e.g. a user sets + sidecar.istio.io/inject=false via podLabels alongside + istio.enabled=true. + */}} + {{- include "valkey.podLabels" . | nindent 8 }} annotations: {{- with .Values.podAnnotations }} {{- toYaml . | nindent 8 }} diff --git a/valkey/templates/istio-authorization-policy.yaml b/valkey/templates/istio-authorization-policy.yaml index 30bf73b0..4b983ded 100644 --- a/valkey/templates/istio-authorization-policy.yaml +++ b/valkey/templates/istio-authorization-policy.yaml @@ -57,6 +57,21 @@ spec: # least one AuthorizationPolicy now targets these pods, Istio applies # default-deny to anything not matched — i.e. the bus port for # non-same-release principals. 
+ # + # Istio-managed ports (15020 merged-stats, 15021 Envoy readiness, 15090 + # Envoy admin) are intentionally NOT listed: + # sidecar mode — Istio auto-excludes these via iptables so they never + # hit Envoy's authz stack; the AP has no bearing on + # them. Verified on a live kind+Istio 1.29 install: + # 15021/15090 reachable from in-mesh pods without an + # explicit allow rule; 15020 is bound to pilot-agent + # outside Envoy's path. + # ambient mode — no Envoy exists, so none of these ports have analogues + # (ztunnel metrics live on the NODE, not the pod). + # Prometheus scrapes this chart via the shipped Service/PodMonitor on + # the app-level 9121 port, so this is the only port Prometheus cares + # about here. Scrapers that rely on Istio's Envoy-merged 15020 path + # hit pilot-agent directly and aren't gated by this AP. - to: - operation: ports: diff --git a/valkey/templates/statefulset.yaml b/valkey/templates/statefulset.yaml index 81979de2..66163cf0 100644 --- a/valkey/templates/statefulset.yaml +++ b/valkey/templates/statefulset.yaml @@ -2,6 +2,7 @@ {{- include "valkey.validateAuthConfig" . }} {{- include "valkey.validateReplicaPersistence" . }} {{- include "valkey.validateReplicaAuth" . }} +{{- include "valkey.validateIstioConfig" . }} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -37,14 +38,14 @@ spec: template: metadata: labels: - {{- include "valkey.selectorLabels" . | nindent 8 }} - {{- with .Values.commonLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.podLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- include "valkey.istioPodLabels" . | nindent 8 }} + {{- /* + Single merged label set: selector + commonLabels + chart-computed + mesh labels + user podLabels (user wins on collision). Keeps the + rendered YAML free of duplicate keys when e.g. a user sets + sidecar.istio.io/inject=false via podLabels alongside + istio.enabled=true. + */}} + {{- include "valkey.podLabels" . 
| nindent 8 }} annotations: {{- with .Values.podAnnotations }} {{- toYaml . | nindent 8 }} diff --git a/valkey/tests/cluster_test.yaml b/valkey/tests/cluster_test.yaml index 2ecb828e..66ea037f 100644 --- a/valkey/tests/cluster_test.yaml +++ b/valkey/tests/cluster_test.yaml @@ -1034,3 +1034,35 @@ tests: - notExists: path: spec.template.metadata.annotations["traffic.sidecar.istio.io/excludeInboundPorts"] + - it: should emit sidecar-mode mesh labels on the cluster statefulset + # Sidecar mode is self-sufficient now: we pin dataplane-mode=none AND + # sidecar.istio.io/inject=true on the pod, so injection works whether + # or not the namespace is labelled, and ztunnel stays out of the way + # on dual-mode clusters. Regression: we used to emit neither, leaning + # on namespace labels exclusively. + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.enabled: true + istio.mode: sidecar + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.metadata.labels["istio.io/dataplane-mode"] + value: none + - equal: + path: spec.template.metadata.labels["sidecar.istio.io/inject"] + value: "true" + + - it: should emit no mesh labels when istio is disabled (cluster) + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.enabled: false + template: templates/cluster-statefulset.yaml + asserts: + - notExists: + path: spec.template.metadata.labels["istio.io/dataplane-mode"] + - notExists: + path: spec.template.metadata.labels["sidecar.istio.io/inject"] + diff --git a/valkey/tests/deployment_test.yaml b/valkey/tests/deployment_test.yaml index bd98be7d..bfc1067e 100644 --- a/valkey/tests/deployment_test.yaml +++ b/valkey/tests/deployment_test.yaml @@ -523,14 +523,18 @@ tests: path: spec.template.metadata.labels["istio.io/dataplane-mode"] value: ambient - - it: should NOT add ambient dataplane-mode label when istio.mode=sidecar + # In sidecar mode istio.io/dataplane-mode is set to "none" (not absent) so + # a dual-mode 
cluster running ambient too doesn't accidentally capture + # this pod via ztunnel on top of its Envoy sidecar. + - it: should set istio.io/dataplane-mode=none in sidecar mode set: istio.enabled: true istio.mode: sidecar template: templates/deploy_valkey.yaml asserts: - - notExists: + - equal: path: spec.template.metadata.labels["istio.io/dataplane-mode"] + value: none # Namespaces labelled `istio-injection=enabled` would otherwise inject an # Envoy sidecar AND the ambient capture label would direct traffic to @@ -547,14 +551,20 @@ tests: path: spec.template.metadata.labels["sidecar.istio.io/inject"] value: "false" - - it: should NOT force sidecar.istio.io/inject=false in sidecar mode + # Sidecar mode now forces injection on at the pod level so the chart + # doesn't silently depend on the namespace carrying + # istio-injection=enabled. Previously this was the user's problem to get + # right — half the "it's not working" issue reports in sidecar mode came + # down to "the namespace wasn't labelled". + - it: should set sidecar.istio.io/inject=true in sidecar mode set: istio.enabled: true istio.mode: sidecar template: templates/deploy_valkey.yaml asserts: - - notExists: + - equal: path: spec.template.metadata.labels["sidecar.istio.io/inject"] + value: "true" - it: should NOT add ambient dataplane-mode label when istio is disabled set: @@ -563,3 +573,89 @@ tests: asserts: - notExists: path: spec.template.metadata.labels["istio.io/dataplane-mode"] + + # Regression: the istioPodLabels helper used to emit nothing in sidecar + # mode AND still have its caller run `nindent 8`, which injects a blank + # line into the labels map. Valid YAML, but crap to read in diffs. After + # the fix the labels map holds exactly the selector labels + the two + # mode-specific keys — no blank key, no stray whitespace. 
+ - it: should produce exactly the sidecar label set in sidecar mode + set: + istio.enabled: true + istio.mode: sidecar + template: templates/deploy_valkey.yaml + asserts: + - equal: + path: spec.template.metadata.labels + value: + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: RELEASE-NAME + sidecar.istio.io/inject: "true" + istio.io/dataplane-mode: none + + # And with ambient, exactly the two ambient-specific keys on top of the + # selector labels — and nothing else. + - it: should produce exactly the ambient label set in ambient mode + set: + istio.enabled: true + istio.mode: ambient + template: templates/deploy_valkey.yaml + asserts: + - equal: + path: spec.template.metadata.labels + value: + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: RELEASE-NAME + istio.io/dataplane-mode: ambient + sidecar.istio.io/inject: "false" + + # And with istio disabled, no mesh labels at all — users can still opt + # into their own mesh labels via podLabels. + - it: should emit no mesh labels when istio.enabled is false + set: + istio.enabled: false + template: templates/deploy_valkey.yaml + asserts: + - equal: + path: spec.template.metadata.labels + value: + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: RELEASE-NAME + + # User override: a podLabels entry that collides with a chart-computed + # mesh label (e.g. sidecar.istio.io/inject) must win cleanly, with NO + # duplicate YAML keys in the rendered output. This lets operators run + # istio.enabled=true but force a specific release out of the sidecar + # mesh (rare but legitimate — e.g. pinning a canary pod to plain TCP). 
+ - it: user podLabels must override chart mesh labels cleanly + set: + istio.enabled: true + istio.mode: sidecar + podLabels: + sidecar.istio.io/inject: "false" + custom-label: "custom-value" + template: templates/deploy_valkey.yaml + asserts: + - equal: + path: spec.template.metadata.labels["sidecar.istio.io/inject"] + value: "false" + - equal: + path: spec.template.metadata.labels["istio.io/dataplane-mode"] + value: none + - equal: + path: spec.template.metadata.labels["custom-label"] + value: "custom-value" + + # Regression: installing istio.enabled=false with a typo'd istio.mode + # (istio.mode=ambiet) used to sail through because the validator gated + # on istio.enabled. With schema validation active it now fails at lint, + # and the template helper would catch it too if someone force-fed an + # invalid value past the schema. This ensures the helper still errors. + - it: should reject typo'd istio.mode even when istio.enabled=false (at schema layer) + set: + istio.enabled: false + istio.mode: ambiet + template: templates/deploy_valkey.yaml + asserts: + - failedTemplate: + errorMessage: "values don't meet the specifications of the schema(s) in the following chart(s):\nvalkey:\n- at '/istio/mode': value must be one of 'sidecar', 'ambient'\n" diff --git a/valkey/tests/istio_authorization_policy_test.yaml b/valkey/tests/istio_authorization_policy_test.yaml index e12faa39..b47406a9 100644 --- a/valkey/tests/istio_authorization_policy_test.yaml +++ b/valkey/tests/istio_authorization_policy_test.yaml @@ -279,8 +279,23 @@ tests: - "6379" - "9121" + # Regression: the "open" rule (second element in spec.rules) must have + # NO `from` clause. If somebody ever adds a principal filter there the + # metrics port becomes same-release only, which silently kills every + # Prometheus scrape from a different namespace (the shipped + # ServiceMonitor path). Keep this rule unconditional. 
+ - it: the open port rule must have no principal restriction + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + metrics.enabled: true + asserts: + - notExists: + path: spec.rules[1].from + # --- Invalid mode --- - - it: should fail when istio.mode is invalid + - it: should reject invalid istio.mode at the schema layer set: istio.enabled: true istio.mode: "not-a-real-mode" @@ -288,4 +303,125 @@ tests: cluster.persistence.size: "5Gi" asserts: - failedTemplate: - errorPattern: "istio.mode must be 'sidecar' or 'ambient'.*" + errorMessage: "values don't meet the specifications of the schema(s) in the following chart(s):\nvalkey:\n- at '/istio/mode': value must be one of 'sidecar', 'ambient'\n" + + # --- Custom trust domain --- + # A cluster federated via multi-cluster mesh (or any install that + # overrides istiod's default) publishes identities under a non-default + # trust domain. The AP principal string must follow — otherwise same- + # release callers ALSO fail the ALLOW match and the bus rule is a + # self-denial. 
+ - it: principal should honour istio.trustDomain override + set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.trustDomain: "my.mesh.example.com" + asserts: + - contains: + path: spec.rules + content: + from: + - source: + principals: + - "my.mesh.example.com/ns/NAMESPACE/sa/RELEASE-NAME-valkey" + to: + - operation: + ports: + - "16379" + + - it: principal should default to cluster.local when trustDomain unset + set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - contains: + path: spec.rules + content: + from: + - source: + principals: + - "cluster.local/ns/NAMESPACE/sa/RELEASE-NAME-valkey" + to: + - operation: + ports: + - "16379" + + # --- Cross-release isolation footguns --- + # These two validators exist to make the feature's SECURITY GUARANTEE + # hold: ambient cross-release isolation relies on the SPIFFE principal + # being UNIQUE per release AND on SOMETHING enforcing it. Without the + # guards you can silently ship a chart install with zero bus-port + # protection. + + - it: should refuse ambient+cluster when shared 'default' SA would be used + # serviceAccount.create=false AND no explicit name collapses every + # release's AP principal to sa/default; cross-release MEET passes the + # identity check and the clusters silently merge. Live-repro'd in + # review — this MUST fail template at install time. 
+ set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + serviceAccount.create: false + serviceAccount.name: "" + asserts: + - failedTemplate: + errorPattern: "serviceAccount.create=false AND serviceAccount.name empty.*" + + - it: should accept ambient+cluster with explicit (distinct) serviceAccount.name + # Opt-in for the advanced multi-release-shared-SA case — we can't tell + # whether the user picked a DIFFERENT name from a hypothetical other + # release, but at least the name is intentional. + set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + serviceAccount.create: false + serviceAccount.name: "my-valkey-sa" + asserts: + - hasDocuments: + count: 1 + - contains: + path: spec.rules + content: + from: + - source: + principals: + - "cluster.local/ns/NAMESPACE/sa/my-valkey-sa" + to: + - operation: + ports: + - "16379" + + # Note: the "refuse ambient+cluster when AuthorizationPolicy AND + # NetworkPolicy are both off" assertion lives in istio_test.yaml — that + # suite renders the PeerAuthentication template, which is what carries + # the validator (the AP template correctly renders NOTHING when the AP + # is disabled, so it's the wrong place to prove the guard fires). 
+ + - it: should allow authorizationPolicy.enabled=false in sidecar mode (NetworkPolicy still guards) + set: + istio.enabled: true + istio.mode: sidecar + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.authorizationPolicy.enabled: false + asserts: + - hasDocuments: + count: 0 + + - it: should allow authorizationPolicy.enabled=false when istio is off + set: + istio.enabled: false + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.authorizationPolicy.enabled: false + asserts: + - hasDocuments: + count: 0 diff --git a/valkey/tests/istio_test.yaml b/valkey/tests/istio_test.yaml index 24a045e6..8c821bd2 100644 --- a/valkey/tests/istio_test.yaml +++ b/valkey/tests/istio_test.yaml @@ -447,14 +447,19 @@ tests: value: my-valkey-headless.NAMESPACE.svc.cluster.local # --- Mode validation --- - - it: should fail with a helpful error when istio.mode is invalid + # The schema catches typos in istio.mode at install time (before any + # template renders). This keeps errors fast and mode-neutral, unlike the + # old regime where only templates that happened to render in the chosen + # mode would fail — a bogus mode with istio.enabled=false would silently + # sail through. + - it: should reject invalid istio.mode at the schema layer set: istio.enabled: true istio.mode: waypoint # typo — not a real mode template: templates/istio-peer-authentication.yaml asserts: - failedTemplate: - errorPattern: "istio.mode must be 'sidecar' or 'ambient'.*" + errorMessage: "values don't meet the specifications of the schema(s) in the following chart(s):\nvalkey:\n- at '/istio/mode': value must be one of 'sidecar', 'ambient'\n" - it: should accept istio.mode=sidecar set: @@ -541,3 +546,95 @@ tests: - equal: path: spec.mtls.mode value: PERMISSIVE + + # --- Cross-release isolation guards --- + # These assert the chart refuses to silently ship an unprotected cluster + # bus. 
The validator is called from the PeerAuthentication template so it + # fires regardless of whether the AuthorizationPolicy itself renders. + + - it: should refuse ambient+cluster when AuthorizationPolicy is off + # Ambient mode skips the bus-port NetworkPolicy (it would drop HBONE); + # disabling the AP on top leaves the port completely unprotected across + # releases. The chart must fail closed. + set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.authorizationPolicy.enabled: false + template: templates/istio-peer-authentication.yaml + asserts: + - failedTemplate: + errorPattern: "istio.authorizationPolicy.enabled=false in ambient mode.*cluster-bus port unprotected.*" + + - it: should refuse ambient+cluster when serviceAccount collapses to default + # serviceAccount.create=false AND serviceAccount.name="" produces the + # shared `default` SA. Two such releases in the same namespace generate + # identical AP principals; cross-release MEET succeeds. Repro'd live. + set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + serviceAccount.create: false + serviceAccount.name: "" + template: templates/istio-peer-authentication.yaml + asserts: + - failedTemplate: + errorPattern: "serviceAccount.create=false AND serviceAccount.name empty.*" + + - it: should accept ambient+cluster with serviceAccount.create=true (default) + # Per-release SA (default): distinct SPIFFE principal per release, + # AP correctly isolates. + set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + serviceAccount.create: true + template: templates/istio-peer-authentication.yaml + asserts: + - hasDocuments: + count: 1 + + - it: should accept ambient+cluster with explicit serviceAccount.name override + # User takes responsibility for distinct naming. 
+ set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + serviceAccount.create: false + serviceAccount.name: "my-valkey-sa" + template: templates/istio-peer-authentication.yaml + asserts: + - hasDocuments: + count: 1 + + # --- Custom trust domain --- + - it: PeerAuthentication unaffected by custom trustDomain + # PA doesn't reference principals, so trustDomain is a no-op here. Just + # prove nothing breaks when the value is set. + set: + istio.enabled: true + istio.mode: ambient + istio.trustDomain: "my.mesh.example.com" + template: templates/istio-peer-authentication.yaml + asserts: + - hasDocuments: + count: 1 + - equal: + path: spec.mtls.mode + value: STRICT + + # --- Typo defence --- + # Schema check fires even when istio.enabled=false so typos surface at + # GitOps-commit time, not after someone flips the toggle in production. + - it: should reject typo'd istio.mode even with istio.enabled=false + set: + istio.enabled: false + istio.mode: ambiet + template: templates/istio-peer-authentication.yaml + asserts: + - failedTemplate: + errorMessage: "values don't meet the specifications of the schema(s) in the following chart(s):\nvalkey:\n- at '/istio/mode': value must be one of 'sidecar', 'ambient'\n" diff --git a/valkey/tests/statefulset_test.yaml b/valkey/tests/statefulset_test.yaml index 63b128b3..98e69d7c 100644 --- a/valkey/tests/statefulset_test.yaml +++ b/valkey/tests/statefulset_test.yaml @@ -416,7 +416,10 @@ tests: path: spec.template.metadata.labels["istio.io/dataplane-mode"] value: ambient - - it: should NOT add ambient dataplane-mode label when istio.mode=sidecar + # Sidecar mode emits istio.io/dataplane-mode=none (veto ambient capture) + # and sidecar.istio.io/inject=true (force injection), so the chart is + # self-sufficient on clusters that run both data planes side-by-side. 
+ - it: should set sidecar-mode mesh labels in sidecar mode set: replica.enabled: true replica.persistence.size: "5Gi" @@ -424,5 +427,9 @@ tests: istio.mode: sidecar template: templates/statefulset.yaml asserts: - - notExists: + - equal: path: spec.template.metadata.labels["istio.io/dataplane-mode"] + value: none + - equal: + path: spec.template.metadata.labels["sidecar.istio.io/inject"] + value: "true" diff --git a/valkey/values.schema.json b/valkey/values.schema.json index f84fdd46..b5a24d76 100644 --- a/valkey/values.schema.json +++ b/valkey/values.schema.json @@ -194,6 +194,16 @@ "enabled": { "type": "boolean" }, + "mode": { + "type": "string", + "enum": [ + "sidecar", + "ambient" + ] + }, + "trustDomain": { + "type": "string" + }, "peerAuthentication": { "type": "object", "properties": { @@ -233,6 +243,20 @@ "type": "object" } } + }, + "authorizationPolicy": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "labels": { + "type": "object" + }, + "annotations": { + "type": "object" + } + } } } }, diff --git a/valkey/values.yaml b/valkey/values.yaml index 68ca53d4..7fdcff1e 100644 --- a/valkey/values.yaml +++ b/valkey/values.yaml @@ -372,8 +372,27 @@ istio: # pins the cluster-bus port to same-release principals. mode: sidecar # @schema enum:[sidecar,ambient] + # SPIFFE trust domain this mesh issues identities under. Used by the + # AuthorizationPolicy to build the principal string + # `<trustDomain>/ns/<namespace>/sa/<serviceAccount>`. Must match the `trustDomain` value in + # the mesh ConfigMap (`kubectl -n istio-system get cm istio -o jsonpath= + # '{.data.mesh}' | grep trustDomain`); a mismatch means the rule matches + # nothing and same-release pods get default-denied on the bus port, not + # just cross-release ones. + trustDomain: cluster.local + # PeerAuthentication controls mTLS enforcement on inbound connections. # Applies to both sidecar and ambient modes.
+ # + # A note on Prometheus scraping: STRICT requires every inbound connection + # to present a mesh-issued mTLS cert. An out-of-mesh Prometheus (e.g. a + # Prometheus running in a namespace without injection, or on a cluster + # without Istio) cannot present such a cert, so scraping the metrics + # service fails with "Connection reset by peer". Fix either by putting + # the scraper in the mesh (ambient capture or sidecar injection), or by + # setting this mode to PERMISSIVE so the mesh accepts plaintext too. + # The chart's AuthorizationPolicy keeps 9121 wide open at the identity + # layer regardless — PeerAuthentication is the gate that flips. peerAuthentication: # mTLS mode for inbound traffic (STRICT, PERMISSIVE, DISABLE, UNSET) # STRICT: Require mTLS on all ports From 6491a2737dfb4bc93a6b825efef165ddfaf999a8 Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Sun, 3 May 2026 14:17:00 +0530 Subject: [PATCH 13/23] test: include Ambient Mesh in test matrix Signed-off-by: Ankit Pati --- Justfile | 9 +- functional-tests/run-all.sh | 48 +- functional-tests/run-ambient-scenarios.sh | 525 ---------------------- functional-tests/run-extra-scenarios.sh | 290 +++++++++++- functional-tests/run-scenario.sh | 201 ++++++--- 5 files changed, 474 insertions(+), 599 deletions(-) delete mode 100755 functional-tests/run-ambient-scenarios.sh diff --git a/Justfile b/Justfile index 933f494c..d741f615 100644 --- a/Justfile +++ b/Justfile @@ -37,11 +37,12 @@ functional-teardown *ARGS: ./functional-tests/teardown.sh {{ARGS}} # Run one scenario against the already-set-up kind cluster, e.g. -# just functional-scenario off off on on off +# just functional-scenario off off on on sidecar +# tls/auth/shard/rep are on|off; istio is off|sidecar|ambient. 
functional-scenario tls auth shard rep istio: ./functional-tests/run-scenario.sh {{tls}} {{auth}} {{shard}} {{rep}} {{istio}} -# Run the full 32-scenario matrix (set FILTER='tls=on istio=on' to narrow) +# Run the full 48-scenario matrix (set FILTER='tls=on istio=ambient' to narrow) functional-run: ./functional-tests/run-all.sh @@ -49,10 +50,6 @@ functional-run: functional-extras: ./functional-tests/run-extra-scenarios.sh -# Run the Istio ambient-mesh regressions on their own -functional-ambient: - ./functional-tests/run-ambient-scenarios.sh - # Full functional suite: setup + matrix + teardown including cluster functional-test: ./functional-tests/setup.sh diff --git a/functional-tests/run-all.sh b/functional-tests/run-all.sh index eac0137d..3ddbfbc4 100755 --- a/functional-tests/run-all.sh +++ b/functional-tests/run-all.sh @@ -6,9 +6,16 @@ HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=lib.sh . "${HERE}/lib.sh" -# 32 scenarios: every combination of tls/auth/shard/rep/istio. +# 48 scenarios: every combination of tls/auth/shard/rep × istio= +# off|sidecar|ambient. The istio dimension is three-valued rather than two +# because sidecar and ambient share almost nothing below the chart-owned +# templates — different label paths, different mTLS enforcement points +# (Envoy vs ztunnel), different rendered resources (DestinationRule only +# in sidecar; AuthorizationPolicy in both but enforced differently). Keep +# them both in the matrix so a regression in one mode can't hide behind a +# passing result in the other. SCENARIOS=() -for istio in off on; do +for istio in off sidecar ambient; do for tls in off on; do for auth in off on; do for shard in off on; do @@ -20,7 +27,9 @@ for istio in off on; do done done -# Optional filter: `FILTER='tls=on istio=on'` runs only matching scenarios. +# Optional filter: `FILTER='tls=on istio=ambient'` runs only matching +# scenarios. 
Filter values for `istio` are off|sidecar|ambient; `on` is +# accepted as an alias for "sidecar or ambient" to keep old habits working. matches() { local tls=$1 auth=$2 shard=$3 rep=$4 istio=$5 for sel in ${FILTER:-}; do @@ -31,7 +40,13 @@ matches() { auth) have=${auth} ;; shard) have=${shard} ;; rep) have=${rep} ;; - istio) have=${istio} ;; + istio) + if [[ ${v} == on ]]; then + [[ ${istio} == sidecar || ${istio} == ambient ]] || return 1 + continue + fi + have=${istio} + ;; *) echo "bad filter key: ${k}" >&2; exit 2 ;; esac [[ ${have} == "${v}" ]] || return 1 @@ -41,6 +56,7 @@ matches() { passed=0 failed=0 +skipped=0 failures=() for s in "${SCENARIOS[@]}"; do @@ -50,6 +66,17 @@ for s in "${SCENARIOS[@]}"; do continue fi + # Ambient scenarios require ztunnel to be installed. setup.sh now + # installs the ambient profile by default, but a user running against + # a pre-existing cluster might have only the sidecar data plane — + # skip rather than fail in that case so the rest of the matrix still + # runs. + if [[ ${istio} == ambient ]] && ! istio_ambient_installed; then + log "SKIP: tls=${tls} auth=${auth} shard=${shard} rep=${rep} istio=${istio} (ztunnel not installed)" + skipped=$(( skipped + 1 )) + continue + fi + log "SCENARIO: tls=${tls} auth=${auth} shard=${shard} rep=${rep} istio=${istio}" if "${HERE}/run-scenario.sh" "${tls}" "${auth}" "${shard}" "${rep}" "${istio}"; then passed=$(( passed + 1 )) @@ -60,18 +87,19 @@ for s in "${SCENARIOS[@]}"; do done echo -log "Matrix summary: ${passed} passed, ${failed} failed" +log "Matrix summary: ${passed} passed, ${failed} failed, ${skipped} skipped" if (( failed > 0 )); then printf ' failed: %s\n' "${failures[@]}" exit 1 fi -# Extra, non-matrix regressions (aclConfig+metrics, default-deny netpol, etc). 
-# Skipped when FILTER is set — filters are matrix-scoped, so the extras +# Extra, non-matrix regressions (aclConfig+metrics, default-deny netpol, +# cross-release MEET isolation, ambient validator footguns, Prometheus +# scraping, etc.). Each one is independent of the tls/auth/shard/rep +# combinations — folding them into the matrix would just pay the +# install/teardown cost N times to exercise the same single assertion. +# Skipped when FILTER is set: filters are matrix-scoped, so the extras # wouldn't match anyway and running them would be surprising. if [[ -z ${FILTER:-} ]]; then "${HERE}/run-extra-scenarios.sh" - # Ambient-mesh regressions. Self-skipping when ztunnel isn't installed - # (e.g. against an older cluster with only the `demo` profile). - "${HERE}/run-ambient-scenarios.sh" fi diff --git a/functional-tests/run-ambient-scenarios.sh b/functional-tests/run-ambient-scenarios.sh deleted file mode 100755 index 6b53505c..00000000 --- a/functional-tests/run-ambient-scenarios.sh +++ /dev/null @@ -1,525 +0,0 @@ -#!/usr/bin/env bash -# Ambient-mesh regressions. Mirrors the core sidecar scenarios but flips -# istio.mode=ambient so ztunnel — not Envoy — carries the Valkey pod traffic. -# -# Rather than expanding the 32-scenario matrix to 96 (sidecar × ambient × on), -# this file concentrates on what's actually different in ambient: -# 1) Pods have no sidecar but still speak mTLS (via ztunnel HBONE). -# 2) DestinationRule is intentionally absent. -# 3) AuthorizationPolicy at L4 (ztunnel) scopes the cluster-bus port to -# same-release SPIFFE principals, preventing cross-release CLUSTER MEET. -# 4) No traffic.sidecar.istio.io/excludePorts annotations — they're -# sidecar-only and must not leak into the rendered pods. -# -# The sidecar matrix in run-all.sh already covers TLS/auth/shard/rep combos. 
-# Ambient is meaningful around the data-plane shape, so we sample one -# standalone, one replica, one cluster scenario — each with auth+TLS on to -# exercise the full ACL and mTLS paths. - -HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=lib.sh -. "${HERE}/lib.sh" - -if ! istio_ambient_installed; then - log "Skipping ambient scenarios — ztunnel not installed" - exit 0 -fi - -RESULTS=() -pass() { RESULTS+=("PASS: $1"); } -fail() { RESULTS+=("FAIL: $1: $2"); return 1; } - -cleanup_release() { - hctl uninstall "${RELEASE}" 2>/dev/null || true - kctl delete pvc --selector="app.kubernetes.io/instance=${RELEASE}" --ignore-not-found >/dev/null -} - -testbench_ambient_exec() { - testbench_exec_in "${TESTBENCH_POD_AMBIENT}" "$@" -} - -# Assert the Valkey pod has NO Envoy sidecar (ambient-mode proof). -assert_no_sidecar() { - local pod=$1 name=$2 - if kctl get pod "${pod}" \ - -o jsonpath='{.spec.containers[*].name} {.spec.initContainers[*].name}' \ - | tr ' ' '\n' | grep -Fxq istio-proxy; then - fail "${name}" "pod ${pod} has an istio-proxy container in ambient mode" - return 1 - fi - return 0 -} - -# Assert the Valkey pod carries the ambient data-plane label. -assert_ambient_label() { - local pod=$1 name=$2 mode - mode=$(kctl get pod "${pod}" \ - -o jsonpath='{.metadata.labels.istio\.io/dataplane-mode}') - if [[ ${mode} != ambient ]]; then - fail "${name}" "pod ${pod} has istio.io/dataplane-mode=${mode:-}, want ambient" - return 1 - fi - return 0 -} - -# --------------------------------------------------------------------------- -# Scenario 1: standalone + ambient. Proves the basic ambient path. -# --------------------------------------------------------------------------- -scenario_standalone_ambient() { - local name="ambient: standalone pings via ztunnel mTLS" - log "SCENARIO: ${name}" - cleanup_release - - if ! 
hctl install "${RELEASE}" "${CHART_DIR}" \ - --set=istio.enabled=true \ - --set=istio.mode=ambient \ - --wait --timeout=180s >/dev/null; then - fail "${name}" "helm install failed"; return - fi - - local pod - pod=$(kctl get pod -l "app.kubernetes.io/instance=${RELEASE}" \ - -o jsonpath='{.items[0].metadata.name}') - - assert_no_sidecar "${pod}" "${name}" || return - assert_ambient_label "${pod}" "${name}" || return - - # DestinationRule must NOT be rendered in ambient mode. - if kctl get destinationrule "${RELEASE}" >/dev/null 2>&1; then - fail "${name}" "DestinationRule/${RELEASE} must not exist in ambient mode" - return - fi - - # PeerAuthentication must be present (enforced by ztunnel). - if ! kctl get peerauthentication "${RELEASE}" >/dev/null 2>&1; then - fail "${name}" "PeerAuthentication/${RELEASE} missing" - return - fi - - # Connectivity from the ambient-enrolled testbench. - local pong - pong=$(testbench_ambient_exec \ - valkey-cli -h "valkey.${NAMESPACE}.svc.cluster.local" ping | tr -d '\r\n') - if [[ ${pong} != PONG ]]; then - fail "${name}" "expected PONG, got '${pong}'"; return - fi - - cleanup_release - pass "${name}" -} - -# --------------------------------------------------------------------------- -# Scenario 2: cluster + ambient. Exercises the multi-pod case, the -# AuthorizationPolicy gate on the bus port, and the absence of the -# sidecar-specific exclude* annotations on the StatefulSet. -# --------------------------------------------------------------------------- -scenario_cluster_ambient() { - local name="ambient: cluster mode converges with AuthorizationPolicy gating bus port" - log "SCENARIO: ${name}" - cleanup_release - - if ! 
hctl install "${RELEASE}" "${CHART_DIR}" \ - --set=istio.enabled=true \ - --set=istio.mode=ambient \ - --set=cluster.enabled=true \ - --set=cluster.shards=3 \ - --set=cluster.replicasPerShard=0 \ - --set=cluster.persistence.size=100Mi \ - --wait --timeout=300s >/dev/null; then - fail "${name}" "helm install failed"; return - fi - kctl wait --for=condition=complete "job/${RELEASE}-cluster-init" --timeout=300s >/dev/null - - local pod - pod=$(kctl get pod -l "app.kubernetes.io/instance=${RELEASE}" \ - -o jsonpath='{.items[0].metadata.name}') - - assert_no_sidecar "${pod}" "${name}" || return - assert_ambient_label "${pod}" "${name}" || return - - # StatefulSet must NOT carry the sidecar-only exclude* annotations — if - # it does, the intent/reality have drifted (ambient has no Envoy to - # exclude ports from, and these leak would-be sidecar coupling into the - # ambient path). - local excl - excl=$(kctl get statefulset "${RELEASE}" \ - -o jsonpath='{.spec.template.metadata.annotations.traffic\.sidecar\.istio\.io/excludeInboundPorts}') - if [[ -n ${excl} ]]; then - fail "${name}" "traffic.sidecar.istio.io/excludeInboundPorts=${excl} leaked into ambient pod" - return - fi - - # AuthorizationPolicy must be present and scoped to the release principal. - local principals - principals=$(kctl get authorizationpolicy "${RELEASE}-cluster-bus" \ - -o jsonpath='{.spec.rules[0].from[0].source.principals[*]}' 2>/dev/null) - if [[ ${principals} != *"/sa/${RELEASE}"* ]]; then - fail "${name}" "AuthorizationPolicy principals=${principals} (want .../sa/${RELEASE}*)" - return - fi - - # Cluster must converge. 
- local state - for _ in $(seq 1 30); do - state=$(testbench_ambient_exec \ - valkey-cli -h "valkey.${NAMESPACE}.svc.cluster.local" \ - cluster info 2>/dev/null | awk -F: '/^cluster_state:/{print $2}' | tr -d '\r\n') - [[ ${state} == ok ]] && break - sleep 2 - done - if [[ ${state} != ok ]]; then - fail "${name}" "cluster_state=${state:-}, want ok"; return - fi - - cleanup_release - pass "${name}" -} - -# --------------------------------------------------------------------------- -# Scenario 3: auth + TLS + cluster + ambient. End-to-end coverage of the -# app-level crypto (TLS) + ACL auth paths running INSIDE ztunnel's HBONE -# mTLS wrapper. If any of these layers fight, this scenario catches it. -# --------------------------------------------------------------------------- -scenario_cluster_ambient_tls_auth() { - local name="ambient: cluster+auth+TLS works end-to-end through ztunnel" - log "SCENARIO: ${name}" - cleanup_release - - if ! hctl install "${RELEASE}" "${CHART_DIR}" \ - --set=istio.enabled=true \ - --set=istio.mode=ambient \ - --set=tls.enabled=true \ - --set=tls.existingSecret="${TLS_SECRET}" \ - --set=auth.enabled=true \ - --set=auth.usersExistingSecret="${AUTH_SECRET}" \ - --set=auth.aclUsers.default.permissions='~* &* +@all' \ - --set=cluster.enabled=true \ - --set=cluster.shards=3 \ - --set=cluster.replicasPerShard=0 \ - --set=cluster.persistence.size=100Mi \ - --wait --timeout=300s >/dev/null; then - fail "${name}" "helm install failed"; return - fi - kctl wait --for=condition=complete "job/${RELEASE}-cluster-init" --timeout=300s >/dev/null - - # Positive check: authenticated TLS client converges. 
- local state - for _ in $(seq 1 30); do - state=$(testbench_ambient_exec valkey-cli \ - -h "valkey.${NAMESPACE}.svc.cluster.local" \ - --no-auth-warning \ - -a "${AUTH_PASSWORD}" \ - --tls --cacert /tls/ca.crt \ - cluster info 2>/dev/null | awk -F: '/^cluster_state:/{print $2}' | tr -d '\r\n') - [[ ${state} == ok ]] && break - sleep 2 - done - if [[ ${state} != ok ]]; then - fail "${name}" "cluster_state=${state:-}, want ok"; return - fi - - # Negative: missing auth still rejected even through ztunnel. - local out rc - set +e - out=$(testbench_ambient_exec valkey-cli \ - -h "valkey.${NAMESPACE}.svc.cluster.local" \ - --no-auth-warning --tls --cacert /tls/ca.crt \ - cluster info 2>&1) - rc=$? - set -e - if ! grep -qi 'NOAUTH' <<<"${out}"; then - fail "${name}" "expected NOAUTH, got (rc=${rc}): ${out}"; return - fi - - cleanup_release - pass "${name}" -} - -# --------------------------------------------------------------------------- -# Scenario 4: cross-release CLUSTER MEET must be blocked by the ambient -# AuthorizationPolicy. Analogous to scenario_two_clusters_isolated in the -# sidecar extras but driven at L4 via ztunnel rather than by NetworkPolicy. -# -# We install two cluster-mode releases in the same namespace, both in -# ambient mode with the chart's Kubernetes NetworkPolicy isolation turned -# OFF (`cluster.isolation.enabled=false`) — so the ONLY thing stopping the -# merge is the AuthorizationPolicy. Then we fire a MEET from A targeting B, -# wait out the node-timeout, and assert each cluster still sees 3 nodes. 
-# --------------------------------------------------------------------------- -install_ambient_cluster() { - local release=$1 - hctl install "${release}" "${CHART_DIR}" \ - --set=istio.enabled=true \ - --set=istio.mode=ambient \ - --set=cluster.enabled=true \ - --set=cluster.persistence.size=100Mi \ - --set=cluster.shards=3 \ - --set=cluster.replicasPerShard=0 \ - --set=cluster.isolation.enabled=false \ - --wait --timeout=300s >/dev/null -} - -count_cluster_nodes_ambient() { - local release=$1 - kctl exec "${release}-0" -c "${release}" -- sh -c \ - "valkey-cli cluster nodes 2>/dev/null | awk 'NF {print \$1}' | sort -u | wc -l" \ - 2>/dev/null | tr -d '[:space:]' || echo 0 -} - -poison_meet_ambient() { - local src_release=$1 dst_release=$2 dst_ip - dst_ip=$(kctl get pod "${dst_release}-0" -o jsonpath='{.status.podIP}') - [[ -n ${dst_ip} ]] || return 1 - kctl exec "${src_release}-0" -c "${src_release}" -- \ - valkey-cli cluster meet "${dst_ip}" 6379 >/dev/null 2>&1 || true -} - -cleanup_ambient_pair() { - hctl uninstall valkey-amb-a 2>/dev/null || true - hctl uninstall valkey-amb-b 2>/dev/null || true - kctl delete pvc --selector='app.kubernetes.io/instance=valkey-amb-a' --ignore-not-found >/dev/null - kctl delete pvc --selector='app.kubernetes.io/instance=valkey-amb-b' --ignore-not-found >/dev/null -} - -scenario_ambient_authz_blocks_cross_release_meet() { - local name="ambient: AuthorizationPolicy blocks cross-release CLUSTER MEET" - log "SCENARIO: ${name}" - cleanup_ambient_pair - - if ! install_ambient_cluster valkey-amb-a; then - fail "${name}" "install of valkey-amb-a failed"; cleanup_ambient_pair; return - fi - if ! 
install_ambient_cluster valkey-amb-b; then - fail "${name}" "install of valkey-amb-b failed"; cleanup_ambient_pair; return - fi - kctl wait --for=condition=complete job/valkey-amb-a-cluster-init --timeout=300s >/dev/null - kctl wait --for=condition=complete job/valkey-amb-b-cluster-init --timeout=300s >/dev/null - - local a_before b_before - a_before=$(count_cluster_nodes_ambient valkey-amb-a) - b_before=$(count_cluster_nodes_ambient valkey-amb-b) - if [[ ${a_before} != 3 || ${b_before} != 3 ]]; then - fail "${name}" "baseline wrong (a=${a_before}, b=${b_before}; want 3+3)" - cleanup_ambient_pair; return - fi - - poison_meet_ambient valkey-amb-a valkey-amb-b - - # Same rationale as the sidecar-mode isolation test: after the MEET, - # `cluster nodes` on A briefly shows 4 as a handshake placeholder. The - # real signal is post-settle. Node-timeout defaults to 15s; give it - # multiple intervals. - sleep 45 - - local a_after b_after - a_after=$(count_cluster_nodes_ambient valkey-amb-a) - b_after=$(count_cluster_nodes_ambient valkey-amb-b) - if [[ ${a_after} != 3 || ${b_after} != 3 ]]; then - fail "${name}" "clusters merged despite AuthorizationPolicy (a=${a_after}, b=${b_after}; want 3+3)" - cleanup_ambient_pair; return - fi - - cleanup_ambient_pair - pass "${name}" -} - -# --------------------------------------------------------------------------- -# Scenario 5: the chart must refuse to install in ambient+cluster mode when -# AuthorizationPolicy is explicitly disabled — dropping it leaves the bus -# port with NO cross-release protection (the chart also skips the -# NetworkPolicy in ambient mode to avoid blocking HBONE). We proved live -# during review that this silently ships an open bus port; the fix is to -# fail closed at install time. 
-# --------------------------------------------------------------------------- -scenario_ambient_ap_disabled_refused() { - local name="ambient: chart refuses install when authorizationPolicy.enabled=false + cluster" - log "SCENARIO: ${name}" - cleanup_release - - local out rc - set +e - out=$(hctl install "${RELEASE}" "${CHART_DIR}" \ - --set=istio.enabled=true \ - --set=istio.mode=ambient \ - --set=cluster.enabled=true \ - --set=cluster.shards=3 \ - --set=cluster.replicasPerShard=0 \ - --set=cluster.persistence.size=100Mi \ - --set=istio.authorizationPolicy.enabled=false \ - --dry-run 2>&1) - rc=$? - set -e - - if (( rc == 0 )); then - fail "${name}" "dry-run succeeded but should have failed: ${out}" - return - fi - if ! grep -q 'cluster-bus port unprotected' <<<"${out}"; then - fail "${name}" "got error without the expected message (rc=${rc}): ${out}" - return - fi - pass "${name}" -} - -# --------------------------------------------------------------------------- -# Scenario 6: the chart must refuse to install when ambient + cluster + -# serviceAccount.create=false (with no explicit name), because every release -# collapses to the namespace's `default` SA and the AP can no longer -# distinguish between them. Live-repro'd in review: two releases merged -# despite both having the AP rendered. The fix is to fail closed at install -# time and force the user to pick a distinct SA name (or let the chart -# create one). 
-# --------------------------------------------------------------------------- -scenario_ambient_shared_default_sa_refused() { - local name="ambient: chart refuses install when serviceAccount defaults to namespace-wide 'default'" - log "SCENARIO: ${name}" - cleanup_release - - local out rc - set +e - out=$(hctl install "${RELEASE}" "${CHART_DIR}" \ - --set=istio.enabled=true \ - --set=istio.mode=ambient \ - --set=cluster.enabled=true \ - --set=cluster.shards=3 \ - --set=cluster.replicasPerShard=0 \ - --set=cluster.persistence.size=100Mi \ - --set=serviceAccount.create=false \ - --dry-run 2>&1) - rc=$? - set -e - - if (( rc == 0 )); then - fail "${name}" "dry-run succeeded but should have failed: ${out}" - return - fi - if ! grep -q "serviceAccount.create=false AND serviceAccount.name empty" <<<"${out}"; then - fail "${name}" "got error without the expected message (rc=${rc}): ${out}" - return - fi - pass "${name}" -} - -# --------------------------------------------------------------------------- -# Scenario 7: custom trustDomain must propagate into the AuthorizationPolicy -# principal. A cluster with `istio.trustDomain=my.mesh.example.com` whose AP -# still emits `cluster.local/…` would self-deny: same-release callers -# present an identity under the CUSTOM trust domain but the AP's ALLOW rule -# only matches the hardcoded one — the cluster-bus port defaults-denies -# even for its own pods and the cluster never forms. -# We install with the chart's default (cluster.local) but prove the RENDER -# honours the override. Testing the failure mode in-cluster would require -# reconfiguring Istio's trust domain, which isn't a chart-level concern. -# --------------------------------------------------------------------------- -scenario_ambient_trustdomain_override() { - local name="ambient: AP principal follows istio.trustDomain override" - log "SCENARIO: ${name}" - cleanup_release - - if ! 
hctl install "${RELEASE}" "${CHART_DIR}" \ - --set=istio.enabled=true \ - --set=istio.mode=ambient \ - --set=cluster.enabled=true \ - --set=cluster.shards=3 \ - --set=cluster.replicasPerShard=0 \ - --set=cluster.persistence.size=100Mi \ - --set=istio.trustDomain=my.mesh.example.com \ - --wait --timeout=240s >/dev/null 2>&1; then - # Install will NOT converge because Istio actually uses cluster.local — - # that's a feature of this scenario. We only need the AP rendered to - # verify the principal string. - : - fi - - local principals - principals=$(kctl get authorizationpolicy "${RELEASE}-cluster-bus" \ - -o jsonpath='{.spec.rules[0].from[0].source.principals[*]}' 2>/dev/null) - if [[ ${principals} != "my.mesh.example.com/ns/${NAMESPACE}/sa/${RELEASE}" ]]; then - fail "${name}" "AP principals=${principals}, want my.mesh.example.com/ns/${NAMESPACE}/sa/${RELEASE}" - return - fi - - cleanup_release - pass "${name}" -} - -trap 'cleanup_release; cleanup_ambient_pair' EXIT - -# --------------------------------------------------------------------------- -# Scenario 8: Prometheus scraping the metrics exporter must work in -# ambient mode. The AuthorizationPolicy is ALLOW-only, which triggers -# default-deny for any non-matching traffic — if the chart forgets to -# include the metrics port in the open rule, production Prometheus stacks -# silently stop seeing Valkey metrics the moment someone enables Istio. -# --------------------------------------------------------------------------- -scenario_ambient_prometheus_scrape() { - local name="ambient: in-mesh Prometheus can scrape metrics exporter" - log "SCENARIO: ${name}" - cleanup_release - - if ! 
hctl install "${RELEASE}" "${CHART_DIR}" \ - --set=istio.enabled=true \ - --set=istio.mode=ambient \ - --set=cluster.enabled=true \ - --set=cluster.shards=3 \ - --set=cluster.replicasPerShard=0 \ - --set=cluster.persistence.size=100Mi \ - --set=metrics.enabled=true \ - --wait --timeout=300s >/dev/null; then - fail "${name}" "helm install failed"; return - fi - kctl wait --for=condition=complete "job/${RELEASE}-cluster-init" --timeout=300s >/dev/null - - # Launch a curl pod enrolled in ambient (same mesh-participation shape - # as an in-mesh Prometheus would have). - local scraper="scrape-${RELEASE}-$$" - kctl delete pod "${scraper}" --ignore-not-found --wait=true >/dev/null - kctl run "${scraper}" \ - --image=curlimages/curl \ - --labels='istio.io/dataplane-mode=ambient' \ - --restart=Never \ - --command -- sleep 300 >/dev/null - kctl wait --for=condition=Ready "pod/${scraper}" --timeout=120s >/dev/null - - local code out - set +e - out=$(kctl exec "${scraper}" -c "${scraper}" -- \ - curl -sS --max-time 10 -w '\nHTTP=%{http_code}\n' \ - "http://${RELEASE}-metrics.${NAMESPACE}.svc.cluster.local:9121/metrics" 2>&1) - set -e - code=$(awk -F= '/^HTTP=/{print $2}' <<<"${out}") - - kctl delete pod "${scraper}" --ignore-not-found --wait=false >/dev/null - - if [[ ${code} != "200" ]]; then - fail "${name}" "scrape returned HTTP=${code:-}, body was: ${out}" - return - fi - if ! 
grep -q '^redis_' <<<"${out}"; then - fail "${name}" "HTTP 200 but body lacks redis_* metrics" - return - fi - - cleanup_release - pass "${name}" -} - -scenario_standalone_ambient || true -scenario_cluster_ambient || true -scenario_cluster_ambient_tls_auth || true -scenario_ambient_authz_blocks_cross_release_meet || true -scenario_ambient_ap_disabled_refused || true -scenario_ambient_shared_default_sa_refused || true -scenario_ambient_trustdomain_override || true -scenario_ambient_prometheus_scrape || true - -echo -log "Ambient scenario summary" -passed=0; failed=0 -for r in "${RESULTS[@]}"; do - printf ' %s\n' "${r}" - [[ ${r} == PASS:* ]] && passed=$(( passed + 1 )) || failed=$(( failed + 1 )) -done -echo -log "Ambient: ${passed} passed, ${failed} failed" -(( failed == 0 )) diff --git a/functional-tests/run-extra-scenarios.sh b/functional-tests/run-extra-scenarios.sh index f95d4a09..e403bed0 100755 --- a/functional-tests/run-extra-scenarios.sh +++ b/functional-tests/run-extra-scenarios.sh @@ -355,14 +355,288 @@ scenario_isolation_off_lets_merge_happen() { pass "${name}" } -trap 'cleanup_release; cleanup_pair' EXIT - -scenario_aclconfig_metrics || true -scenario_default_deny_netpol || true -scenario_bus_port_hidden || true -scenario_readiness_probe_exists || true -scenario_two_clusters_isolated || true -scenario_isolation_off_lets_merge_happen|| true +# --------------------------------------------------------------------------- +# Ambient-only regressions. Each of these tests a behaviour that's +# independent of the tls/auth/shard/rep dimensions, so it lives here +# rather than inflating the matrix with 16 copies of the same assertion. +# Each self-skips if the cluster lacks the ambient data plane. 
+# ---------------------------------------------------------------------------
+
+install_ambient_cluster() {  # $1=release; 3 shards, 0 replicas, ambient mesh, isolation NetworkPolicy off
+  local release=$1
+  hctl install "${release}" "${CHART_DIR}" \
+    --set=istio.enabled=true \
+    --set=istio.mode=ambient \
+    --set=cluster.enabled=true \
+    --set=cluster.persistence.size=100Mi \
+    --set=cluster.shards=3 \
+    --set=cluster.replicasPerShard=0 \
+    --set=cluster.isolation.enabled=false \
+    --wait --timeout=300s >/dev/null
+}
+
+count_cluster_nodes_ambient() {  # $1=release; prints the number of distinct node IDs seen by pod <release>-0
+  local release=$1
+  kctl exec "${release}-0" -c "${release}" -- sh -c \
+    "valkey-cli cluster nodes 2>/dev/null | awk 'NF {print \$1}' | sort -u | wc -l" \
+    2>/dev/null | tr -d '[:space:]' || echo 0
+}
+
+poison_meet_ambient() {  # $1=source release, $2=target release; returns 1 when the target pod has no IP
+  local src_release=$1 dst_release=$2 dst_ip
+  dst_ip=$(kctl get pod "${dst_release}-0" -o jsonpath='{.status.podIP}')
+  [[ -n ${dst_ip} ]] || return 1
+  kctl exec "${src_release}-0" -c "${src_release}" -- \
+    valkey-cli cluster meet "${dst_ip}" 6379 >/dev/null 2>&1 || true
+}
+
+cleanup_ambient_pair() {  # best-effort uninstall of both ambient releases plus their PVCs
+  hctl uninstall valkey-amb-a 2>/dev/null || true
+  hctl uninstall valkey-amb-b 2>/dev/null || true
+  kctl delete pvc --selector='app.kubernetes.io/instance=valkey-amb-a' --ignore-not-found >/dev/null
+  kctl delete pvc --selector='app.kubernetes.io/instance=valkey-amb-b' --ignore-not-found >/dev/null
+}
+
+# Cross-release CLUSTER MEET must be blocked by the ambient
+# AuthorizationPolicy. Analogous to scenario_two_clusters_isolated above
+# but driven at L4 via ztunnel rather than by NetworkPolicy (the
+# NetworkPolicy is intentionally skipped in ambient — it would drop
+# HBONE). The ONLY thing stopping the merge here is the AP, so we
+# disable cluster.isolation.enabled to force that.
+scenario_ambient_authz_blocks_cross_release_meet() {
+  local name="ambient: AuthorizationPolicy blocks cross-release CLUSTER MEET"
+  log "SCENARIO: ${name}"
+  if ! istio_ambient_installed; then
+    log "SKIP: ${name} (ztunnel not installed)"
+    return
+  fi
+  cleanup_ambient_pair
+
+  if ! install_ambient_cluster valkey-amb-a; then
+    fail "${name}" "install of valkey-amb-a failed"; cleanup_ambient_pair; return
+  fi
+  if ! install_ambient_cluster valkey-amb-b; then
+    fail "${name}" "install of valkey-amb-b failed"; cleanup_ambient_pair; return
+  fi
+  kctl wait --for=condition=complete job/valkey-amb-a-cluster-init --timeout=300s >/dev/null
+  kctl wait --for=condition=complete job/valkey-amb-b-cluster-init --timeout=300s >/dev/null
+
+  local a_before b_before
+  a_before=$(count_cluster_nodes_ambient valkey-amb-a)
+  b_before=$(count_cluster_nodes_ambient valkey-amb-b)
+  if [[ ${a_before} != 3 || ${b_before} != 3 ]]; then
+    fail "${name}" "baseline wrong (a=${a_before}, b=${b_before}; want 3+3)"
+    cleanup_ambient_pair; return
+  fi
+
+  poison_meet_ambient valkey-amb-a valkey-amb-b \
+    || { fail "${name}" "CLUSTER MEET was never issued (no podIP for valkey-amb-b-0)"; cleanup_ambient_pair; return; }  # an unsent MEET would otherwise yield a vacuous PASS
+  # Same rationale as the sidecar-mode isolation test: after the MEET,
+  # `cluster nodes` on A briefly shows 4 as a handshake placeholder.
+  # The real signal is post-settle. Node-timeout defaults to 15s; give
+  # it multiple intervals.
+  sleep 45
+
+  local a_after b_after
+  a_after=$(count_cluster_nodes_ambient valkey-amb-a)
+  b_after=$(count_cluster_nodes_ambient valkey-amb-b)
+  if [[ ${a_after} != 3 || ${b_after} != 3 ]]; then
+    fail "${name}" "clusters merged despite AuthorizationPolicy (a=${a_after}, b=${b_after}; want 3+3)"
+    cleanup_ambient_pair; return
+  fi
+
+  cleanup_ambient_pair
+  pass "${name}"
+}
+
+# The chart must refuse to install in ambient+cluster mode when the
+# AuthorizationPolicy is explicitly disabled — dropping it leaves the bus
+# port with NO cross-release protection (the NetworkPolicy is also
+# skipped in ambient to avoid blocking HBONE). Fail-closed at template
+# time so nobody silently ships an open cluster.
+scenario_ambient_ap_disabled_refused() { + local name="ambient: chart refuses install when authorizationPolicy.enabled=false + cluster" + log "SCENARIO: ${name}" + cleanup_release + + local out rc + set +e + out=$(hctl install "${RELEASE}" "${CHART_DIR}" \ + --set=istio.enabled=true \ + --set=istio.mode=ambient \ + --set=cluster.enabled=true \ + --set=cluster.shards=3 \ + --set=cluster.replicasPerShard=0 \ + --set=cluster.persistence.size=100Mi \ + --set=istio.authorizationPolicy.enabled=false \ + --dry-run 2>&1) + rc=$? + set -e + + if (( rc == 0 )); then + fail "${name}" "dry-run succeeded but should have failed: ${out}" + return + fi + if ! grep -q 'cluster-bus port unprotected' <<<"${out}"; then + fail "${name}" "got error without the expected message (rc=${rc}): ${out}" + return + fi + pass "${name}" +} + +# The chart must refuse when ambient + cluster + serviceAccount.create=false +# with no explicit name, because every release collapses to the namespace's +# `default` SA and the AP can no longer distinguish releases. Repro'd live +# during review: two clusters merged despite both having the AP rendered. +scenario_ambient_shared_default_sa_refused() { + local name="ambient: chart refuses install when serviceAccount defaults to namespace-wide 'default'" + log "SCENARIO: ${name}" + cleanup_release + + local out rc + set +e + out=$(hctl install "${RELEASE}" "${CHART_DIR}" \ + --set=istio.enabled=true \ + --set=istio.mode=ambient \ + --set=cluster.enabled=true \ + --set=cluster.shards=3 \ + --set=cluster.replicasPerShard=0 \ + --set=cluster.persistence.size=100Mi \ + --set=serviceAccount.create=false \ + --dry-run 2>&1) + rc=$? + set -e + + if (( rc == 0 )); then + fail "${name}" "dry-run succeeded but should have failed: ${out}" + return + fi + if ! 
grep -q "serviceAccount.create=false AND serviceAccount.name empty" <<<"${out}"; then + fail "${name}" "got error without the expected message (rc=${rc}): ${out}" + return + fi + pass "${name}" +} + +# Custom trustDomain must propagate into the AuthorizationPolicy principal. +# A cluster with `istio.trustDomain=my.mesh.example.com` whose AP still +# emits `cluster.local/…` would self-deny: same-release callers present an +# identity under the CUSTOM trust domain but the AP's ALLOW rule only +# matches the hardcoded one, so the bus port default-denies even for its +# own pods. +# We don't actually reconfigure Istio's trust domain here — that's a +# cluster-wide concern, not chart-level — so the install does NOT fully +# converge. The test inspects the rendered AP to confirm the principal +# string follows the override. That's the piece the chart owns. +scenario_ambient_trustdomain_override() { + local name="ambient: AP principal follows istio.trustDomain override" + log "SCENARIO: ${name}" + if ! istio_ambient_installed; then + log "SKIP: ${name} (ztunnel not installed)" + return + fi + cleanup_release + + if ! hctl install "${RELEASE}" "${CHART_DIR}" \ + --set=istio.enabled=true \ + --set=istio.mode=ambient \ + --set=cluster.enabled=true \ + --set=cluster.shards=3 \ + --set=cluster.replicasPerShard=0 \ + --set=cluster.persistence.size=100Mi \ + --set=istio.trustDomain=my.mesh.example.com \ + --wait --timeout=240s >/dev/null 2>&1; then + # Expected: install won't converge because the actual mesh trust + # domain is still cluster.local. We only need the AP rendered to + # verify the principal string. 
+ : + fi + + local principals + principals=$(kctl get authorizationpolicy "${RELEASE}-cluster-bus" \ + -o jsonpath='{.spec.rules[0].from[0].source.principals[*]}' 2>/dev/null) + if [[ ${principals} != "my.mesh.example.com/ns/${NAMESPACE}/sa/${RELEASE}" ]]; then + fail "${name}" "AP principals=${principals}, want my.mesh.example.com/ns/${NAMESPACE}/sa/${RELEASE}" + return + fi + + cleanup_release + pass "${name}" +} + +# Prometheus scraping the metrics exporter must work in ambient mode. The +# AuthorizationPolicy is ALLOW-only, which triggers Istio default-deny for +# any non-matching traffic — if the chart forgets to include the metrics +# port in the open rule, production Prometheus stacks silently stop +# seeing Valkey metrics the moment someone enables Istio. +scenario_ambient_prometheus_scrape() { + local name="ambient: in-mesh Prometheus can scrape metrics exporter" + log "SCENARIO: ${name}" + if ! istio_ambient_installed; then + log "SKIP: ${name} (ztunnel not installed)" + return + fi + cleanup_release + + if ! hctl install "${RELEASE}" "${CHART_DIR}" \ + --set=istio.enabled=true \ + --set=istio.mode=ambient \ + --set=cluster.enabled=true \ + --set=cluster.shards=3 \ + --set=cluster.replicasPerShard=0 \ + --set=cluster.persistence.size=100Mi \ + --set=metrics.enabled=true \ + --wait --timeout=300s >/dev/null; then + fail "${name}" "helm install failed"; return + fi + kctl wait --for=condition=complete "job/${RELEASE}-cluster-init" --timeout=300s >/dev/null + + # An ambient-enrolled curl pod simulates an in-mesh Prometheus. 
+ local scraper="scrape-${RELEASE}-$$" + kctl delete pod "${scraper}" --ignore-not-found --wait=true >/dev/null + kctl run "${scraper}" \ + --image=curlimages/curl \ + --labels='istio.io/dataplane-mode=ambient' \ + --restart=Never \ + --command -- sleep 300 >/dev/null + kctl wait --for=condition=Ready "pod/${scraper}" --timeout=120s >/dev/null + + local code out + set +e + out=$(kctl exec "${scraper}" -c "${scraper}" -- \ + curl -sS --max-time 10 -w '\nHTTP=%{http_code}\n' \ + "http://${RELEASE}-metrics.${NAMESPACE}.svc.cluster.local:9121/metrics" 2>&1) + set -e + code=$(awk -F= '/^HTTP=/{print $2}' <<<"${out}") + + kctl delete pod "${scraper}" --ignore-not-found --wait=false >/dev/null + + if [[ ${code} != "200" ]]; then + fail "${name}" "scrape returned HTTP=${code:-}, body was: ${out}" + return + fi + if ! grep -q '^redis_' <<<"${out}"; then + fail "${name}" "HTTP 200 but body lacks redis_* metrics" + return + fi + + cleanup_release + pass "${name}" +} + +trap 'cleanup_release; cleanup_pair; cleanup_ambient_pair' EXIT + +scenario_aclconfig_metrics || true +scenario_default_deny_netpol || true +scenario_bus_port_hidden || true +scenario_readiness_probe_exists || true +scenario_two_clusters_isolated || true +scenario_isolation_off_lets_merge_happen || true +scenario_ambient_authz_blocks_cross_release_meet || true +scenario_ambient_ap_disabled_refused || true +scenario_ambient_shared_default_sa_refused || true +scenario_ambient_trustdomain_override || true +scenario_ambient_prometheus_scrape || true echo log "Extra scenario summary" diff --git a/functional-tests/run-scenario.sh b/functional-tests/run-scenario.sh index 600d0149..4615e707 100755 --- a/functional-tests/run-scenario.sh +++ b/functional-tests/run-scenario.sh @@ -4,16 +4,19 @@ # # Usage: # ./run-scenario.sh -# Each arg is "on" or "off". Example: -# ./run-scenario.sh off off on on on -# drives the "TLS off, auth off, shard on, rep on, Istio on" scenario. 
+# tls/auth/shard/rep are on|off; istio is off|sidecar|ambient. +# Example: +# ./run-scenario.sh off off on on ambient +# drives the "TLS off, auth off, shard on, rep on, Istio ambient" scenario. HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=lib.sh . "${HERE}/lib.sh" if (( $# != 5 )); then - echo "usage: $0 (each on|off)" >&2 + echo "usage: $0 " >&2 + echo " tls/auth/shard/rep: on|off" >&2 + echo " istio: off|sidecar|ambient" >&2 exit 2 fi @@ -23,18 +26,29 @@ on_or_off() { *) echo "expected 'on' or 'off', got: $1" >&2; return 1 ;; esac } -for v in "$@"; do on_or_off "${v}"; done +for v in "$1" "$2" "$3" "$4"; do on_or_off "${v}"; done +case "$5" in + off|sidecar|ambient) ;; + *) echo "expected istio=off|sidecar|ambient, got: $5" >&2; exit 2 ;; +esac TLS=$1; AUTH=$2; SHARD=$3; REP=$4; ISTIO=$5 SCENARIO="tls=${TLS} auth=${AUTH} shard=${SHARD} rep=${REP} istio=${ISTIO}" -is_on() { [[ $1 == on ]]; } +is_on() { [[ $1 == on ]]; } +is_mesh() { [[ ${ISTIO} != off ]]; } +is_sidecar() { [[ ${ISTIO} == sidecar ]]; } +is_ambient() { [[ ${ISTIO} == ambient ]]; } -if is_on "${ISTIO}"; then - TESTBENCH=${TESTBENCH_POD_INJECTED} -else - TESTBENCH=${TESTBENCH_POD} -fi +# Pick a testbench that shares the right mesh participation with the chart +# workload — that's the only way the in-mesh connectivity checks reflect +# what an in-production client on the same mesh would experience. The three +# testbench flavours are launched once by setup.sh. 
+case "${ISTIO}" in + off) TESTBENCH=${TESTBENCH_POD} ;; + sidecar) TESTBENCH=${TESTBENCH_POD_INJECTED} ;; + ambient) TESTBENCH=${TESTBENCH_POD_AMBIENT} ;; +esac testbench_exec() { testbench_exec_in "${TESTBENCH}" "$@"; } # --------------------------------------------------------------------------- @@ -42,15 +56,15 @@ testbench_exec() { testbench_exec_in "${TESTBENCH}" "$@"; } # --------------------------------------------------------------------------- helm_flags=() -if is_on "${ISTIO}"; then - # Let Envoy get injected into every chart pod; turn on the chart's Istio - # templates. The chart pins sidecar.istio.io/inject=true on every pod - # itself, so no namespace-level label is required. - helm_flags+=(--set=istio.enabled=true) +if is_mesh; then + helm_flags+=( + --set=istio.enabled=true + "--set=istio.mode=${ISTIO}" + ) fi -# istio=off needs no extra flags: with the namespace unlabelled and -# istio.enabled=false, the chart emits zero mesh labels and pods stay out -# of both data planes. +# istio=off needs no extra flags: the chart emits zero mesh labels when +# istio.enabled=false, and setup.sh leaves the namespace unlabelled so +# pods stay out of both data planes by default. if is_on "${AUTH}"; then helm_flags+=( @@ -152,38 +166,125 @@ assert_eq() { fi } -# Istio resources: PeerAuthentication + DestinationRule (headless DR only exists -# in replica / cluster mode) should be present iff istio=on. -if is_on "${ISTIO}"; then - log "Istio check: chart-owned resources must exist" - kctl get peerauthentication "${RELEASE}" >/dev/null \ - || fail "PeerAuthentication/${RELEASE} missing" - kctl get destinationrule "${RELEASE}" >/dev/null \ - || fail "DestinationRule/${RELEASE} missing" - if is_on "${SHARD}" || is_on "${REP}"; then - kctl get destinationrule "${RELEASE}-headless" >/dev/null \ - || fail "DestinationRule/${RELEASE}-headless missing" - fi +# Pick any chart pod so mode-specific checks can inspect live container / +# label state. 
The first matching pod is fine — all pods in a release +# share the same mesh participation shape. +pod=$(kctl get pod -l "app.kubernetes.io/instance=${RELEASE}" \ + -o jsonpath='{.items[0].metadata.name}') - # Chart pods must actually have the Envoy sidecar. Istio >=1.29 injects it - # as a native sidecar (initContainer with restartPolicy=Always), so check - # both containers and initContainers. - pod=$(kctl get pod -l "app.kubernetes.io/instance=${RELEASE}" \ - -o jsonpath='{.items[0].metadata.name}') - if ! kctl get pod "${pod}" \ - -o jsonpath='{.spec.containers[*].name} {.spec.initContainers[*].name}' \ - | tr ' ' '\n' | grep -Fxq istio-proxy; then - fail "pod ${pod} has no istio-proxy container" - fi -else - log "Istio check: chart-owned resources must be absent" - if kctl get peerauthentication "${RELEASE}" >/dev/null 2>&1; then - fail "PeerAuthentication/${RELEASE} should not exist when istio=off" - fi - if kctl get destinationrule "${RELEASE}" >/dev/null 2>&1; then - fail "DestinationRule/${RELEASE} should not exist when istio=off" - fi -fi +# Chart-owned Istio resources should be present iff istio is enabled. +# PeerAuthentication is mode-neutral (enforced by Envoy in sidecar, ztunnel +# in ambient). DestinationRule is sidecar-only — ambient's ztunnel HBONE +# supersedes it. AuthorizationPolicy renders only in cluster mode. +case "${ISTIO}" in + off) + log "Istio check: chart-owned resources must be absent" + if kctl get peerauthentication "${RELEASE}" >/dev/null 2>&1; then + fail "PeerAuthentication/${RELEASE} should not exist when istio=off" + fi + if kctl get destinationrule "${RELEASE}" >/dev/null 2>&1; then + fail "DestinationRule/${RELEASE} should not exist when istio=off" + fi + if kctl get authorizationpolicy "${RELEASE}-cluster-bus" >/dev/null 2>&1; then + fail "AuthorizationPolicy/${RELEASE}-cluster-bus should not exist when istio=off" + fi + # Pod must have no istio-proxy container. 
+ if kctl get pod "${pod}" \ + -o jsonpath='{.spec.containers[*].name} {.spec.initContainers[*].name}' \ + | tr ' ' '\n' | grep -Fxq istio-proxy; then + fail "pod ${pod} has an istio-proxy container when istio=off" + fi + ;; + sidecar) + log "Istio check: sidecar-mode resources must exist" + kctl get peerauthentication "${RELEASE}" >/dev/null \ + || fail "PeerAuthentication/${RELEASE} missing in sidecar mode" + kctl get destinationrule "${RELEASE}" >/dev/null \ + || fail "DestinationRule/${RELEASE} missing in sidecar mode" + if is_on "${SHARD}" || is_on "${REP}"; then + kctl get destinationrule "${RELEASE}-headless" >/dev/null \ + || fail "DestinationRule/${RELEASE}-headless missing in sidecar mode" + fi + # Istio >=1.29 injects as a native sidecar (initContainer with + # restartPolicy=Always), so check both containers and initContainers. + if ! kctl get pod "${pod}" \ + -o jsonpath='{.spec.containers[*].name} {.spec.initContainers[*].name}' \ + | tr ' ' '\n' | grep -Fxq istio-proxy; then + fail "pod ${pod} has no istio-proxy container in sidecar mode" + fi + if is_on "${SHARD}"; then + # AP renders only in cluster mode, but it applies in BOTH sidecar + # and ambient. Verify once per mode so a sidecar-only regression + # (e.g. dropping the AP when !ambient) can't hide. + kctl get authorizationpolicy "${RELEASE}-cluster-bus" >/dev/null \ + || fail "AuthorizationPolicy/${RELEASE}-cluster-bus missing in sidecar+cluster mode" + # The bus-port exclude annotations are sidecar-only (ambient has + # no Envoy to exclude ports from). + excl=$(kctl get statefulset "${RELEASE}" \ + -o jsonpath='{.spec.template.metadata.annotations.traffic\.sidecar\.istio\.io/excludeInboundPorts}') + if [[ ${excl} != "16379" ]]; then + fail "traffic.sidecar.istio.io/excludeInboundPorts=${excl:-}, want '16379' in sidecar+cluster" + fi + else + # AP is cluster-mode only. Don't render for standalone/replica. 
+ if kctl get authorizationpolicy "${RELEASE}-cluster-bus" >/dev/null 2>&1; then + fail "AuthorizationPolicy/${RELEASE}-cluster-bus should not render outside cluster mode" + fi + fi + ;; + ambient) + log "Istio check: ambient-mode resources must exist" + kctl get peerauthentication "${RELEASE}" >/dev/null \ + || fail "PeerAuthentication/${RELEASE} missing in ambient mode" + # DestinationRule is sidecar-only; a DR in ambient requires a + # waypoint proxy and layers a second mTLS inside ztunnel's HBONE. + if kctl get destinationrule "${RELEASE}" >/dev/null 2>&1; then + fail "DestinationRule/${RELEASE} must not exist in ambient mode" + fi + if kctl get destinationrule "${RELEASE}-headless" >/dev/null 2>&1; then + fail "DestinationRule/${RELEASE}-headless must not exist in ambient mode" + fi + # Ambient has no sidecar — ztunnel handles HBONE at the node. If any + # chart pod picks one up, our inject=false label is being ignored. + if kctl get pod "${pod}" \ + -o jsonpath='{.spec.containers[*].name} {.spec.initContainers[*].name}' \ + | tr ' ' '\n' | grep -Fxq istio-proxy; then + fail "pod ${pod} has an istio-proxy container in ambient mode" + fi + dpmode=$(kctl get pod "${pod}" -o jsonpath='{.metadata.labels.istio\.io/dataplane-mode}') + if [[ ${dpmode} != ambient ]]; then + fail "pod ${pod} has istio.io/dataplane-mode=${dpmode:-}, want ambient" + fi + if is_on "${SHARD}"; then + # Ambient skips the cluster-isolation NetworkPolicy (it would + # drop HBONE) and relies entirely on the AP at the ztunnel + # layer. Verify both halves of that swap. + kctl get authorizationpolicy "${RELEASE}-cluster-bus" >/dev/null \ + || fail "AuthorizationPolicy/${RELEASE}-cluster-bus missing in ambient+cluster mode" + if kctl get networkpolicy "${RELEASE}-cluster-isolation" >/dev/null 2>&1; then + fail "NetworkPolicy/${RELEASE}-cluster-isolation must not exist in ambient+cluster mode" + fi + # Sidecar-only exclude annotations must not leak through. 
+ excl=$(kctl get statefulset "${RELEASE}" \ + -o jsonpath='{.spec.template.metadata.annotations.traffic\.sidecar\.istio\.io/excludeInboundPorts}') + if [[ -n ${excl} ]]; then + fail "traffic.sidecar.istio.io/excludeInboundPorts=${excl} leaked into ambient pod" + fi + # And the AP bus rule must be scoped to this release's SPIFFE + # principal, not a wildcard or missing `from` — that's the whole + # point of the ambient cross-release isolation promise. + principals=$(kctl get authorizationpolicy "${RELEASE}-cluster-bus" \ + -o jsonpath='{.spec.rules[0].from[0].source.principals[*]}') + if [[ ${principals} != *"/sa/${RELEASE}" ]]; then + fail "AuthorizationPolicy principals=${principals}, want .../sa/${RELEASE}" + fi + else + if kctl get authorizationpolicy "${RELEASE}-cluster-bus" >/dev/null 2>&1; then + fail "AuthorizationPolicy/${RELEASE}-cluster-bus should not render outside cluster mode" + fi + fi + ;; +esac # Positive: the fully-correct invocation should succeed. log "Positive check" From 2d6fa242b40cb4ba89ea8b899674009d96d86235 Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Sun, 3 May 2026 20:40:17 +0530 Subject: [PATCH 14/23] feat: `CLUSTER FAILOVER` upon `rollout restart` Signed-off-by: Ankit Pati --- functional-tests/run-extra-scenarios.sh | 182 ++++++++++++++++++++++ valkey/scripts/cluster-prestop-script.sh | 168 ++++++++++++++++++++ valkey/templates/cluster-script.yaml | 2 + valkey/templates/cluster-statefulset.yaml | 21 +++ valkey/tests/cluster_test.yaml | 164 ++++++++++++++++++- valkey/values.schema.json | 16 ++ valkey/values.yaml | 36 +++++ 7 files changed, 587 insertions(+), 2 deletions(-) create mode 100644 valkey/scripts/cluster-prestop-script.sh diff --git a/functional-tests/run-extra-scenarios.sh b/functional-tests/run-extra-scenarios.sh index e403bed0..57a1510a 100755 --- a/functional-tests/run-extra-scenarios.sh +++ b/functional-tests/run-extra-scenarios.sh @@ -624,6 +624,187 @@ scenario_ambient_prometheus_scrape() { pass "${name}" } +# 
--------------------------------------------------------------------------- +# Scenario: `kubectl rollout restart` on a replicated cluster must not cause +# client-visible disruption. The preStop hook runs `CLUSTER FAILOVER` on +# every primary before SIGTERM, so the shard already has a new primary by +# the time the old pod terminates. We assert this by: +# +# 1) Installing cluster.shards=3, cluster.replicasPerShard=1 (6 pods). +# 2) Recording each pod's role (master/slave) — this is our baseline. +# 3) Writing a known key through any pod (cluster redirects handle placement). +# 4) `kubectl rollout restart` the STS and waiting for the rollout. +# 5) Re-checking cluster_state, master/slave counts, and the key's value. +# 6) Comparing new roles to baseline: since every primary is asked to hand +# off to its own replica, every primary/replica pair should have flipped +# ordinals. We assert AT LEAST THREE ordinals flipped — any weaker check +# would pass even if the hook never ran and the cluster simply waited +# through node-timeout failovers. +# +# If the preStop hook is broken or absent, steps 5-6 still "work" in the sense +# that the cluster eventually self-heals via node-timeout, but: +# - there's a 15s+ window of unavailability per primary, +# - and the pod role stays the same after restart (the restarted pod +# re-joins as primary because its nodes.conf persisted), so the role-flip +# assertion catches it. 
+# --------------------------------------------------------------------------- +scenario_rollout_restart_orderly_failover() { + local name="rollout restart performs orderly CLUSTER FAILOVER (no client-visible gap)" + log "SCENARIO: ${name}" + cleanup_release + + # nodeTimeout pinned high (3 min) so cluster-node-timeout auto-failover + # CANNOT fire during the rollout — a normal per-pod restart takes ~10-30s + # and the whole rollout ~2-3min, so with a 15s default timeout the + # observed role-flip signal could be produced either by preStop OR by + # auto-failover of an in-flight primary. Bumping to 180s guarantees any + # observed flip is the work of preStop. + if ! hctl install "${RELEASE}" "${CHART_DIR}" \ + --set=cluster.enabled=true \ + --set=cluster.persistence.size=100Mi \ + --set=cluster.shards=3 \ + --set=cluster.replicasPerShard=1 \ + --set=cluster.nodeTimeout=180000 \ + --wait --timeout=300s >/dev/null; then + fail "${name}" "helm install failed" + return + fi + kctl wait --for=condition=complete "job/${RELEASE}-cluster-init" --timeout=300s >/dev/null + + # Gossip convergence lags job completion: the init Job returns "done" + # once `cluster create` is ACK'd, but `cluster_state:ok` requires every + # node to have seen every other node's PING/PONG. Writing canary data + # or triggering a rollout before that window closes lets the preStop + # script's own `cluster_state != ok` early-exit fire, bypassing the + # graceful FAILOVER and silently dropping in-memory writes when the + # primary pod is replaced. + local s + for _ in $(seq 1 60); do + s=$(kctl exec "${RELEASE}-0" -c "${RELEASE}" -- \ + valkey-cli cluster info 2>/dev/null \ + | awk -F: '/^cluster_state:/{print $2}' | tr -d '\r\n' || true) + [[ ${s} == ok ]] && break + sleep 2 + done + if [[ ${s} != ok ]]; then + fail "${name}" "cluster_state=${s:-} after install (want ok before rollout)" + cleanup_release; return + fi + + # Capture the role of every pod pre-restart. 
Keyed by pod ordinal so we + # can compare "same ordinal, different role" after. + snapshot_roles() { + local n=6 i role + for i in $(seq 0 $((n - 1))); do + role=$(kctl exec "${RELEASE}-${i}" -c "${RELEASE}" -- \ + valkey-cli info replication 2>/dev/null \ + | awk -F: '/^role:/{print $2}' | tr -d '\r\n' || true) + printf '%s=%s\n' "${i}" "${role}" + done + } + + local before + before=$(snapshot_roles) + local masters_before slaves_before + masters_before=$(printf '%s\n' "${before}" | grep -c '=master' || true) + slaves_before=$(printf '%s\n' "${before}" | grep -c '=slave\|=replica' || true) + if [[ ${masters_before} != 3 || ${slaves_before} != 3 ]]; then + fail "${name}" "baseline wrong: masters=${masters_before} slaves=${slaves_before} (want 3+3)" + cleanup_release; return + fi + + # Write a canary key so we can prove data integrity after the rollout. + # Must write through a CLUSTER-aware client so slot routing works — + # valkey-cli -c follows MOVED redirects. The value contains shell + # metacharacters for the same reason AUTH_PASSWORD does. + local canary_key="prestop-canary-$$" + local canary_val='rollout-ok $shell "quote" \back`tick`' + if ! kctl exec "${RELEASE}-0" -c "${RELEASE}" -- \ + valkey-cli -c set "${canary_key}" "${canary_val}" >/dev/null 2>&1; then + fail "${name}" "initial SET failed" + cleanup_release; return + fi + + # The actual rollout. Default updateStrategy=RollingUpdate → pods + # restart one at a time from highest ordinal (podManagementPolicy + # controls creation/deletion parallelism, not rolling-update pacing). + # Each primary-pod restart should trigger a preStop FAILOVER; each + # replica-pod restart should no-op. + log "triggering rollout restart" + kctl rollout restart "statefulset/${RELEASE}" >/dev/null + + # Rollout must complete within terminationGracePeriodSeconds * 6 + a + # little slack — each pod can take up to the grace period in the + # worst case (preStop timeout + SIGTERM flush). + if ! 
kctl rollout status "statefulset/${RELEASE}" --timeout=600s >/dev/null; then + fail "${name}" "rollout status never converged" + cleanup_release; return + fi + + # Give gossip a moment to settle post-rollout — cluster_state flips to + # :ok only after every node sees every other node, and the last pod to + # restart may still be converging when rollout status returns. + local state + for _ in $(seq 1 30); do + state=$(kctl exec "${RELEASE}-0" -c "${RELEASE}" -- \ + valkey-cli cluster info 2>/dev/null \ + | awk -F: '/^cluster_state:/{print $2}' | tr -d '\r\n' || true) + [[ ${state} == ok ]] && break + sleep 2 + done + if [[ ${state} != ok ]]; then + fail "${name}" "cluster_state=${state:-} after rollout (want ok)" + cleanup_release; return + fi + + # Still 3 masters / 3 slaves — i.e. the handovers completed and every + # shard has the right shape. + local after masters_after slaves_after + after=$(snapshot_roles) + masters_after=$(printf '%s\n' "${after}" | grep -c '=master' || true) + slaves_after=$(printf '%s\n' "${after}" | grep -c '=slave\|=replica' || true) + if [[ ${masters_after} != 3 || ${slaves_after} != 3 ]]; then + fail "${name}" "post-rollout shape wrong: masters=${masters_after} slaves=${slaves_after} (want 3+3)" + cleanup_release; return + fi + + # Canary key survives (via MOVED redirect if the slot moved to a + # different primary). + local got + got=$(kctl exec "${RELEASE}-0" -c "${RELEASE}" -- \ + valkey-cli -c get "${canary_key}" 2>/dev/null || true) + if [[ ${got} != "${canary_val}" ]]; then + fail "${name}" "canary key lost: got='${got}' want='${canary_val}'" + cleanup_release; return + fi + + # Expect every primary's ordinal to flip: the rollout restarts each pod + # once, each primary-pod restart's preStop hands off to a replica, and + # the ex-primary returns as replica. So of the 3 original primaries, + # all 3 should now be replicas on those ordinals ⇒ at least 3 flips. 
+ # With nodeTimeout pinned high above, no other mechanism can produce + # flips during the rollout window, so this is a precise signal. + # A broken / missing preStop yields 0 flips (every pod persists its + # role in nodes.conf and rejoins as that role). + local flips=0 line ordinal role_before role_after + for line in ${before}; do + ordinal=${line%=*} + role_before=${line#*=} + role_after=$(printf '%s\n' "${after}" | awk -F= -v o="${ordinal}" '$1 == o {print $2}') + if [[ ${role_before} != "${role_after}" ]]; then + flips=$(( flips + 1 )) + fi + done + if (( flips < 3 )); then + fail "${name}" "only ${flips}/6 ordinals flipped — expected >=3 (every primary's preStop should hand off to a replica). before='${before}' after='${after}'" + cleanup_release; return + fi + log "roles flipped on ${flips}/6 pods — handover ran" + + cleanup_release + pass "${name}" +} + trap 'cleanup_release; cleanup_pair; cleanup_ambient_pair' EXIT scenario_aclconfig_metrics || true @@ -632,6 +813,7 @@ scenario_bus_port_hidden || true scenario_readiness_probe_exists || true scenario_two_clusters_isolated || true scenario_isolation_off_lets_merge_happen || true +scenario_rollout_restart_orderly_failover || true scenario_ambient_authz_blocks_cross_release_meet || true scenario_ambient_ap_disabled_refused || true scenario_ambient_shared_default_sa_refused || true diff --git a/valkey/scripts/cluster-prestop-script.sh b/valkey/scripts/cluster-prestop-script.sh new file mode 100644 index 00000000..308caf72 --- /dev/null +++ b/valkey/scripts/cluster-prestop-script.sh @@ -0,0 +1,168 @@ +#!/bin/sh +# preStop hook for cluster-mode Valkey pods: orchestrate an orderly +# CLUSTER FAILOVER before kubelet sends SIGTERM. +# +# Problem this solves +# ------------------- +# A rollout restart (or any voluntary pod eviction) sends SIGTERM to Valkey +# and — 30 seconds later by default — SIGKILL. 
Without a preStop hook, a +# primary pod dies with open client connections; the TCP sockets close +# abruptly, connection pools fill with dead handles, the app errors out on +# every pooled command, and the cluster takes up to cluster-node-timeout +# (15s default) to promote a replica. That is the behaviour the bug report +# describes. +# +# The fix: before the SIGTERM, detect if this pod is a primary; if so, ask +# one of its own replicas to run `CLUSTER FAILOVER`. Valkey then performs +# the canonical orderly handover — the primary pauses new writes, both +# sides sync replication offsets, the replica promotes, the old primary +# demotes to replica. Clients with cluster-topology refresh see the new +# primary immediately via MOVED; existing connections close cleanly as +# part of the demotion. No SIGTERM-during-write window, no pooled dead +# connections, no visible blip. +# +# No-op paths (deliberately best-effort — a failing preStop must never +# block pod shutdown; the old abrupt behaviour is still strictly better +# than hanging in Terminating): +# * This pod is already a replica — losing a replica is invisible to +# clients, no failover needed. +# * Shard has no replicas (cluster.replicasPerShard=0) — nothing to fail +# over to, accept the abrupt close as a topology choice. +# * This pod has no healthy replica of its own (all its replicas are +# marked fail) — skip; FAILOVER would target nothing. +# * Any vcli command fails — log and exit 0. +# +# Notably NOT a no-op path: cluster_state:fail. That state is expected +# mid-rollout (slots briefly uncovered between restarts). Skipping the +# hook there would perpetuate the degraded state by letting every +# subsequent primary also die abruptly. +# +# This script is templated at Helm render time so it can inline the same +# TLS/auth plumbing the cluster-init script uses. 
Keeping them separate +# (rather than a shared sourced helper) is intentional: Helm's text- +# template model makes shared sh includes fragile, the code is short, and +# the two scripts evolve independently. +set -eu + +log() { echo "preStop: $*" >&2; } + +PORT="{{ .Values.service.port }}" +TIMEOUT={{ .Values.cluster.preStopFailover.timeoutSeconds }} + +# Self-FQDN (matches what init_config.yaml announces via +# cluster-announce-hostname). Using 127.0.0.1 would work for TCP but +# break TLS SAN verification — the server cert's SAN lists the FQDN, not +# the loopback. Same rationale applies to the replica endpoint below. +SELF_FQDN="${HOSTNAME}.{{ include "valkey.headlessServiceName" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}" + +{{- if and .Values.auth.enabled .Values.auth.aclUsers }} +{{- $replUsername := .Values.cluster.replicationUser }} +{{- $replUser := index .Values.auth.aclUsers $replUsername }} +{{- $replPasswordKey := $replUser.passwordKey | default $replUsername }} +{{- if .Values.auth.usersExistingSecret }} +if [ -f "/valkey-users-secret/{{ $replPasswordKey }}" ]; then + REDISCLI_AUTH=$(cat "/valkey-users-secret/{{ $replPasswordKey }}") +elif [ -f "/valkey-auth-secret/{{ $replUsername }}-password" ]; then + REDISCLI_AUTH=$(cat "/valkey-auth-secret/{{ $replUsername }}-password") +else + log "no password found for user {{ $replUsername }}; cannot authenticate preStop" + exit 0 +fi +{{- else }} +if [ -f "/valkey-auth-secret/{{ $replUsername }}-password" ]; then + REDISCLI_AUTH=$(cat "/valkey-auth-secret/{{ $replUsername }}-password") +else + log "no password found for user {{ $replUsername }}; cannot authenticate preStop" + exit 0 +fi +{{- end }} +export REDISCLI_AUTH +{{- end }} + +vcli() { +{{- if .Values.tls.enabled }} + valkey-cli --no-auth-warning --tls --cacert "/tls/{{ .Values.tls.caPublicKey }}" "$@" +{{- else }} + valkey-cli --no-auth-warning "$@" +{{- end }} +} + +# We do NOT gate on cluster_state here. 
A rollout restarts pods one at a +# time, and between restarts this node sees cluster_state:fail until +# gossip observes the previous pod rejoin — exactly the window this +# preStop is meant to close. Skipping FAILOVER there would defeat the +# hook: without it, SIGTERM takes the primary's slots offline and the +# next pod also sees cluster_state:fail, perpetuating the degraded state +# for the rest of the rollout. We rely instead on CLUSTER FAILOVER's +# own preconditions (a healthy, caught-up replica) to decide whether the +# handover is safe. +role=$(vcli -h "${SELF_FQDN}" -p "${PORT}" info replication 2>/dev/null | awk -F: '/^role:/{print $2}' | tr -d '\r\n' || true) +case "${role}" in + master) ;; + slave|replica) + log "role=${role}; no failover needed" + exit 0 + ;; + *) + log "unexpected role=${role:-}; not attempting failover" + exit 0 + ;; +esac + +my_id=$(vcli -h "${SELF_FQDN}" -p "${PORT}" cluster myid 2>/dev/null | tr -d '\r\n' || true) +if [ -z "${my_id}" ]; then + log "cluster myid empty; not attempting failover" + exit 0 +fi + +# CLUSTER REPLICAS returns a subset of CLUSTER NODES, one line per +# replica of this primary, in the same eight-field format. We want a live +# (non-failing), online replica. Field 2 is the announce endpoint +# "host:port@busport[,hostname]"; Helm sets +# cluster-preferred-endpoint-type=hostname in init_config.yaml, so the +# host half is a DNS name that matches the TLS SAN when TLS is enabled. 
+# Pick the first replica this primary considers usable. Filter on the
+# parsed columns rather than the raw line: field 3 is the flag list and
+# field 8 the cluster-bus link state. A substring match on the whole line
+# would also reject a healthy replica whose announced hostname merely
+# contains "fail", and would accept replicas that are still in handshake,
+# have no known address, or whose bus link is down.
+replica_line=$(vcli -h "${SELF_FQDN}" -p "${PORT}" cluster replicas "${my_id}" 2>/dev/null \
+  | awk 'NF >= 8 && $3 !~ /fail|handshake|noaddr/ && $8 == "connected"' \
+  | head -n1 || true)
+if [ -z "${replica_line}" ]; then
+  log "no healthy replica for this primary; skipping failover"
+  exit 0
+fi
+
+# Field 2 is "<addr>:<port>@<busport>[,<hostname>]"; keep only addr:port.
+# NOTE(review): the CLUSTER NODES format documents the part before "@" as
+# the node IP, with the announced hostname after the comma — confirm that
+# connecting by IP is acceptable here when TLS is enabled.
+endpoint=$(printf '%s\n' "${replica_line}" | awk '{print $2}' | cut -d@ -f1)
+replica_host=${endpoint%:*}
+replica_port=${endpoint##*:}
+
+if [ -z "${replica_host}" ] || [ -z "${replica_port}" ]; then
+  log "could not parse replica endpoint from '${replica_line}'; skipping failover"
+  exit 0
+fi
+
+log "primary ${my_id}; asking replica ${replica_host}:${replica_port} to take over"
+
+# Plain CLUSTER FAILOVER (no FORCE/TAKEOVER) is the graceful path: the
+# replica negotiates with the primary, waits for replication-offset sync,
+# then promotes. If the replica is too far behind or the primary is
+# unreachable, it returns an error — we then exit 0 and let SIGTERM run.
+if ! vcli -h "${replica_host}" -p "${replica_port}" cluster failover 2>/dev/null; then
+  log "CLUSTER FAILOVER rejected; proceeding with abrupt shutdown"
+  exit 0
+fi
+
+# CLUSTER FAILOVER returns OK as soon as the replica accepts the request;
+# the actual role flip is asynchronous. Poll our own INFO until we see
+# role=slave (or give up on TIMEOUT).
+deadline=$(( $(date +%s) + TIMEOUT )) +while :; do + now=$(date +%s) + if [ "${now}" -ge "${deadline}" ]; then + log "timed out after ${TIMEOUT}s waiting for demotion; proceeding with shutdown" + exit 0 + fi + cur_role=$(vcli -h "${SELF_FQDN}" -p "${PORT}" info replication 2>/dev/null | awk -F: '/^role:/{print $2}' | tr -d '\r\n' || true) + if [ "${cur_role}" = "slave" ] || [ "${cur_role}" = "replica" ]; then + log "demoted to ${cur_role}; handover complete" + exit 0 + fi + sleep 1 +done diff --git a/valkey/templates/cluster-script.yaml b/valkey/templates/cluster-script.yaml index 6023fe75..bd9fcdd7 100644 --- a/valkey/templates/cluster-script.yaml +++ b/valkey/templates/cluster-script.yaml @@ -8,4 +8,6 @@ metadata: data: init-cluster.sh: |- {{ tpl (.Files.Get "scripts/cluster-init-script.sh") . | indent 4 }} + prestop.sh: |- +{{ tpl (.Files.Get "scripts/cluster-prestop-script.sh") . | indent 4 }} {{- end }} diff --git a/valkey/templates/cluster-statefulset.yaml b/valkey/templates/cluster-statefulset.yaml index 035eeeff..64d56b73 100644 --- a/valkey/templates/cluster-statefulset.yaml +++ b/valkey/templates/cluster-statefulset.yaml @@ -182,11 +182,25 @@ spec: periodSeconds: 5 timeoutSeconds: 3 failureThreshold: 3 + {{- if and (gt (int .Values.cluster.replicasPerShard) 0) .Values.cluster.preStopFailover.enabled }} + lifecycle: + # Graceful CLUSTER FAILOVER on primary-pod shutdown. Gated on + # replicasPerShard>0 (no replica to hand over to otherwise — + # the hook would no-op and just eat grace-period budget). The + # script itself is best-effort and never blocks SIGTERM. 
+ preStop: + exec: + command: [ "/bin/sh", "/cluster-script/prestop.sh" ] + {{- end }} resources: {{- toYaml .Values.resources | nindent 12 }} volumeMounts: - name: valkey-data mountPath: /data + {{- if and (gt (int .Values.cluster.replicasPerShard) 0) .Values.cluster.preStopFailover.enabled }} + - name: cluster-script + mountPath: /cluster-script + {{- end }} {{- if .Values.tls.enabled }} - name: {{ include "valkey.fullname" . }}-tls mountPath: /tls @@ -281,11 +295,18 @@ spec: {{- with .Values.extraContainers }} {{- toYaml . | nindent 8 }} {{- end }} + terminationGracePeriodSeconds: {{ .Values.cluster.terminationGracePeriodSeconds }} volumes: - name: scripts configMap: name: {{ include "valkey.fullname" . }}-init-scripts defaultMode: 0555 + {{- if and (gt (int .Values.cluster.replicasPerShard) 0) .Values.cluster.preStopFailover.enabled }} + - name: cluster-script + configMap: + name: {{ include "valkey.fullname" . }}-cluster-script + defaultMode: 0555 + {{- end }} {{- if .Values.auth.enabled }} - name: valkey-acl emptyDir: diff --git a/valkey/tests/cluster_test.yaml b/valkey/tests/cluster_test.yaml index 66ea037f..3220320b 100644 --- a/valkey/tests/cluster_test.yaml +++ b/valkey/tests/cluster_test.yaml @@ -138,10 +138,14 @@ tests: path: spec.template.spec.containers[0].args value: [ "/data/conf/valkey.conf" ] - - it: should not mount cluster-script volume in StatefulSet container + # Cluster-script is consumed on the STS side exclusively by the preStop + # CLUSTER FAILOVER hook: no replicas in the shard ⇒ no failover target + # ⇒ no need for the script on the main container. 
+ - it: should not mount cluster-script volume in StatefulSet container when no replicas set: cluster.enabled: true cluster.persistence.size: "5Gi" + cluster.replicasPerShard: 0 template: templates/cluster-statefulset.yaml asserts: - notContains: @@ -150,10 +154,11 @@ tests: name: cluster-script mountPath: /cluster-script - - it: should not define cluster-script volume in StatefulSet + - it: should not define cluster-script volume in StatefulSet when no replicas set: cluster.enabled: true cluster.persistence.size: "5Gi" + cluster.replicasPerShard: 0 template: templates/cluster-statefulset.yaml asserts: - notContains: @@ -162,6 +167,161 @@ tests: name: cluster-script any: true + - it: should not mount cluster-script volume when preStopFailover disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.replicasPerShard: 1 + cluster.preStopFailover.enabled: false + template: templates/cluster-statefulset.yaml + asserts: + - notContains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: cluster-script + mountPath: /cluster-script + - notContains: + path: spec.template.spec.volumes + content: + name: cluster-script + any: true + + - it: should mount cluster-script volume in StatefulSet container when replicas>=1 (default) + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.replicasPerShard: 1 + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: cluster-script + mountPath: /cluster-script + - contains: + path: spec.template.spec.volumes + content: + name: cluster-script + configMap: + name: RELEASE-NAME-valkey-cluster-script + defaultMode: 365 # 0555 + + # --- preStop CLUSTER FAILOVER hook --- + - it: should render preStop CLUSTER FAILOVER hook when replicas>=1 (default) + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.replicasPerShard: 1 + template: templates/cluster-statefulset.yaml 
+ asserts: + - equal: + path: spec.template.spec.containers[0].lifecycle.preStop.exec.command + value: [ "/bin/sh", "/cluster-script/prestop.sh" ] + + - it: should NOT render preStop hook when replicasPerShard=0 + # Nothing to hand over to — the hook would be a no-op that just eats + # grace-period budget. + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.replicasPerShard: 0 + template: templates/cluster-statefulset.yaml + asserts: + - notExists: + path: spec.template.spec.containers[0].lifecycle + + - it: should NOT render preStop hook when preStopFailover explicitly disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.replicasPerShard: 1 + cluster.preStopFailover.enabled: false + template: templates/cluster-statefulset.yaml + asserts: + - notExists: + path: spec.template.spec.containers[0].lifecycle + + - it: should set terminationGracePeriodSeconds from cluster.terminationGracePeriodSeconds + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.terminationGracePeriodSeconds: 120 + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.spec.terminationGracePeriodSeconds + value: 120 + + - it: should default terminationGracePeriodSeconds to 60 (enough for default preStop timeout of 40s + SIGTERM flush) + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.spec.terminationGracePeriodSeconds + value: 60 + + - it: cluster-script ConfigMap should contain prestop.sh with CLUSTER FAILOVER + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-script.yaml + asserts: + - isNotNull: + path: data["prestop.sh"] + - matchRegex: + path: data["prestop.sh"] + pattern: "cluster failover" + + - it: prestop.sh should inline TLS args when tls.enabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + tls.enabled: true + 
tls.existingSecret: valkey-tls + template: templates/cluster-script.yaml + asserts: + - matchRegex: + path: data["prestop.sh"] + pattern: "--tls --cacert" + + - it: prestop.sh should NOT inline TLS args when tls disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-script.yaml + asserts: + - notMatchRegex: + path: data["prestop.sh"] + pattern: "--tls --cacert" + + - it: prestop.sh should source REDISCLI_AUTH when auth.enabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.aclUsers: + default: + permissions: "~* &* +@all" + password: "secretpass" + template: templates/cluster-script.yaml + asserts: + - matchRegex: + path: data["prestop.sh"] + pattern: "REDISCLI_AUTH" + + - it: prestop.sh timeout should follow cluster.preStopFailover.timeoutSeconds + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.preStopFailover.timeoutSeconds: 25 + template: templates/cluster-script.yaml + asserts: + - matchRegex: + path: data["prestop.sh"] + pattern: "TIMEOUT=25" + # Init container tests - it: should have init container with cluster environment variables set: diff --git a/valkey/values.schema.json b/valkey/values.schema.json index b5a24d76..b3fc8fb7 100644 --- a/valkey/values.schema.json +++ b/valkey/values.schema.json @@ -74,6 +74,22 @@ } } }, + "preStopFailover": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "timeoutSeconds": { + "type": "integer", + "minimum": 1 + } + } + }, + "terminationGracePeriodSeconds": { + "type": "integer", + "minimum": 1 + }, "persistentVolumeClaimRetentionPolicy": { "type": "object" } diff --git a/valkey/values.yaml b/valkey/values.yaml index 7fdcff1e..674c43bd 100644 --- a/valkey/values.yaml +++ b/valkey/values.yaml @@ -305,6 +305,42 @@ cluster: # This port is used for node-to-node communication in the cluster busPort: 16379 + # Orderly handover on primary pod shutdown. 
A `kubectl rollout restart` + # (or any voluntary eviction) sends SIGTERM to Valkey directly; without a + # preStop hook, the primary dies with open client connections and the + # cluster takes up to nodeTimeout to promote a replica. During that + # window, connection pools fill with dead sockets and the app errors out + # on every pooled command. + # + # With this enabled, each pod's preStop checks whether it is a primary + # and (if so) asks one of its own replicas to run `CLUSTER FAILOVER` — + # Valkey's canonical graceful-handover command. The primary demotes to + # replica in the same pass, so by the time SIGTERM arrives the shard + # already has a new primary and existing connections close cleanly. + # + # No-op when: + # * the pod is already a replica (nothing to fail over); + # * the shard has no replicas (replicasPerShard=0 — nothing to fail + # over TO); + # * cluster_state is not ok (don't shuffle roles on a sick cluster). + # The hook is strictly best-effort — any error path falls through to the + # normal SIGTERM, which is the pre-existing (suboptimal) behaviour. + preStopFailover: + # Enable the preStop CLUSTER FAILOVER hook. + enabled: true + # How long to wait for the ex-primary to observe its own demotion + # before giving up and letting kubelet send SIGTERM. Keep comfortably + # below terminationGracePeriodSeconds — kubelet counts the preStop + # time against that total grace period. + timeoutSeconds: 40 + + # Grace period for pod shutdown. Needs to accommodate the preStop + # CLUSTER FAILOVER handshake (default timeoutSeconds: 40) with headroom + # for SIGTERM + flush. The K8s default (30s) is too short for an orderly + # cluster rollout — a clipped preStop forces the abrupt close this + # feature is trying to avoid. + terminationGracePeriodSeconds: 60 + # Isolate this Valkey cluster's gossip bus from any other release in the # Kubernetes cluster. 
Valkey's CLUSTER MEET has no authentication, so # without this, any pod that can open a TCP connection to a node's bus From d010b2c0901cdb09ec72b250321b743bb594b735 Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Mon, 4 May 2026 19:52:25 +0530 Subject: [PATCH 15/23] docs: clarify comment Signed-off-by: Ankit Pati --- functional-tests/setup.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/functional-tests/setup.sh b/functional-tests/setup.sh index a407084f..3fe57025 100755 --- a/functional-tests/setup.sh +++ b/functional-tests/setup.sh @@ -40,11 +40,12 @@ fi # carries per-pod `sidecar.istio.io/inject` and `istio.io/dataplane-mode` # labels derived from `istio.enabled` + `istio.mode`, so every workload # opts in or out explicitly at the pod layer. Labelling the namespace -# `istio-injection=enabled` on top would (a) override istio=off scenarios -# into sidecar'd pods unless each test sinks labels manually, and (b) -# blur which layer is actually responsible for mesh capture when -# troubleshooting. Keep the decision at the pod level, the same as how -# the chart ships to real operators. +# `istio-injection=enabled` on top would (a) pull every istio=off pod +# into the sidecar data plane — since namespace injection is inherited +# unless each pod stamps `sidecar.istio.io/inject=false` to veto it — +# and (b) blur which layer is actually responsible for mesh capture +# when troubleshooting. Keep the decision at the pod level, the same as +# how the chart ships to real operators. 
log "Namespace ${NAMESPACE} left unlabelled — chart controls mesh opt-in at the pod level" kubectl --context="${KUBE_CONTEXT}" label namespace "${NAMESPACE}" \ istio-injection- istio.io/dataplane-mode- 2>/dev/null || true From 368b11d02f61973613f69135ccdac72de28747bf Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Fri, 22 May 2026 12:17:06 +0530 Subject: [PATCH 16/23] bump `appVersion` to `9.1.0` Reference: https://valkey.io/blog/valkey-9-1-delivers-improvements-in-security-performance-and-more Signed-off-by: Ankit Pati --- valkey/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/valkey/Chart.yaml b/valkey/Chart.yaml index 6d6c7114..ea8eaba4 100644 --- a/valkey/Chart.yaml +++ b/valkey/Chart.yaml @@ -3,7 +3,7 @@ name: valkey description: A Helm chart for Kubernetes type: application version: 0.9.4 -appVersion: "9.0.2" +appVersion: "9.1.0" home: https://valkey.io/valkey-helm/ sources: - https://github.com/valkey-io/valkey-helm.git From b74c78f6f959897334663872aace6b3978be3bdc Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Fri, 22 May 2026 14:47:36 +0530 Subject: [PATCH 17/23] fix: expose annotations & labels on `cluster-init` job Signed-off-by: Ankit Pati --- valkey/templates/_helpers.tpl | 37 +++++++ valkey/templates/cluster-init-job.yaml | 23 ++-- valkey/tests/cluster_test.yaml | 148 +++++++++++++++++++++++++ valkey/values.schema.json | 11 ++ valkey/values.yaml | 24 +++- 5 files changed, 232 insertions(+), 11 deletions(-) diff --git a/valkey/templates/_helpers.tpl b/valkey/templates/_helpers.tpl index 1f2956d3..5b9857a0 100644 --- a/valkey/templates/_helpers.tpl +++ b/valkey/templates/_helpers.tpl @@ -283,6 +283,43 @@ e.g. `sidecar.istio.io/inject=false` via podLabels alongside {{- toYaml (mergeOverwrite $selector $common $mesh $user) -}} {{- end -}} +{{/* +Job-pod labels: same merge as valkey.podLabels with one extra layer for +`cluster.initJob.podLabels` applied last (so it wins). 
Lets operators +veto a globally-injected metrics/observability sidecar on the cluster- +init Job — which is a short-lived, exit-on-success batch task — without +having to disable the same injector for the long-running data pods. +mergeOverwrite handles the deep-merge and the no-duplicate-keys +guarantee just like the data-pod helper. +*/}} +{{- define "valkey.initJobPodLabels" -}} +{{- $selector := fromYaml (include "valkey.selectorLabels" .) -}} +{{- $common := .Values.commonLabels | default dict -}} +{{- $mesh := fromYaml (include "valkey.istioPodLabels" .) | default dict -}} +{{- $user := .Values.podLabels | default dict -}} +{{- $jobUser := (.Values.cluster.initJob).podLabels | default dict -}} +{{- toYaml (mergeOverwrite $selector $common $mesh $user $jobUser) -}} +{{- end -}} + +{{/* +Job-pod annotations: same shape as the global .Values.podAnnotations, +with `cluster.initJob.podAnnotations` merged on top so it wins on +collision. Same opt-out rationale as valkey.initJobPodLabels — some +sidecar injectors read annotations rather than labels. + +Emits nothing when the merged map is empty so the Job's metadata block +collapses cleanly (Helm/`with` semantics expect an absent key, not an +empty mapping, to skip). +*/}} +{{- define "valkey.initJobPodAnnotations" -}} +{{- $global := .Values.podAnnotations | default dict -}} +{{- $job := (.Values.cluster.initJob).podAnnotations | default dict -}} +{{- $merged := mergeOverwrite (deepCopy $global) $job -}} +{{- if $merged -}} +{{- toYaml $merged -}} +{{- end -}} +{{- end -}} + {{/* The valkey ServiceAccount name as an Istio SPIFFE principal. 
Used by the AuthorizationPolicy to pin the cluster-bus port to same-release diff --git a/valkey/templates/cluster-init-job.yaml b/valkey/templates/cluster-init-job.yaml index e1ce0ebd..04514017 100644 --- a/valkey/templates/cluster-init-job.yaml +++ b/valkey/templates/cluster-init-job.yaml @@ -18,18 +18,21 @@ spec: metadata: labels: {{- /* - Single merged label set (see valkey.podLabels helper). In ambient - mode the Job pod picks up the dataplane-mode label automatically so - ztunnel captures its outbound connections to the Valkey pods; - without it the Job speaks plaintext against STRICT mTLS and hangs - until backoffLimit. In sidecar mode the pod carries an explicit - sidecar.istio.io/inject=true so the Job works on namespaces that - don't carry the injection label. + Job-scoped label set (see valkey.initJobPodLabels helper). Same + layering as the data-pod helper, plus `cluster.initJob.podLabels` + on top so operators can opt the short-lived batch Job out of + global label-driven injectors (sidecar metrics agents, etc.) + without affecting the long-running data pods. In ambient mode + the dataplane-mode label is still emitted automatically so + ztunnel captures the Job's outbound connections; in sidecar mode + sidecar.istio.io/inject=true is emitted so the Job works on + namespaces that don't carry the injection label. */}} - {{- include "valkey.podLabels" . | nindent 8 }} - {{- with .Values.podAnnotations }} + {{- include "valkey.initJobPodLabels" . | nindent 8 }} + {{- $annotations := include "valkey.initJobPodAnnotations" . }} + {{- with $annotations }} annotations: - {{- toYaml . | nindent 8 }} + {{- . | nindent 8 }} {{- end }} spec: {{- (include "valkey.imagePullSecrets" .) 
| nindent 6 }} diff --git a/valkey/tests/cluster_test.yaml b/valkey/tests/cluster_test.yaml index 3220320b..d59804eb 100644 --- a/valkey/tests/cluster_test.yaml +++ b/valkey/tests/cluster_test.yaml @@ -637,6 +637,154 @@ tests: path: spec.template.metadata.annotations["custom-annotation"] value: my-annotation + # --- cluster.initJob.podLabels / .podAnnotations override surface --- + # Lets operators veto a globally-injected sidecar (metrics agent, mesh + # proxy via namespace label, policy webhook, etc.) on the short-lived + # cluster-init Job without affecting the long-running data pods. + + - it: Job should NOT have annotations key when none are set + # Important for Helm/`with` semantics: an empty mapping is not the + # same as an absent key; preserving the absent-key shape keeps the + # rendered manifest identical to its pre-feature state when no + # overrides are configured. + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-init-job.yaml + asserts: + - notExists: + path: spec.template.metadata.annotations + + - it: cluster.initJob.podLabels should land on the Job pod + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.initJob.podLabels: + my-injector/skip: "true" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.metadata.labels["my-injector/skip"] + value: "true" + + - it: cluster.initJob.podAnnotations should land on the Job pod + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.initJob.podAnnotations: + sidecar-injector.example.com/skip: "true" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.metadata.annotations["sidecar-injector.example.com/skip"] + value: "true" + + - it: cluster.initJob.podLabels should NOT leak onto the data StatefulSet + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.initJob.podLabels: + my-injector/skip: "true" + template: 
templates/cluster-statefulset.yaml + asserts: + - notExists: + path: spec.template.metadata.labels["my-injector/skip"] + + - it: cluster.initJob.podAnnotations should NOT leak onto the data StatefulSet + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.initJob.podAnnotations: + sidecar-injector.example.com/skip: "true" + template: templates/cluster-statefulset.yaml + asserts: + - notExists: + path: spec.template.metadata.annotations["sidecar-injector.example.com/skip"] + + - it: cluster.initJob.podLabels should win over global podLabels on key collision + # Same key, different values. The Job-scoped layer must override the + # global one so an opt-out can be expressed Job-only. + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + podLabels: + observe.example.com/inject: "true" + cluster.initJob.podLabels: + observe.example.com/inject: "false" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.metadata.labels["observe.example.com/inject"] + value: "false" + + - it: cluster.initJob.podAnnotations should win over global podAnnotations on key collision + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + podAnnotations: + sidecar.example.com/inject: "true" + cluster.initJob.podAnnotations: + sidecar.example.com/inject: "false" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.metadata.annotations["sidecar.example.com/inject"] + value: "false" + + - it: cluster.initJob.podLabels should be able to veto chart-emitted istio sidecar inject label + # Real-world repro: in sidecar mode the chart emits + # sidecar.istio.io/inject=true on every pod. Operators may want the + # Job to skip injection (e.g. their own scrape-only inspector + # mid-rollout, or to avoid the Job-hangs-on-sidecar problem). The + # override must sit at the END of the merge so it can replace a + # chart-computed mesh label, not just a user-supplied one. 
+ set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.initJob.podLabels: + sidecar.istio.io/inject: "false" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.metadata.labels["sidecar.istio.io/inject"] + value: "false" + + - it: cluster.initJob overrides do NOT veto the chart-emitted mesh label on the data StatefulSet + # Symmetric guard for the test above — proves the veto is Job-only, + # so flipping it in the Job can't accidentally take the data pods + # out of the mesh. + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.initJob.podLabels: + sidecar.istio.io/inject: "false" + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.metadata.labels["sidecar.istio.io/inject"] + value: "true" + + - it: global podLabels still flow through to the Job when initJob.podLabels does not collide + # Don't shadow the existing pre-feature behaviour: a user who has set + # only the global podLabels gets them on the Job too. 
+ set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + podLabels: + team: platform + cluster.initJob.podLabels: + my-injector/skip: "true" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.metadata.labels.team + value: platform + - equal: + path: spec.template.metadata.labels["my-injector/skip"] + value: "true" + - it: Job should include node selector when set set: cluster.enabled: true diff --git a/valkey/values.schema.json b/valkey/values.schema.json index b3fc8fb7..7b6daf86 100644 --- a/valkey/values.schema.json +++ b/valkey/values.schema.json @@ -90,6 +90,17 @@ "type": "integer", "minimum": 1 }, + "initJob": { + "type": "object", + "properties": { + "podLabels": { + "type": "object" + }, + "podAnnotations": { + "type": "object" + } + } + }, "persistentVolumeClaimRetentionPolicy": { "type": "object" } diff --git a/valkey/values.yaml b/valkey/values.yaml index 674c43bd..2103626d 100644 --- a/valkey/values.yaml +++ b/valkey/values.yaml @@ -322,7 +322,8 @@ cluster: # * the pod is already a replica (nothing to fail over); # * the shard has no replicas (replicasPerShard=0 — nothing to fail # over TO); - # * cluster_state is not ok (don't shuffle roles on a sick cluster). + # * the pod has no healthy replica of its own (FAILOVER would target + # nothing). # The hook is strictly best-effort — any error path falls through to the # normal SIGTERM, which is the pre-existing (suboptimal) behaviour. preStopFailover: @@ -365,6 +366,27 @@ cluster: # More info: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention persistentVolumeClaimRetentionPolicy: {} + # Override surface for the cluster-init Job pod only. Layered on top of + # the chart-wide podLabels / podAnnotations (these win on key collision) + # and merged with the chart-computed mesh labels, so the same opt-out + # mechanics that apply to the data pods are still in force unless you + # explicitly veto a key here. 
+ # + # The intended use case is excluding the short-lived bootstrap Job from + # cluster-wide pod admission webhooks — observability sidecars, mesh + # proxies pulled in by namespace labels, policy agents, etc. — that are + # appropriate for long-running data pods but turn the Job into a + # never-completing batch task. Because the Job is a Helm post-install + # hook with backoffLimit=6, an injected sidecar that never exits means + # `helm install --wait` blocks until timeout and the chart fails to + # converge. + # + # Both maps default empty; with no overrides set the Job pod inherits + # the chart-wide values exactly as it did before this surface existed. + initJob: + podLabels: {} + podAnnotations: {} + tls: # Enable TLS enabled: false From 517d93b3e863ff5184db4bbdf007b3bc863e8689 Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Fri, 22 May 2026 20:18:27 +0530 Subject: [PATCH 18/23] fix: cleanup `cluster-init` pod after 300s Signed-off-by: Ankit Pati --- valkey/templates/cluster-init-job.yaml | 1 + valkey/tests/cluster_test.yaml | 32 ++++++++++++++++++++++++++ valkey/values.schema.json | 4 ++++ valkey/values.yaml | 6 +++++ 4 files changed, 43 insertions(+) diff --git a/valkey/templates/cluster-init-job.yaml b/valkey/templates/cluster-init-job.yaml index 04514017..c875ef54 100644 --- a/valkey/templates/cluster-init-job.yaml +++ b/valkey/templates/cluster-init-job.yaml @@ -14,6 +14,7 @@ metadata: "helm.sh/hook-delete-policy": before-hook-creation spec: backoffLimit: 6 + ttlSecondsAfterFinished: {{ .Values.cluster.initJob.ttlSecondsAfterFinished }} template: metadata: labels: diff --git a/valkey/tests/cluster_test.yaml b/valkey/tests/cluster_test.yaml index d59804eb..50a86112 100644 --- a/valkey/tests/cluster_test.yaml +++ b/valkey/tests/cluster_test.yaml @@ -766,6 +766,38 @@ tests: path: spec.template.metadata.labels["sidecar.istio.io/inject"] value: "true" + - it: Job should default ttlSecondsAfterFinished to 300 (auto-cleanup window) + set: + cluster.enabled: 
true + cluster.persistence.size: "5Gi" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.ttlSecondsAfterFinished + value: 300 + + - it: cluster.initJob.ttlSecondsAfterFinished should be configurable + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.initJob.ttlSecondsAfterFinished: 60 + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.ttlSecondsAfterFinished + value: 60 + + - it: cluster.initJob.ttlSecondsAfterFinished=0 deletes immediately on completion + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.initJob.ttlSecondsAfterFinished: 0 + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.ttlSecondsAfterFinished + value: 0 + - it: global podLabels still flow through to the Job when initJob.podLabels does not collide # Don't shadow the existing pre-feature behaviour: a user who has set # only the global podLabels gets them on the Job too. diff --git a/valkey/values.schema.json b/valkey/values.schema.json index 7b6daf86..a9437821 100644 --- a/valkey/values.schema.json +++ b/valkey/values.schema.json @@ -98,6 +98,10 @@ }, "podAnnotations": { "type": "object" + }, + "ttlSecondsAfterFinished": { + "type": "integer", + "minimum": 0 } } }, diff --git a/valkey/values.yaml b/valkey/values.yaml index 2103626d..5af73aa9 100644 --- a/valkey/values.yaml +++ b/valkey/values.yaml @@ -386,6 +386,12 @@ cluster: initJob: podLabels: {} podAnnotations: {} + # Auto-cleanup window for the completed Job. Kubernetes deletes the + # Job (and its pod) this many seconds after it transitions to + # Complete or Failed via the TTL-after-finished controller. Set + # short enough to keep `kubectl get pods` clean across upgrades, + # long enough to grab `kubectl logs` for post-mortem if needed. 
+ ttlSecondsAfterFinished: 300 tls: # Enable TLS From 635a1da13cc500509c5593c023137bbbd1a6e626 Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Fri, 22 May 2026 23:17:25 +0530 Subject: [PATCH 19/23] fix: allow overriding probe times & thresholds for large dataset loading Signed-off-by: Ankit Pati --- valkey/templates/_helpers.tpl | 30 ++++++++ valkey/templates/cluster-statefulset.yaml | 25 +++---- valkey/templates/deploy_valkey.yaml | 25 +++---- valkey/templates/statefulset.yaml | 25 +++---- valkey/tests/cluster_test.yaml | 84 +++++++++++++++++++++++ valkey/tests/deployment_test.yaml | 37 ++++++++++ valkey/tests/statefulset_test.yaml | 48 +++++++++++++ valkey/values.schema.json | 46 +++++++++++++ valkey/values.yaml | 62 +++++++++++++++++ 9 files changed, 337 insertions(+), 45 deletions(-) diff --git a/valkey/templates/_helpers.tpl b/valkey/templates/_helpers.tpl index 5b9857a0..b2c15634 100644 --- a/valkey/templates/_helpers.tpl +++ b/valkey/templates/_helpers.tpl @@ -320,6 +320,36 @@ empty mapping, to skip). {{- end -}} {{- end -}} +{{/* +Probe shell command. Returns the "sh -c" argument that pings valkey-server +locally and accepts replies that prove the server is up AND serving. + +Replies to PING are one of: + PONG — fully up, dataset loaded + NOAUTH … — up, requires auth (treat as proof of liveness — the + server is fully serving, we just lack credentials) + LOADING … — TCP listener is up but the dataset is still being read + from RDB/AOF; the server cannot serve traffic yet + +LOADING is deliberately NOT accepted, including by startupProbe. The +whole reason startupProbe exists in Kubernetes (added in 1.16) is to +gate liveness/readiness behind a slow-startup window — that gate has +to actually fail during startup or the gate does nothing. 
With LOADING +accepted by startupProbe, the probe passes the moment the TCP listener +opens; kubelet switches immediately to livenessProbe (which does not +accept LOADING) and the pod gets killed during load anyway, just +attributed to liveness. Operators with multi-GB RDBs bump +`startupProbe.failureThreshold` instead — that is the canonical +Kubernetes pattern for slow loaders. +*/}} +{{- define "valkey.probeShellCommand" -}} +{{- $pingCmd := "valkey-cli ping" -}} +{{- if .Values.tls.enabled -}} +{{- $pingCmd = printf "valkey-cli --tls --cacert /tls/%s ping" .Values.tls.caPublicKey -}} +{{- end -}} +{{- printf "%s 2>&1 | grep -qE 'PONG|NOAUTH'" $pingCmd -}} +{{- end -}} + {{/* The valkey ServiceAccount name as an Istio SPIFFE principal. Used by the AuthorizationPolicy to pin the cluster-bus port to same-release diff --git a/valkey/templates/cluster-statefulset.yaml b/valkey/templates/cluster-statefulset.yaml index 64d56b73..259a2900 100644 --- a/valkey/templates/cluster-statefulset.yaml +++ b/valkey/templates/cluster-statefulset.yaml @@ -157,31 +157,26 @@ spec: - name: tcp-bus containerPort: {{ .Values.cluster.busPort }} protocol: TCP - {{- $pingCmd := "valkey-cli ping" }} - {{- if .Values.tls.enabled }} - {{- $pingCmd = printf "valkey-cli --tls --cacert /tls/%s ping" .Values.tls.caPublicKey }} - {{- end }} - {{- /* When auth is enforced the server returns 'NOAUTH Authentication required.' — accept it as proof of liveness. */}} - {{- $probeCmd := printf "%s 2>&1 | grep -qE 'PONG|NOAUTH'" $pingCmd }} + {{- $probeCmd := include "valkey.probeShellCommand" . 
}} startupProbe: exec: command: [ "sh", "-c", {{ $probeCmd | quote }} ] initialDelaySeconds: 5 - periodSeconds: 5 - timeoutSeconds: 5 - failureThreshold: 30 + periodSeconds: {{ .Values.cluster.startupProbe.periodSeconds }} + timeoutSeconds: {{ .Values.cluster.startupProbe.timeoutSeconds }} + failureThreshold: {{ .Values.cluster.startupProbe.failureThreshold }} livenessProbe: exec: command: [ "sh", "-c", {{ $probeCmd | quote }} ] - periodSeconds: 10 - timeoutSeconds: 5 - failureThreshold: 6 + periodSeconds: {{ .Values.cluster.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.cluster.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.cluster.livenessProbe.failureThreshold }} readinessProbe: exec: command: [ "sh", "-c", {{ $probeCmd | quote }} ] - periodSeconds: 5 - timeoutSeconds: 3 - failureThreshold: 3 + periodSeconds: {{ .Values.cluster.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.cluster.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.cluster.readinessProbe.failureThreshold }} {{- if and (gt (int .Values.cluster.replicasPerShard) 0) .Values.cluster.preStopFailover.enabled }} lifecycle: # Graceful CLUSTER FAILOVER on primary-pod shutdown. Gated on diff --git a/valkey/templates/deploy_valkey.yaml b/valkey/templates/deploy_valkey.yaml index a1578378..4fd97cd6 100644 --- a/valkey/templates/deploy_valkey.yaml +++ b/valkey/templates/deploy_valkey.yaml @@ -114,30 +114,25 @@ spec: - name: tcp containerPort: {{ .Values.service.port }} protocol: TCP - {{- $pingCmd := "valkey-cli ping" }} - {{- if .Values.tls.enabled }} - {{- $pingCmd = printf "valkey-cli --tls --cacert /tls/%s ping" .Values.tls.caPublicKey }} - {{- end }} - {{- /* When auth is enforced the server returns 'NOAUTH Authentication required.' — accept it as proof of liveness. */}} - {{- $probeCmd := printf "%s 2>&1 | grep -qE 'PONG|NOAUTH'" $pingCmd }} + {{- $probeCmd := include "valkey.probeShellCommand" . 
}} startupProbe: exec: command: [ "sh", "-c", {{ $probeCmd | quote }} ] - periodSeconds: 5 - timeoutSeconds: 5 - failureThreshold: 30 + periodSeconds: {{ .Values.startupProbe.periodSeconds }} + timeoutSeconds: {{ .Values.startupProbe.timeoutSeconds }} + failureThreshold: {{ .Values.startupProbe.failureThreshold }} livenessProbe: exec: command: [ "sh", "-c", {{ $probeCmd | quote }} ] - periodSeconds: 10 - timeoutSeconds: 5 - failureThreshold: 6 + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} readinessProbe: exec: command: [ "sh", "-c", {{ $probeCmd | quote }} ] - periodSeconds: 5 - timeoutSeconds: 3 - failureThreshold: 3 + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} resources: {{- toYaml .Values.resources | nindent 12 }} volumeMounts: diff --git a/valkey/templates/statefulset.yaml b/valkey/templates/statefulset.yaml index 66163cf0..e72a37bd 100644 --- a/valkey/templates/statefulset.yaml +++ b/valkey/templates/statefulset.yaml @@ -127,30 +127,25 @@ spec: - name: tcp containerPort: {{ .Values.service.port }} protocol: TCP - {{- $pingCmd := "valkey-cli ping" }} - {{- if .Values.tls.enabled }} - {{- $pingCmd = printf "valkey-cli --tls --cacert /tls/%s ping" .Values.tls.caPublicKey }} - {{- end }} - {{- /* When auth is enforced the server returns 'NOAUTH Authentication required.' — accept it as proof of liveness. */}} - {{- $probeCmd := printf "%s 2>&1 | grep -qE 'PONG|NOAUTH'" $pingCmd }} + {{- $probeCmd := include "valkey.probeShellCommand" . 
}} startupProbe: exec: command: [ "sh", "-c", {{ $probeCmd | quote }} ] - periodSeconds: 5 - timeoutSeconds: 5 - failureThreshold: 30 + periodSeconds: {{ .Values.replica.startupProbe.periodSeconds }} + timeoutSeconds: {{ .Values.replica.startupProbe.timeoutSeconds }} + failureThreshold: {{ .Values.replica.startupProbe.failureThreshold }} livenessProbe: exec: command: [ "sh", "-c", {{ $probeCmd | quote }} ] - periodSeconds: 10 - timeoutSeconds: 5 - failureThreshold: 6 + periodSeconds: {{ .Values.replica.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.replica.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.replica.livenessProbe.failureThreshold }} readinessProbe: exec: command: [ "sh", "-c", {{ $probeCmd | quote }} ] - periodSeconds: 5 - timeoutSeconds: 3 - failureThreshold: 3 + periodSeconds: {{ .Values.replica.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.replica.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.replica.readinessProbe.failureThreshold }} resources: {{- toYaml .Values.resources | nindent 12 }} volumeMounts: diff --git a/valkey/tests/cluster_test.yaml b/valkey/tests/cluster_test.yaml index 50a86112..d2709386 100644 --- a/valkey/tests/cluster_test.yaml +++ b/valkey/tests/cluster_test.yaml @@ -1273,6 +1273,90 @@ tests: - exists: path: spec.template.spec.containers[0].readinessProbe + # --- LOADING must NOT be accepted by ANY probe (slow-load handling) --- + # Why: startupProbe's job in Kubernetes is to gate liveness/readiness + # behind a slow-startup window. If startupProbe accepted LOADING it + # would pass the moment the TCP listener opens, kubelet would switch + # to liveness, and the not-LOADING-tolerant liveness probe would kill + # the still-loading container — same crash loop, different attribution. + # The right knob for slow loaders is cluster.startupProbe.failureThreshold. 
+ - it: cluster startupProbe must reject LOADING (so the gate has teeth) + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - notMatchRegex: + path: spec.template.spec.containers[0].startupProbe.exec.command[2] + pattern: "LOADING" + + - it: cluster livenessProbe must NOT accept LOADING + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - notMatchRegex: + path: spec.template.spec.containers[0].livenessProbe.exec.command[2] + pattern: "LOADING" + + - it: cluster readinessProbe must NOT accept LOADING + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - notMatchRegex: + path: spec.template.spec.containers[0].readinessProbe.exec.command[2] + pattern: "LOADING" + + # Tuning knobs flow through to the rendered probe. Operators with + # large datasets bump cluster.startupProbe.failureThreshold to extend + # the load window without affecting steady-state probes. + - it: cluster.startupProbe overrides should land on the startupProbe + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.startupProbe.periodSeconds: 10 + cluster.startupProbe.timeoutSeconds: 8 + cluster.startupProbe.failureThreshold: 240 + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.spec.containers[0].startupProbe.periodSeconds + value: 10 + - equal: + path: spec.template.spec.containers[0].startupProbe.timeoutSeconds + value: 8 + - equal: + path: spec.template.spec.containers[0].startupProbe.failureThreshold + value: 240 + # And the overrides must be probe-scoped: liveness/readiness keep + # their defaults. 
+ - equal: + path: spec.template.spec.containers[0].livenessProbe.periodSeconds + value: 10 + - equal: + path: spec.template.spec.containers[0].livenessProbe.failureThreshold + value: 6 + + - it: cluster.livenessProbe overrides should land on the livenessProbe only + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.livenessProbe.failureThreshold: 12 + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.spec.containers[0].livenessProbe.failureThreshold + value: 12 + - equal: + path: spec.template.spec.containers[0].startupProbe.failureThreshold + value: 30 + - equal: + path: spec.template.spec.containers[0].readinessProbe.failureThreshold + value: 3 + # Regression: extraContainers and extraVolumes were unwired in cluster mode. - it: should wire extraContainers and extraVolumes through in cluster mode set: diff --git a/valkey/tests/deployment_test.yaml b/valkey/tests/deployment_test.yaml index bfc1067e..3ba4848e 100644 --- a/valkey/tests/deployment_test.yaml +++ b/valkey/tests/deployment_test.yaml @@ -512,6 +512,43 @@ tests: - exists: path: spec.template.spec.containers[0].readinessProbe + # --- LOADING must NOT be accepted by ANY probe (slow-load handling) --- + - it: standalone startupProbe must reject LOADING (so the gate has teeth) + template: templates/deploy_valkey.yaml + asserts: + - notMatchRegex: + path: spec.template.spec.containers[0].startupProbe.exec.command[2] + pattern: "LOADING" + + - it: standalone livenessProbe must NOT accept LOADING + template: templates/deploy_valkey.yaml + asserts: + - notMatchRegex: + path: spec.template.spec.containers[0].livenessProbe.exec.command[2] + pattern: "LOADING" + + - it: standalone readinessProbe must NOT accept LOADING + template: templates/deploy_valkey.yaml + asserts: + - notMatchRegex: + path: spec.template.spec.containers[0].readinessProbe.exec.command[2] + pattern: "LOADING" + + - it: top-level startupProbe overrides should land on the startupProbe 
only + set: + startupProbe.failureThreshold: 240 + template: templates/deploy_valkey.yaml + asserts: + - equal: + path: spec.template.spec.containers[0].startupProbe.failureThreshold + value: 240 + - equal: + path: spec.template.spec.containers[0].livenessProbe.failureThreshold + value: 6 + - equal: + path: spec.template.spec.containers[0].readinessProbe.failureThreshold + value: 3 + # --- Istio ambient mode --- - it: should add ambient dataplane-mode label when istio.mode=ambient set: diff --git a/valkey/tests/statefulset_test.yaml b/valkey/tests/statefulset_test.yaml index 98e69d7c..e7a8706a 100644 --- a/valkey/tests/statefulset_test.yaml +++ b/valkey/tests/statefulset_test.yaml @@ -403,6 +403,54 @@ tests: - exists: path: spec.template.spec.containers[0].readinessProbe + # --- LOADING must NOT be accepted by ANY probe (slow-load handling) --- + - it: replicated startupProbe must reject LOADING (so the gate has teeth) + set: + replica.enabled: true + replica.persistence.size: "5Gi" + template: templates/statefulset.yaml + asserts: + - notMatchRegex: + path: spec.template.spec.containers[0].startupProbe.exec.command[2] + pattern: "LOADING" + + - it: replicated livenessProbe must NOT accept LOADING + set: + replica.enabled: true + replica.persistence.size: "5Gi" + template: templates/statefulset.yaml + asserts: + - notMatchRegex: + path: spec.template.spec.containers[0].livenessProbe.exec.command[2] + pattern: "LOADING" + + - it: replicated readinessProbe must NOT accept LOADING + set: + replica.enabled: true + replica.persistence.size: "5Gi" + template: templates/statefulset.yaml + asserts: + - notMatchRegex: + path: spec.template.spec.containers[0].readinessProbe.exec.command[2] + pattern: "LOADING" + + - it: replica.startupProbe overrides should land on the startupProbe only + set: + replica.enabled: true + replica.persistence.size: "5Gi" + replica.startupProbe.failureThreshold: 240 + template: templates/statefulset.yaml + asserts: + - equal: + path: 
spec.template.spec.containers[0].startupProbe.failureThreshold + value: 240 + - equal: + path: spec.template.spec.containers[0].livenessProbe.failureThreshold + value: 6 + - equal: + path: spec.template.spec.containers[0].readinessProbe.failureThreshold + value: 3 + # --- Istio ambient mode --- - it: should add ambient dataplane-mode label when istio.mode=ambient set: diff --git a/valkey/values.schema.json b/valkey/values.schema.json index a9437821..a02c2884 100644 --- a/valkey/values.schema.json +++ b/valkey/values.schema.json @@ -1,5 +1,24 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": { + "probeTuning": { + "type": "object", + "properties": { + "periodSeconds": { + "type": "integer", + "minimum": 1 + }, + "timeoutSeconds": { + "type": "integer", + "minimum": 1 + }, + "failureThreshold": { + "type": "integer", + "minimum": 1 + } + } + } + }, "type": "object", "properties": { "affinity": { @@ -74,6 +93,15 @@ } } }, + "startupProbe": { + "$ref": "#/$defs/probeTuning" + }, + "livenessProbe": { + "$ref": "#/$defs/probeTuning" + }, + "readinessProbe": { + "$ref": "#/$defs/probeTuning" + }, "preStopFailover": { "type": "object", "properties": { @@ -518,6 +546,15 @@ "priorityClassName": { "type": "string" }, + "startupProbe": { + "$ref": "#/$defs/probeTuning" + }, + "livenessProbe": { + "$ref": "#/$defs/probeTuning" + }, + "readinessProbe": { + "$ref": "#/$defs/probeTuning" + }, "replica": { "type": "object", "properties": { @@ -553,6 +590,15 @@ "persistentVolumeClaimRetentionPolicy": { "type": "object" }, + "startupProbe": { + "$ref": "#/$defs/probeTuning" + }, + "livenessProbe": { + "$ref": "#/$defs/probeTuning" + }, + "readinessProbe": { + "$ref": "#/$defs/probeTuning" + }, "replicas": { "type": "integer" }, diff --git a/valkey/values.yaml b/valkey/values.yaml index 5af73aa9..506a180a 100644 --- a/valkey/values.yaml +++ b/valkey/values.yaml @@ -98,6 +98,34 @@ resources: {} # cpu: 100m # memory: 128Mi +# Probe tuning. 
The probe command itself is fixed (valkey-cli ping with +# TLS args matching tls.enabled, accepting only PONG and NOAUTH — +# LOADING is rejected); only the timing knobs are configurable. +# +# Why LOADING is rejected: a multi-GB RDB load on a fresh primary takes +# minutes, during which every PING returns 'LOADING …'. startupProbe is +# the canonical Kubernetes gate for this — it suppresses liveness and +# readiness until it passes. So on a slow loader, bump +# startupProbe.failureThreshold (or periodSeconds) so +# failureThreshold * periodSeconds comfortably exceeds your worst-case +# RDB load time. Default 30 * 5s = 150s suits small dev datasets only. +# A 44 GB primary needs roughly 240 * 5s = 20 minutes. +# +# liveness and readiness keep their tight defaults — they only run +# AFTER startupProbe has passed, by which point PONG is the right reply. +startupProbe: + periodSeconds: 5 + timeoutSeconds: 5 + failureThreshold: 30 +livenessProbe: + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 +readinessProbe: + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 + # Resource limits/requests for init containers initResources: {} # Example: @@ -261,6 +289,22 @@ replica: # More info: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention persistentVolumeClaimRetentionPolicy: {} + # Probe tuning for replicated (non-cluster) StatefulSet pods. Same + # rationale as the top-level startupProbe block — bump failureThreshold + # for large RDBs. 
+ startupProbe: + periodSeconds: 5 + timeoutSeconds: 5 + failureThreshold: 30 + livenessProbe: + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 + readinessProbe: + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 + # Cluster mode configuration for Valkey Cluster (sharded deployment) # Note: cluster.enabled and replica.enabled are mutually exclusive cluster: @@ -361,6 +405,24 @@ cluster: isolation: enabled: true + # Probe tuning for cluster-mode StatefulSet pods. Same rationale as the + # top-level startupProbe block — bump failureThreshold for large RDBs. + # In cluster mode, replicas catch up via PSYNC + RDB transfer on (re) + # connect, so the post-restart load window can be just as long as for + # standalone replicated mode. + startupProbe: + periodSeconds: 5 + timeoutSeconds: 5 + failureThreshold: 30 + livenessProbe: + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 + readinessProbe: + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 + # PersistentVolumeClaim retention policy for StatefulSet # Controls when PVCs are deleted (requires Kubernetes 1.23+) # More info: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention From 5a4be220dcbe431e8786ac5a68ac45d93d039362 Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Sat, 23 May 2026 01:30:16 +0530 Subject: [PATCH 20/23] fix: update stale `nodes.conf` Signed-off-by: Ankit Pati --- valkey/templates/init_config.yaml | 89 ++++++++++++++++++++++++++++++ valkey/tests/init_config_test.yaml | 51 +++++++++++++++++ 2 files changed, 140 insertions(+) diff --git a/valkey/templates/init_config.yaml b/valkey/templates/init_config.yaml index 4bce19e8..4d11e327 100644 --- a/valkey/templates/init_config.yaml +++ b/valkey/templates/init_config.yaml @@ -282,6 +282,95 @@ data: } >>"$VALKEY_CONFIG" log "Enabled TLS for cluster communication" {{- end }} + + # ---------------------------------------------------------------------- + # Refresh stale 
IPs in /data/nodes.conf before valkey-server starts. + # + # Why: cluster bus gossip (port 16379) is dialled by raw IP, even when + # cluster-preferred-endpoint-type=hostname (the hostname is announced + # over the bus, not used to establish it). After a rolling restart pod + # IPs change; if the first pod we restart on a given node also took + # the longest to load its RDB, by the time it comes back ALL its peers + # have new IPs and its own nodes.conf has none of them. The pod is + # then a stranded minority partition and never recovers without + # operator intervention. + # + # Fix: on every cluster pod start, re-resolve each peer's announced + # FQDN (already on-disk in nodes.conf as the second comma-separated + # token of field 2) and rewrite the IP in place. Valkey reads + # nodes.conf at startup and uses those IPs as its initial gossip + # targets — fresh IPs in, fresh gossip out, no stranded pod. + # + # No-ops cleanly when: + # * nodes.conf doesn't exist (first boot — there's nothing to refresh, + # and CLUSTER MEET from cluster-init-script.sh will populate it); + # * a peer's FQDN doesn't resolve (peer is also mid-restart) — we + # leave that line as-is and let Valkey's normal retry/gossip + # reconcile it once the peer's pod IP shows up in DNS. + # ---------------------------------------------------------------------- + NODES_CONF=/data/nodes.conf + if [ -f "$NODES_CONF" ]; then + log "Refreshing IPs in $NODES_CONF against current DNS" + # Write the temp file in /data (the PVC) rather than $TMPDIR — the + # init container runs with readOnlyRootFilesystem=true, which + # leaves /tmp read-only. /data is the only RW mount we have, and + # it's the same filesystem as the destination so the final mv is + # atomic (rename(2) within one mount point). + TMP=$(mktemp /data/nodes.conf.XXXXXX) + changed=0 + kept=0 + missing=0 + # Read line-by-line. Format per line: + # ,[,k=v,...] flags ... [slots ...] 
# The 'vars' line at EOF has no comma so we pass it through unchanged.
#
# refresh_nodes_conf_ips -- copy "$NODES_CONF" into "$TMP" one line at a
# time, replacing the IP at the front of field 2 with whatever the
# announced FQDN (second comma-separated token of field 2) resolves to
# right now.
#
# Reads:   NODES_CONF (input file), TMP (output file, appended to).
# Updates: the changed / kept / missing counters, which the caller must
#          have initialised. The scratch variables (line, endpoint,
#          addr, rest, fqdn, old_ip, port_and_bus, new_ip, new_endpoint)
#          are deliberately left global, exactly as when this loop was
#          inline.
# Lines that cannot be parsed are copied verbatim and not counted.
refresh_nodes_conf_ips() {
  while IFS= read -r line || [ -n "$line" ]; do
    case "$line" in
      ''|vars\ *)
        printf '%s\n' "$line" >>"$TMP"
        continue
        ;;
    esac
    # Field 2 is endpoint+metadata, field 1 is node id.
    endpoint=$(printf '%s' "$line" | awk '{print $2}')
    # Skip lines we can't parse (defensive -- preserve verbatim).
    case "$endpoint" in
      *,*) ;;
      *) printf '%s\n' "$line" >>"$TMP"; continue ;;
    esac
    # Pull out the host portion of "ip:port@busport" and the announced FQDN.
    addr=${endpoint%%,*}
    rest=${endpoint#*,}
    fqdn=${rest%%,*}
    # An address token without any colon has no port to preserve; rewriting
    # it would fabricate a bogus endpoint, so keep the line untouched.
    case "$addr" in
      *:*) ;;
      *) printf '%s\n' "$line" >>"$TMP"; continue ;;
    esac
    # Split at the LAST colon, not the first: an IPv6 address contains
    # colons itself (fd00::1:6379@16379), and a first-colon split would
    # leave half of the old address glued in front of the port. For IPv4
    # both splits give the same result.
    old_ip=${addr%:*}
    port_and_bus=${addr##*:}
    # Skip if the announced FQDN looks empty (older nodes.conf shapes).
    if [ -z "$fqdn" ]; then
      printf '%s\n' "$line" >>"$TMP"; continue
    fi
    new_ip=$(getent hosts "$fqdn" 2>/dev/null | awk '{print $1; exit}')
    if [ -z "$new_ip" ]; then
      # Peer not resolvable right now: leave its line as-is.
      missing=$(( missing + 1 ))
      printf '%s\n' "$line" >>"$TMP"
      continue
    fi
    if [ "$new_ip" = "$old_ip" ]; then
      kept=$(( kept + 1 ))
      printf '%s\n' "$line" >>"$TMP"
      continue
    fi
    # Rewrite the endpoint token in field 2; everything else verbatim.
    new_endpoint="${new_ip}:${port_and_bus},${rest}"
    # Replace ONLY the first whitespace-separated token after the ID.
    # Using awk to avoid sed quoting/regex hazards when fqdn contains dots.
    printf '%s\n' "$line" | awk -v new="$new_endpoint" '{$2 = new; print}' >>"$TMP"
    changed=$(( changed + 1 ))
  done <"$NODES_CONF"
}
refresh_nodes_conf_ips
# Atomic swap so a kill mid-rewrite can't corrupt nodes.conf.
+ mv "$TMP" "$NODES_CONF" + log "nodes.conf refresh: ${changed} updated, ${kept} unchanged, ${missing} unresolved" + else + log "$NODES_CONF absent — first boot, nothing to refresh" + fi {{- end }} # Append extra configs if present diff --git a/valkey/tests/init_config_test.yaml b/valkey/tests/init_config_test.yaml index cc19b36f..8c6a8adc 100644 --- a/valkey/tests/init_config_test.yaml +++ b/valkey/tests/init_config_test.yaml @@ -186,3 +186,54 @@ tests: path: data["init.sh"] # The escape pass: s/\\/\\\\/g; s/"/\\"/g pattern: "REPL_PASSWORD_ESC=\\$\\(printf '%s' \"\\$REPL_PASSWORD\"" + + # --- nodes.conf IP refresh on cluster pod restart --- + # Cluster bus gossip dials by IP, even with cluster-preferred-endpoint-type= + # hostname. After a rolling restart pod IPs change, and a pod whose + # nodes.conf has only stale IPs becomes a stranded minority partition. + # The refresh block re-resolves each peer's announced FQDN and rewrites + # the IP before valkey-server starts. + - it: cluster mode should emit a nodes.conf IP refresh block + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - matchRegex: + path: data["init.sh"] + pattern: "NODES_CONF=/data/nodes\\.conf" + - matchRegex: + path: data["init.sh"] + pattern: "getent hosts \"\\$fqdn\"" + # Atomic swap so a kill mid-rewrite can't corrupt the file. + - matchRegex: + path: data["init.sh"] + pattern: 'mv "\$TMP" "\$NODES_CONF"' + # No-op when nodes.conf doesn't exist (first boot). + - matchRegex: + path: data["init.sh"] + pattern: "first boot, nothing to refresh" + # 'vars' line at EOF must be passed through verbatim — it's + # currentEpoch / lastVoteEpoch state and corrupting it would force + # a fresh cluster join. The case-pattern matches an empty line + # OR a line starting with 'vars '. 
+ - matchRegex: + path: data["init.sh"] + pattern: "''\\|vars" + + - it: non-cluster mode should NOT emit the nodes.conf refresh block + # Standalone and replicated modes have no nodes.conf — emitting the + # block in their init.sh would just be dead code and could log + # confusing "first boot" messages on every restart. + asserts: + - notMatchRegex: + path: data["init.sh"] + pattern: "NODES_CONF=/data/nodes\\.conf" + + - it: replicated (non-cluster) mode should NOT emit the nodes.conf refresh block + set: + replica.enabled: true + replica.persistence.size: "5Gi" + asserts: + - notMatchRegex: + path: data["init.sh"] + pattern: "NODES_CONF=/data/nodes\\.conf" From 29a7fda7d0cf9c0314a393a7a93fa5bdb5ea8a53 Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Sat, 23 May 2026 01:39:35 +0530 Subject: [PATCH 21/23] test: stale `nodes.conf` correctly updated Signed-off-by: Ankit Pati --- functional-tests/run-extra-scenarios.sh | 150 ++++++++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/functional-tests/run-extra-scenarios.sh b/functional-tests/run-extra-scenarios.sh index 57a1510a..76f8e467 100755 --- a/functional-tests/run-extra-scenarios.sh +++ b/functional-tests/run-extra-scenarios.sh @@ -805,6 +805,155 @@ scenario_rollout_restart_orderly_failover() { pass "${name}" } +# --------------------------------------------------------------------------- +# Scenario: cluster bus dials by IP, even with cluster-preferred-endpoint-type +# =hostname. After a rolling restart, a pod whose nodes.conf has only stale +# peer IPs becomes a stranded minority partition — every gossip attempt +# times out against dead IPs and it never gets the chance to learn fresh +# ones. The chart's init container re-resolves each peer's announced FQDN +# and rewrites stale IPs in /data/nodes.conf before valkey-server starts; +# this scenario proves that refresh works end-to-end. +# +# Reproduction: +# 1) Install cluster (replicasPerShard=1) and wait for cluster_state:ok. 
+# 2) Snapshot pod-0's nodes.conf to extract the real peer IPs. +# 3) Poison: replace every peer IP in pod-0's nodes.conf with TEST-NET-1 +# (192.0.2.0/24, RFC 5737 documentation range — guaranteed unroutable). +# 4) SIGKILL valkey-server (pid 1) so the shutdown handler can't rewrite +# nodes.conf back to good state; the pod restarts via the StatefulSet +# controller. +# 5) Wait for pod-0 to be Ready again. The init container's refresh +# block should re-resolve every peer FQDN and rewrite the IPs back +# to the real ones BEFORE valkey-server starts. +# 6) Assert: pod-0's nodes.conf no longer contains 192.0.2.99 and +# cluster_state from pod-0's perspective is back to ok. +# +# Without the refresh: pod-0 boots, dials 192.0.2.99 on the bus, every +# connection times out, cluster_state stays fail forever. So the +# assertion has teeth — a regression that drops the refresh would leave +# the poisoned IPs in place and cluster_state would never recover. +# --------------------------------------------------------------------------- +scenario_nodes_conf_ip_refresh() { + local name="cluster init refreshes stale nodes.conf IPs after pod restart" + log "SCENARIO: ${name}" + cleanup_release + + if ! hctl install "${RELEASE}" "${CHART_DIR}" \ + --set=cluster.enabled=true \ + --set=cluster.persistence.size=100Mi \ + --set=cluster.shards=3 \ + --set=cluster.replicasPerShard=1 \ + --wait --timeout=300s >/dev/null; then + fail "${name}" "helm install failed" + return + fi + kctl wait --for=condition=complete "job/${RELEASE}-cluster-init" --timeout=300s >/dev/null + + # Wait for gossip convergence — same rationale as the rollout + # scenario: the init Job returning doesn't mean every node has seen + # every PING/PONG yet, and we need cluster_state:ok before we can + # meaningfully assert it recovers. 
+ local s + for _ in $(seq 1 60); do + s=$(kctl exec "${RELEASE}-0" -c "${RELEASE}" -- \ + valkey-cli cluster info 2>/dev/null \ + | awk -F: '/^cluster_state:/{print $2}' | tr -d '\r\n' || true) + [[ ${s} == ok ]] && break + sleep 2 + done + if [[ ${s} != ok ]]; then + fail "${name}" "cluster_state=${s:-} after install (need ok before poisoning)" + cleanup_release; return + fi + + # Snapshot the original nodes.conf for diagnostics and to confirm + # poisoning actually changes content. + local orig + orig=$(kctl exec "${RELEASE}-0" -c "${RELEASE}" -- cat /data/nodes.conf 2>/dev/null) + if [[ -z ${orig} ]]; then + fail "${name}" "failed to read /data/nodes.conf on ${RELEASE}-0" + cleanup_release; return + fi + + # Poison: replace every peer's IP token (the leading "ip:port@busport" + # of field 2) with 192.0.2.99:6379@16379. Then SIGKILL pid 1 so the + # graceful-shutdown handler doesn't rewrite nodes.conf during teardown. + # The poisoned file must reach disk; using cat-via-stdin keeps that + # write atomic from the pod's perspective. + log "Poisoning /data/nodes.conf on ${RELEASE}-0 and SIGKILLing valkey-server" + # shellcheck disable=SC2016 + if ! kctl exec "${RELEASE}-0" -c "${RELEASE}" -- sh -c ' + awk '"'"' + # Pass through blank lines and the "vars currentEpoch ..." footer. + /^$/ || /^vars / { print; next } + # Field 2 is ",,..." — replace ONLY + # the leading ip:port@busport, keep everything else. Skipping + # myself,* keeps Valkey from refusing to start with a + # mismatched cluster id, but the production bug ALSO had + # myself stale — the refresh block must handle it. Poison it. + { + # Split field 2 on commas: head is ip:port@busport, tail is rest. + n = split($2, a, ",") + head = a[1] + tail = "" + for (i = 2; i <= n; i++) tail = tail "," a[i] + # Replace the IP only; preserve port and bus port. 
+ sub(/^[0-9.]+/, "192.0.2.99", head) + $2 = head tail + print + } + '"'"' /data/nodes.conf >/data/nodes.conf.poisoned \ + && mv /data/nodes.conf.poisoned /data/nodes.conf \ + && sync \ + && kill -9 1 + '; then + # Even when SIGKILL itself succeeds, the exec returns non-zero + # because the connection drops with the pid. Don't fail here — + # check whether the pod actually got recreated below. + : + fi + + # The pod must be replaced. The StatefulSet controller will recreate + # it; wait for the new pod to be Ready (i.e. probe passes against the + # newly-started valkey-server, which means the init container ran). + log "Waiting for ${RELEASE}-0 to come back up" + if ! kctl wait --for=condition=Ready "pod/${RELEASE}-0" --timeout=180s >/dev/null; then + fail "${name}" "${RELEASE}-0 never became Ready after SIGKILL" + cleanup_release; return + fi + + # The post-restart nodes.conf must NOT contain the poison IP — the + # init container's refresh step replaces it before valkey-server + # boots. (Valkey itself only writes peers' IPs to nodes.conf as it + # observes them via gossip; without our pre-boot refresh, the boot + # would proceed against 192.0.2.99 and the file would stay poisoned.) + local after + after=$(kctl exec "${RELEASE}-0" -c "${RELEASE}" -- cat /data/nodes.conf 2>/dev/null) + if grep -q '192\.0\.2\.99' <<<"${after}"; then + fail "${name}" "nodes.conf still contains poison IP 192.0.2.99 after restart — refresh did not run. Content: ${after}" + cleanup_release; return + fi + + # And the cluster must be functional from pod-0's view — the whole + # point of the refresh is that it boots into a cluster it can talk + # to. Poll because gossip needs a moment to re-converge after the + # restart. 
+ for _ in $(seq 1 60); do + s=$(kctl exec "${RELEASE}-0" -c "${RELEASE}" -- \ + valkey-cli cluster info 2>/dev/null \ + | awk -F: '/^cluster_state:/{print $2}' | tr -d '\r\n' || true) + [[ ${s} == ok ]] && break + sleep 2 + done + if [[ ${s} != ok ]]; then + fail "${name}" "cluster_state=${s:-} after refresh (want ok). nodes.conf was: ${after}" + cleanup_release; return + fi + + cleanup_release + pass "${name}" +} + trap 'cleanup_release; cleanup_pair; cleanup_ambient_pair' EXIT scenario_aclconfig_metrics || true @@ -814,6 +963,7 @@ scenario_readiness_probe_exists || true scenario_two_clusters_isolated || true scenario_isolation_off_lets_merge_happen || true scenario_rollout_restart_orderly_failover || true +scenario_nodes_conf_ip_refresh || true scenario_ambient_authz_blocks_cross_release_meet || true scenario_ambient_ap_disabled_refused || true scenario_ambient_shared_default_sa_refused || true From 58bceefda333ee3a7326dfb3326d325464563eb2 Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Sat, 23 May 2026 03:50:14 +0530 Subject: [PATCH 22/23] fix: `livenessProbe` & `readinessProbe` under cluster mode Signed-off-by: Ankit Pati --- functional-tests/run-extra-scenarios.sh | 62 +++++++++++++++++++++++ valkey/templates/_helpers.tpl | 57 ++++++++++++++------- valkey/templates/cluster-statefulset.yaml | 9 ++-- valkey/templates/deploy_valkey.yaml | 9 ++-- valkey/templates/statefulset.yaml | 9 ++-- valkey/tests/cluster_test.yaml | 28 +++++----- valkey/tests/deployment_test.yaml | 12 ++--- valkey/tests/statefulset_test.yaml | 12 ++--- valkey/values.yaml | 33 +++++++----- 9 files changed, 165 insertions(+), 66 deletions(-) diff --git a/functional-tests/run-extra-scenarios.sh b/functional-tests/run-extra-scenarios.sh index 76f8e467..3735c0a2 100755 --- a/functional-tests/run-extra-scenarios.sh +++ b/functional-tests/run-extra-scenarios.sh @@ -954,6 +954,67 @@ scenario_nodes_conf_ip_refresh() { pass "${name}" } +# 
--------------------------------------------------------------------------- +# Scenario: probe LOADING-policy is wired correctly on the live workload. +# +# The chart applies a tri-state policy: +# * startupProbe — rejects LOADING (gate has teeth during initial RDB load) +# * livenessProbe — accepts LOADING (don't kill a replica mid-full-resync) +# * readinessProbe — rejects LOADING (don't route traffic to a loading pod) +# +# Production regression that motivates this test: a replica in a 38 GB cluster +# hit `cluster_state:fail` after a replication break triggered a full resync; +# the post-resync in-memory load took ~57 s, and livenessProbe +# (failureThreshold=6 * periodSeconds=10s = 60 s) killed the pod just before +# load completed. The kill discarded the freshly-streamed RDB; the next pod +# incarnation triggered yet another full resync. Crash-loop until intervention. +# +# helm-unittest already locks the rendered command strings in via +# matchRegex; this functional test goes one layer further by asserting +# that the live API objects in the cluster carry the right policy. A +# template change that bypasses the helper would slip past unit tests +# but get caught here. +# --------------------------------------------------------------------------- +scenario_probe_loading_policy() { + local name="probes carry tri-state LOADING policy on live workload" + log "SCENARIO: ${name}" + cleanup_release + + if ! 
hctl install "${RELEASE}" "${CHART_DIR}" \ + --set=cluster.enabled=true \ + --set=cluster.persistence.size=100Mi \ + --set=cluster.shards=3 \ + --set=cluster.replicasPerShard=0 \ + --wait --timeout=300s >/dev/null; then + fail "${name}" "helm install failed" + return + fi + + local startup liveness readiness + startup=$(kctl get statefulset "${RELEASE}" \ + -o jsonpath='{.spec.template.spec.containers[0].startupProbe.exec.command[2]}') + liveness=$(kctl get statefulset "${RELEASE}" \ + -o jsonpath='{.spec.template.spec.containers[0].livenessProbe.exec.command[2]}') + readiness=$(kctl get statefulset "${RELEASE}" \ + -o jsonpath='{.spec.template.spec.containers[0].readinessProbe.exec.command[2]}') + + if grep -q LOADING <<<"${startup}"; then + fail "${name}" "startupProbe must reject LOADING but accepts it: ${startup}" + cleanup_release; return + fi + if ! grep -q LOADING <<<"${liveness}"; then + fail "${name}" "livenessProbe must accept LOADING but rejects it: ${liveness}" + cleanup_release; return + fi + if grep -q LOADING <<<"${readiness}"; then + fail "${name}" "readinessProbe must reject LOADING but accepts it: ${readiness}" + cleanup_release; return + fi + + cleanup_release + pass "${name}" +} + trap 'cleanup_release; cleanup_pair; cleanup_ambient_pair' EXIT scenario_aclconfig_metrics || true @@ -964,6 +1025,7 @@ scenario_two_clusters_isolated || true scenario_isolation_off_lets_merge_happen || true scenario_rollout_restart_orderly_failover || true scenario_nodes_conf_ip_refresh || true +scenario_probe_loading_policy || true scenario_ambient_authz_blocks_cross_release_meet || true scenario_ambient_ap_disabled_refused || true scenario_ambient_shared_default_sa_refused || true diff --git a/valkey/templates/_helpers.tpl b/valkey/templates/_helpers.tpl index b2c15634..739ba844 100644 --- a/valkey/templates/_helpers.tpl +++ b/valkey/templates/_helpers.tpl @@ -322,32 +322,55 @@ empty mapping, to skip). {{/* Probe shell command. 
Returns the "sh -c" argument that pings valkey-server -locally and accepts replies that prove the server is up AND serving. +locally; the set of replies that count as healthy is parameterised. + +Args (passed as a dict): + ctx — the parent context (.) so we can read .Values.tls + acceptLoading — whether to treat 'LOADING' as healthy Replies to PING are one of: PONG — fully up, dataset loaded NOAUTH … — up, requires auth (treat as proof of liveness — the server is fully serving, we just lack credentials) - LOADING … — TCP listener is up but the dataset is still being read - from RDB/AOF; the server cannot serve traffic yet - -LOADING is deliberately NOT accepted, including by startupProbe. The -whole reason startupProbe exists in Kubernetes (added in 1.16) is to -gate liveness/readiness behind a slow-startup window — that gate has -to actually fail during startup or the gate does nothing. With LOADING -accepted by startupProbe, the probe passes the moment the TCP listener -opens; kubelet switches immediately to livenessProbe (which does not -accept LOADING) and the pod gets killed during load anyway, just -attributed to liveness. Operators with multi-GB RDBs bump -`startupProbe.failureThreshold` instead — that is the canonical -Kubernetes pattern for slow loaders. + LOADING … — TCP listener is up but the dataset is being read from + RDB/AOF; the server cannot serve traffic yet + +The three probes have different jobs and therefore different LOADING +policies: + + startupProbe (acceptLoading=false): the gate that holds liveness and + readiness off until the pod is actually serving. If startupProbe + accepted LOADING it would pass the moment the TCP listener opens, + kubelet would switch to liveness/readiness immediately, and the + gate would do nothing useful. Operators with multi-GB RDBs bump + `startupProbe.failureThreshold` to extend the load window — the + canonical Kubernetes pattern for slow loaders. 
+ + livenessProbe (acceptLoading=true): runs only AFTER startupProbe + passes. After that point, LOADING almost always means a full-resync + from primary is in progress (replica fell behind, replication + backlog overflowed, etc.). Killing the pod here loses the in-flight + download work and forces yet another full resync, perpetuating the + very condition the kill was supposed to escape. A pod stuck loading + forever is rare and harmless compared to the kill-loop, so accept + LOADING and let the load complete. + + readinessProbe (acceptLoading=false): decides whether the pod is in + the Service endpoint set. A LOADING pod can't serve traffic, so it + must be removed from the rotation until it's truly ready. This + leaves the pod 'Running 0/1' during full-resync — exactly right. */}} {{- define "valkey.probeShellCommand" -}} +{{- $ctx := .ctx -}} {{- $pingCmd := "valkey-cli ping" -}} -{{- if .Values.tls.enabled -}} -{{- $pingCmd = printf "valkey-cli --tls --cacert /tls/%s ping" .Values.tls.caPublicKey -}} +{{- if $ctx.Values.tls.enabled -}} +{{- $pingCmd = printf "valkey-cli --tls --cacert /tls/%s ping" $ctx.Values.tls.caPublicKey -}} +{{- end -}} +{{- $accepted := "PONG|NOAUTH" -}} +{{- if .acceptLoading -}} +{{- $accepted = "PONG|NOAUTH|LOADING" -}} {{- end -}} -{{- printf "%s 2>&1 | grep -qE 'PONG|NOAUTH'" $pingCmd -}} +{{- printf "%s 2>&1 | grep -qE '%s'" $pingCmd $accepted -}} {{- end -}} {{/* diff --git a/valkey/templates/cluster-statefulset.yaml b/valkey/templates/cluster-statefulset.yaml index 259a2900..5dad0344 100644 --- a/valkey/templates/cluster-statefulset.yaml +++ b/valkey/templates/cluster-statefulset.yaml @@ -157,23 +157,24 @@ spec: - name: tcp-bus containerPort: {{ .Values.cluster.busPort }} protocol: TCP - {{- $probeCmd := include "valkey.probeShellCommand" . }} + {{- $strictCmd := include "valkey.probeShellCommand" (dict "ctx" . "acceptLoading" false) }} + {{- $loadCmd := include "valkey.probeShellCommand" (dict "ctx" . 
"acceptLoading" true) }} startupProbe: exec: - command: [ "sh", "-c", {{ $probeCmd | quote }} ] + command: [ "sh", "-c", {{ $strictCmd | quote }} ] initialDelaySeconds: 5 periodSeconds: {{ .Values.cluster.startupProbe.periodSeconds }} timeoutSeconds: {{ .Values.cluster.startupProbe.timeoutSeconds }} failureThreshold: {{ .Values.cluster.startupProbe.failureThreshold }} livenessProbe: exec: - command: [ "sh", "-c", {{ $probeCmd | quote }} ] + command: [ "sh", "-c", {{ $loadCmd | quote }} ] periodSeconds: {{ .Values.cluster.livenessProbe.periodSeconds }} timeoutSeconds: {{ .Values.cluster.livenessProbe.timeoutSeconds }} failureThreshold: {{ .Values.cluster.livenessProbe.failureThreshold }} readinessProbe: exec: - command: [ "sh", "-c", {{ $probeCmd | quote }} ] + command: [ "sh", "-c", {{ $strictCmd | quote }} ] periodSeconds: {{ .Values.cluster.readinessProbe.periodSeconds }} timeoutSeconds: {{ .Values.cluster.readinessProbe.timeoutSeconds }} failureThreshold: {{ .Values.cluster.readinessProbe.failureThreshold }} diff --git a/valkey/templates/deploy_valkey.yaml b/valkey/templates/deploy_valkey.yaml index 4fd97cd6..64501320 100644 --- a/valkey/templates/deploy_valkey.yaml +++ b/valkey/templates/deploy_valkey.yaml @@ -114,22 +114,23 @@ spec: - name: tcp containerPort: {{ .Values.service.port }} protocol: TCP - {{- $probeCmd := include "valkey.probeShellCommand" . }} + {{- $strictCmd := include "valkey.probeShellCommand" (dict "ctx" . "acceptLoading" false) }} + {{- $loadCmd := include "valkey.probeShellCommand" (dict "ctx" . 
"acceptLoading" true) }} startupProbe: exec: - command: [ "sh", "-c", {{ $probeCmd | quote }} ] + command: [ "sh", "-c", {{ $strictCmd | quote }} ] periodSeconds: {{ .Values.startupProbe.periodSeconds }} timeoutSeconds: {{ .Values.startupProbe.timeoutSeconds }} failureThreshold: {{ .Values.startupProbe.failureThreshold }} livenessProbe: exec: - command: [ "sh", "-c", {{ $probeCmd | quote }} ] + command: [ "sh", "-c", {{ $loadCmd | quote }} ] periodSeconds: {{ .Values.livenessProbe.periodSeconds }} timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} failureThreshold: {{ .Values.livenessProbe.failureThreshold }} readinessProbe: exec: - command: [ "sh", "-c", {{ $probeCmd | quote }} ] + command: [ "sh", "-c", {{ $strictCmd | quote }} ] periodSeconds: {{ .Values.readinessProbe.periodSeconds }} timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} failureThreshold: {{ .Values.readinessProbe.failureThreshold }} diff --git a/valkey/templates/statefulset.yaml b/valkey/templates/statefulset.yaml index e72a37bd..b7bd1ff6 100644 --- a/valkey/templates/statefulset.yaml +++ b/valkey/templates/statefulset.yaml @@ -127,22 +127,23 @@ spec: - name: tcp containerPort: {{ .Values.service.port }} protocol: TCP - {{- $probeCmd := include "valkey.probeShellCommand" . }} + {{- $strictCmd := include "valkey.probeShellCommand" (dict "ctx" . "acceptLoading" false) }} + {{- $loadCmd := include "valkey.probeShellCommand" (dict "ctx" . 
"acceptLoading" true) }} startupProbe: exec: - command: [ "sh", "-c", {{ $probeCmd | quote }} ] + command: [ "sh", "-c", {{ $strictCmd | quote }} ] periodSeconds: {{ .Values.replica.startupProbe.periodSeconds }} timeoutSeconds: {{ .Values.replica.startupProbe.timeoutSeconds }} failureThreshold: {{ .Values.replica.startupProbe.failureThreshold }} livenessProbe: exec: - command: [ "sh", "-c", {{ $probeCmd | quote }} ] + command: [ "sh", "-c", {{ $loadCmd | quote }} ] periodSeconds: {{ .Values.replica.livenessProbe.periodSeconds }} timeoutSeconds: {{ .Values.replica.livenessProbe.timeoutSeconds }} failureThreshold: {{ .Values.replica.livenessProbe.failureThreshold }} readinessProbe: exec: - command: [ "sh", "-c", {{ $probeCmd | quote }} ] + command: [ "sh", "-c", {{ $strictCmd | quote }} ] periodSeconds: {{ .Values.replica.readinessProbe.periodSeconds }} timeoutSeconds: {{ .Values.replica.readinessProbe.timeoutSeconds }} failureThreshold: {{ .Values.replica.readinessProbe.failureThreshold }} diff --git a/valkey/tests/cluster_test.yaml b/valkey/tests/cluster_test.yaml index d2709386..07a2d720 100644 --- a/valkey/tests/cluster_test.yaml +++ b/valkey/tests/cluster_test.yaml @@ -1273,14 +1273,18 @@ tests: - exists: path: spec.template.spec.containers[0].readinessProbe - # --- LOADING must NOT be accepted by ANY probe (slow-load handling) --- - # Why: startupProbe's job in Kubernetes is to gate liveness/readiness - # behind a slow-startup window. If startupProbe accepted LOADING it - # would pass the moment the TCP listener opens, kubelet would switch - # to liveness, and the not-LOADING-tolerant liveness probe would kill - # the still-loading container — same crash loop, different attribution. - # The right knob for slow loaders is cluster.startupProbe.failureThreshold. 
- - it: cluster startupProbe must reject LOADING (so the gate has teeth) + # --- Tri-state LOADING policy --- + # startupProbe: rejects LOADING — must keep the gate honest so a slow + # RDB load doesn't pass-through to liveness, which would then kill + # the still-loading container. Operators bump + # cluster.startupProbe.failureThreshold for slow loaders. + # livenessProbe: accepts LOADING — after startup passes, LOADING means + # a full-resync from primary is in progress; killing the pod loses + # the in-flight download and triggers another full resync, + # perpetuating the kill-loop. + # readinessProbe: rejects LOADING — a LOADING pod can't serve traffic + # and must drop out of the Service endpoint set until ready. + - it: cluster startupProbe must reject LOADING set: cluster.enabled: true cluster.persistence.size: "5Gi" @@ -1290,17 +1294,17 @@ tests: path: spec.template.spec.containers[0].startupProbe.exec.command[2] pattern: "LOADING" - - it: cluster livenessProbe must NOT accept LOADING + - it: cluster livenessProbe must accept LOADING (full-resync tolerance) set: cluster.enabled: true cluster.persistence.size: "5Gi" template: templates/cluster-statefulset.yaml asserts: - - notMatchRegex: + - matchRegex: path: spec.template.spec.containers[0].livenessProbe.exec.command[2] - pattern: "LOADING" + pattern: "PONG\\|NOAUTH\\|LOADING" - - it: cluster readinessProbe must NOT accept LOADING + - it: cluster readinessProbe must reject LOADING set: cluster.enabled: true cluster.persistence.size: "5Gi" diff --git a/valkey/tests/deployment_test.yaml b/valkey/tests/deployment_test.yaml index 3ba4848e..6368699b 100644 --- a/valkey/tests/deployment_test.yaml +++ b/valkey/tests/deployment_test.yaml @@ -512,22 +512,22 @@ tests: - exists: path: spec.template.spec.containers[0].readinessProbe - # --- LOADING must NOT be accepted by ANY probe (slow-load handling) --- - - it: standalone startupProbe must reject LOADING (so the gate has teeth) + # --- Tri-state LOADING policy (see 
cluster_test.yaml for rationale) --- + - it: standalone startupProbe must reject LOADING template: templates/deploy_valkey.yaml asserts: - notMatchRegex: path: spec.template.spec.containers[0].startupProbe.exec.command[2] pattern: "LOADING" - - it: standalone livenessProbe must NOT accept LOADING + - it: standalone livenessProbe must accept LOADING (full-resync tolerance) template: templates/deploy_valkey.yaml asserts: - - notMatchRegex: + - matchRegex: path: spec.template.spec.containers[0].livenessProbe.exec.command[2] - pattern: "LOADING" + pattern: "PONG\\|NOAUTH\\|LOADING" - - it: standalone readinessProbe must NOT accept LOADING + - it: standalone readinessProbe must reject LOADING template: templates/deploy_valkey.yaml asserts: - notMatchRegex: diff --git a/valkey/tests/statefulset_test.yaml b/valkey/tests/statefulset_test.yaml index e7a8706a..05efddde 100644 --- a/valkey/tests/statefulset_test.yaml +++ b/valkey/tests/statefulset_test.yaml @@ -403,8 +403,8 @@ tests: - exists: path: spec.template.spec.containers[0].readinessProbe - # --- LOADING must NOT be accepted by ANY probe (slow-load handling) --- - - it: replicated startupProbe must reject LOADING (so the gate has teeth) + # --- Tri-state LOADING policy (see cluster_test.yaml for rationale) --- + - it: replicated startupProbe must reject LOADING set: replica.enabled: true replica.persistence.size: "5Gi" @@ -414,17 +414,17 @@ tests: path: spec.template.spec.containers[0].startupProbe.exec.command[2] pattern: "LOADING" - - it: replicated livenessProbe must NOT accept LOADING + - it: replicated livenessProbe must accept LOADING (full-resync tolerance) set: replica.enabled: true replica.persistence.size: "5Gi" template: templates/statefulset.yaml asserts: - - notMatchRegex: + - matchRegex: path: spec.template.spec.containers[0].livenessProbe.exec.command[2] - pattern: "LOADING" + pattern: "PONG\\|NOAUTH\\|LOADING" - - it: replicated readinessProbe must NOT accept LOADING + - it: replicated readinessProbe 
must reject LOADING set: replica.enabled: true replica.persistence.size: "5Gi" diff --git a/valkey/values.yaml b/valkey/values.yaml index 506a180a..7517705e 100644 --- a/valkey/values.yaml +++ b/valkey/values.yaml @@ -98,21 +98,28 @@ resources: {} # cpu: 100m # memory: 128Mi -# Probe tuning. The probe command itself is fixed (valkey-cli ping with -# TLS args matching tls.enabled, accepting only PONG and NOAUTH — -# LOADING is rejected); only the timing knobs are configurable. +# Probe tuning. The probe command is `valkey-cli ping` (with TLS args +# when tls.enabled). Only the timing knobs are configurable. # -# Why LOADING is rejected: a multi-GB RDB load on a fresh primary takes -# minutes, during which every PING returns 'LOADING …'. startupProbe is -# the canonical Kubernetes gate for this — it suppresses liveness and -# readiness until it passes. So on a slow loader, bump -# startupProbe.failureThreshold (or periodSeconds) so -# failureThreshold * periodSeconds comfortably exceeds your worst-case -# RDB load time. Default 30 * 5s = 150s suits small dev datasets only. -# A 44 GB primary needs roughly 240 * 5s = 20 minutes. +# The three probes have different LOADING policies: # -# liveness and readiness keep their tight defaults — they only run -# AFTER startupProbe has passed, by which point PONG is the right reply. +# startupProbe — does NOT accept LOADING, so it actually gates +# liveness/readiness during initial RDB load. On a slow loader, bump +# failureThreshold (or periodSeconds) so failureThreshold * +# periodSeconds comfortably exceeds your worst-case RDB load time. +# Default 30 * 5s = 150s suits small dev datasets only; a 44 GB +# primary needs roughly 240 * 5s = 20 minutes. +# +# livenessProbe — DOES accept LOADING. After startup passes, LOADING +# almost always means a full-resync from primary is in progress +# (replica fell behind, replication backlog overflowed, etc.). 
+# Killing the pod here loses the in-flight download and forces yet +# another full resync, perpetuating the kill-loop. A pod stuck +# loading forever is rare and harmless compared to the kill-loop. +# +# readinessProbe — does NOT accept LOADING. A LOADING pod can't serve +# traffic, so it must leave the Service endpoint set until it can. +# The pod sits 'Running 0/1' during full-resync — exactly right. startupProbe: periodSeconds: 5 timeoutSeconds: 5 From 598a082a2ba803e4ce4ef0244737f7c00fb91ae0 Mon Sep 17 00:00:00 2001 From: Ankit Pati Date: Sat, 23 May 2026 04:58:51 +0530 Subject: [PATCH 23/23] fix: `nodes.conf` test Signed-off-by: Ankit Pati --- functional-tests/run-extra-scenarios.sh | 84 ++++++++++++++++++------- 1 file changed, 62 insertions(+), 22 deletions(-) diff --git a/functional-tests/run-extra-scenarios.sh b/functional-tests/run-extra-scenarios.sh index 3735c0a2..87f0ef59 100755 --- a/functional-tests/run-extra-scenarios.sh +++ b/functional-tests/run-extra-scenarios.sh @@ -875,22 +875,29 @@ scenario_nodes_conf_ip_refresh() { cleanup_release; return fi - # Poison: replace every peer's IP token (the leading "ip:port@busport" - # of field 2) with 192.0.2.99:6379@16379. Then SIGKILL pid 1 so the - # graceful-shutdown handler doesn't rewrite nodes.conf during teardown. - # The poisoned file must reach disk; using cat-via-stdin keeps that - # write atomic from the pod's perspective. - log "Poisoning /data/nodes.conf on ${RELEASE}-0 and SIGKILLing valkey-server" + # Poison: replace every peer's IP token with 192.0.2.99 (RFC 5737 + # documentation prefix — guaranteed unroutable). Critically, SIGSTOP + # valkey-server BEFORE rewriting nodes.conf — otherwise the live + # server's gossip tick (every cluster-node-timeout/2 ≈ 7.5 s) or any + # incoming gossip event from a peer would rewrite nodes.conf back to + # the real IPs, defeating the test. 
SIGSTOP freezes the process so + # it can't write the file; the subsequent force-delete sends SIGKILL + # which clears the STOP and tears the container down. + # + # Atomic file swap (write+mv) so a kill mid-write can't corrupt + # anything; sync forces the page cache to disk so the new pod's + # init container reads the poison from the PVC. + log "SIGSTOPping valkey-server and poisoning /data/nodes.conf on ${RELEASE}-0" # shellcheck disable=SC2016 if ! kctl exec "${RELEASE}-0" -c "${RELEASE}" -- sh -c ' - awk '"'"' + kill -STOP 1 \ + && awk '"'"' # Pass through blank lines and the "vars currentEpoch ..." footer. /^$/ || /^vars / { print; next } # Field 2 is ",,..." — replace ONLY - # the leading ip:port@busport, keep everything else. Skipping - # myself,* keeps Valkey from refusing to start with a - # mismatched cluster id, but the production bug ALSO had - # myself stale — the refresh block must handle it. Poison it. + # the leading ip:port@busport, keep everything else. The + # production bug had myself stale too, so we deliberately + # poison the myself line: the refresh block must handle it. { # Split field 2 on commas: head is ip:port@busport, tail is rest. n = split($2, a, ",") @@ -904,21 +911,54 @@ scenario_nodes_conf_ip_refresh() { } '"'"' /data/nodes.conf >/data/nodes.conf.poisoned \ && mv /data/nodes.conf.poisoned /data/nodes.conf \ - && sync \ - && kill -9 1 + && sync '; then - # Even when SIGKILL itself succeeds, the exec returns non-zero - # because the connection drops with the pid. Don't fail here — - # check whether the pod actually got recreated below. - : + fail "${name}" "failed to poison /data/nodes.conf on ${RELEASE}-0" + cleanup_release; return + fi + + # Capture the current pod UID so we can detect the replacement. 
+ local old_uid + old_uid=$(kctl get pod "${RELEASE}-0" -o jsonpath='{.metadata.uid}' 2>/dev/null) + if [[ -z ${old_uid} ]]; then + fail "${name}" "could not read UID of ${RELEASE}-0 before delete" + cleanup_release; return + fi + + # Force-delete the pod to trigger pod RECREATION (not in-place + # container restart). Init containers only run on new pods; SIGKILL + # of pid 1 alone leaves the same pod object in place and kubelet + # just restarts the container, skipping the init phase entirely. + # Force + grace=0 also bypasses the preStop hook and the graceful- + # shutdown handler, both of which would otherwise rewrite nodes.conf + # back to a clean state and defeat the test. + log "Force-deleting ${RELEASE}-0 to trigger pod recreation" + kctl delete pod "${RELEASE}-0" --force --grace-period=0 \ + --wait=false >/dev/null 2>&1 || true + + # Wait for the StatefulSet controller to create a NEW pod with a + # different UID (the old one may briefly persist in Terminating + # state). + log "Waiting for ${RELEASE}-0 to be recreated with a fresh UID" + local new_uid + for _ in $(seq 1 60); do + new_uid=$(kctl get pod "${RELEASE}-0" -o jsonpath='{.metadata.uid}' 2>/dev/null || true) + if [[ -n ${new_uid} && ${new_uid} != "${old_uid}" ]]; then + break + fi + sleep 2 + done + if [[ ${new_uid} == "${old_uid}" || -z ${new_uid} ]]; then + fail "${name}" "${RELEASE}-0 was not recreated (UID still ${old_uid:-empty})" + cleanup_release; return fi - # The pod must be replaced. The StatefulSet controller will recreate - # it; wait for the new pod to be Ready (i.e. probe passes against the - # newly-started valkey-server, which means the init container ran). - log "Waiting for ${RELEASE}-0 to come back up" + # Now wait for the new pod to be Ready (init container ran, probe + # passes — which only happens if cluster_state recovered, which only + # happens if the refresh worked). + log "Waiting for the new ${RELEASE}-0 (uid=${new_uid}) to be Ready" if ! 
kctl wait --for=condition=Ready "pod/${RELEASE}-0" --timeout=180s >/dev/null; then - fail "${name}" "${RELEASE}-0 never became Ready after SIGKILL" + fail "${name}" "${RELEASE}-0 never became Ready after recreation" cleanup_release; return fi