diff --git a/.gitignore b/.gitignore index 92b40475..8faae3a7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ -*.sh *.lock dist/ .vscode -temp/ \ No newline at end of file +temp/ +*.tgz diff --git a/Justfile b/Justfile index 7cd0c6b3..d741f615 100644 --- a/Justfile +++ b/Justfile @@ -28,3 +28,30 @@ package: validate: lint test @echo "=== All validations passed ===" +# Create the kind cluster and shared fixtures used by the functional suite +functional-setup: + ./functional-tests/setup.sh + +# Tear down fixtures (pass --cluster to also delete the kind cluster) +functional-teardown *ARGS: + ./functional-tests/teardown.sh {{ARGS}} + +# Run one scenario against the already-set-up kind cluster, e.g. +# just functional-scenario off off on on sidecar +# tls/auth/shard/rep are on|off; istio is off|sidecar|ambient. +functional-scenario tls auth shard rep istio: + ./functional-tests/run-scenario.sh {{tls}} {{auth}} {{shard}} {{rep}} {{istio}} + +# Run the full 48-scenario matrix (set FILTER='tls=on istio=ambient' to narrow) +functional-run: + ./functional-tests/run-all.sh + +# Run the extra (non-matrix) regression scenarios on their own +functional-extras: + ./functional-tests/run-extra-scenarios.sh + +# Full functional suite: setup + matrix + teardown including cluster +functional-test: + ./functional-tests/setup.sh + ./functional-tests/run-all.sh + ./functional-tests/teardown.sh --cluster diff --git a/functional-tests/kind-config.yaml b/functional-tests/kind-config.yaml new file mode 100644 index 00000000..3c58a0b5 --- /dev/null +++ b/functional-tests/kind-config.yaml @@ -0,0 +1,5 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +name: valkey-functional +nodes: + - role: control-plane diff --git a/functional-tests/lib.sh b/functional-tests/lib.sh new file mode 100755 index 00000000..f4adda7c --- /dev/null +++ b/functional-tests/lib.sh @@ -0,0 +1,68 @@ +# Shared helpers for Valkey functional tests. +# Sourced by every script under functional-tests/. 
+
+# Strict mode. This file is sourced, so these options also take effect in
+# the script that sources it.
+set -euo pipefail
+
+# HERE is the directory containing this file, REPO_ROOT is its parent, and
+# CHART_DIR is the `valkey` directory directly under REPO_ROOT.
+HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
+REPO_ROOT=$(cd -- "${HERE}/.." && pwd)
+CHART_DIR=${REPO_ROOT}/valkey
+
+# CLUSTER_NAME, NAMESPACE and RELEASE can be overridden from the environment
+# through the VALKEY_* variable named in each default expansion.
+# KUBE_CONTEXT is derived from CLUSTER_NAME.
+CLUSTER_NAME=${VALKEY_KIND_CLUSTER:-valkey-functional}
+KUBE_CONTEXT=kind-${CLUSTER_NAME}
+NAMESPACE=${VALKEY_FUNCTIONAL_NAMESPACE:-default}
+RELEASE=${VALKEY_RELEASE:-valkey}
+
+AUTH_SECRET=valkey-auth
+TLS_SECRET=valkey-tls
+# Three testbenches, covering every shape of mesh participation:
+#   valkey-testbench          — never gets an Envoy sidecar (istio=off
+#                               scenarios, or when Istio isn't installed at
+#                               all). Opts out of both sidecar injection
+#                               and ambient capture.
+#   valkey-testbench-injected — sidecar-injected (istio=on, mode=sidecar).
+#   valkey-testbench-ambient  — ambient-enrolled (istio=on, mode=ambient):
+#                               no sidecar, ztunnel captures its traffic so
+#                               it presents the expected SPIFFE identity to
+#                               Valkey pods' AuthorizationPolicy.
+TESTBENCH_POD=valkey-testbench
+TESTBENCH_POD_INJECTED=valkey-testbench-injected
+TESTBENCH_POD_AMBIENT=valkey-testbench-ambient
+# Deliberately hostile: spaces, shell metacharacters ($, `, &), a backslash,
+# and a double-quote. (The literal below contains no `!`.) Every auth=on
+# scenario then exercises each quoting layer on the chart side:
+#   - the init container's ACL hash pipe (printf %s | sha256sum)
+#   - the masterauth line in valkey.conf (must be quoted+escaped)
+#   - the cluster-init Job's REDISCLI_AUTH path
+#   - the helm-test pod's `cat /valkey-auth/...-password | xargs valkey-cli -a`
+# Keeping these in one place means every future auth=on scenario inherits the
+# coverage for free.
+AUTH_PASSWORD='p@ss w/ spaces & $chars `backticks` "quoted" \backslash'
+
+ISTIO_NAMESPACE=istio-system
+
+# Print all arguments as a single banner line: `=== message ===`.
+log() { printf '=== %s ===\n' "$*"; }
+
+# kubectl / helm wrappers pinned to KUBE_CONTEXT and NAMESPACE; all arguments
+# are passed through unchanged.
+kctl() { kubectl --context="${KUBE_CONTEXT}" --namespace="${NAMESPACE}" "$@"; }
+hctl() { helm --kube-context="${KUBE_CONTEXT}" --namespace="${NAMESPACE}" "$@"; }
+
+# kubectl exec into a testbench. First arg is the pod name; rest is the command.
+# The container inside a testbench pod is addressed by the pod's own name.
+# Arguments: $1 - pod name; remaining arguments - command to execute.
+testbench_exec_in() {
+  local target=$1
+  shift
+  kctl exec "${target}" -c "${target}" -- "$@"
+}
+
+# Block until the named testbench pod is Ready, giving up after 180 seconds.
+# Arguments: $1 - pod name.
+wait_for_testbench() {
+  local target=$1
+  kctl wait "pod/${target}" --for=condition=Ready --timeout=180s
+}
+
+# Succeeds when the ISTIO_NAMESPACE namespace exists in the target cluster.
+istio_installed() {
+  kubectl --context="${KUBE_CONTEXT}" get namespace "${ISTIO_NAMESPACE}" &>/dev/null
+}
+
+# Succeeds when the ztunnel DaemonSet (Istio's ambient data plane) exists in
+# ISTIO_NAMESPACE. Scenarios that need ambient skip themselves when this fails.
+istio_ambient_installed() {
+  kubectl --context="${KUBE_CONTEXT}" --namespace="${ISTIO_NAMESPACE}" \
+    get daemonset ztunnel &>/dev/null
+}
diff --git a/functional-tests/run-all.sh b/functional-tests/run-all.sh
new file mode 100755
index 00000000..3ddbfbc4
--- /dev/null
+++ b/functional-tests/run-all.sh
@@ -0,0 +1,105 @@
+#!/usr/bin/env bash
+# Runs the scenario matrix one scenario at a time. Expects `setup.sh` to
+# have created the kind cluster and the shared fixtures beforehand.
+
+HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=lib.sh
+source "${HERE}/lib.sh"
+
+# The matrix has 48 entries: 2^4 combinations of tls/auth/shard/rep for each
+# of istio=off|sidecar|ambient. Istio is three-valued on purpose: sidecar and
+# ambient share almost nothing below the chart-owned templates — different
+# label paths, different mTLS enforcement points (Envoy vs ztunnel),
+# different rendered resources (DestinationRule only in sidecar;
+# AuthorizationPolicy in both but enforced differently). Both stay in the
+# matrix so a regression in one mode can't hide behind a passing result in
+# the other.
+SCENARIOS=()
+for istio in off sidecar ambient; do
+  # Brace expansion yields the 16 "tls auth shard rep" tuples with the
+  # rightmost field varying fastest, i.e. tls outermost and rep innermost.
+  for combo in {off,on}" "{off,on}" "{off,on}" "{off,on}; do
+    SCENARIOS+=("${combo} ${istio}")
+  done
+done
+
+# Optional filter: `FILTER='tls=on istio=ambient'` runs only matching
+# scenarios.
+# Filter values for `istio` are off|sidecar|ambient; `on` is
+# accepted as an alias for "sidecar or ambient" to keep old habits working.
+#
+# Arguments: $1..$5 - tls auth shard rep istio of the candidate scenario.
+# Globals:   FILTER (read) - whitespace-separated key=value selectors.
+# Returns:   0 when every selector matches (or FILTER is empty), 1 otherwise.
+#            Exits 2 on a malformed selector, an unknown key, or a value the
+#            key can never take, so a typo cannot turn into an empty (and
+#            therefore green) run.
+matches() {
+  local tls=$1 auth=$2 shard=$3 rep=$4 istio=$5
+  local sel k v have
+  # Unquoted on purpose: FILTER is a whitespace-separated list.
+  for sel in ${FILTER:-}; do
+    if [[ ${sel} != *=* ]]; then
+      echo "bad filter selector (want key=value): ${sel}" >&2
+      exit 2
+    fi
+    # Split both halves at the first '='.
+    k=${sel%%=*}
+    v=${sel#*=}
+    case "${k}" in
+      tls) have=${tls} ;;
+      auth) have=${auth} ;;
+      shard) have=${shard} ;;
+      rep) have=${rep} ;;
+      istio) have=${istio} ;;
+      *) echo "bad filter key: ${k}" >&2; exit 2 ;;
+    esac
+    if [[ ${k} == istio ]]; then
+      case "${v}" in
+        off | sidecar | ambient) ;;
+        on)
+          [[ ${istio} == sidecar || ${istio} == ambient ]] || return 1
+          continue
+          ;;
+        *)
+          echo "bad filter value for istio: ${v} (want off|sidecar|ambient|on)" >&2
+          exit 2
+          ;;
+      esac
+    elif [[ ${v} != on && ${v} != off ]]; then
+      echo "bad filter value for ${k}: ${v} (want on|off)" >&2
+      exit 2
+    fi
+    [[ ${have} == "${v}" ]] || return 1
+  done
+  return 0
+}
+
+passed=0
+failed=0
+skipped=0
+failures=()
+
+for s in "${SCENARIOS[@]}"; do
+  read -r tls auth shard rep istio <<<"${s}"
+  if ! matches "${tls}" "${auth}" "${shard}" "${rep}" "${istio}"; then
+    continue
+  fi
+
+  # Ambient scenarios require ztunnel to be installed. setup.sh now
+  # installs the ambient profile by default, but a user running against
+  # a pre-existing cluster might have only the sidecar data plane —
+  # skip rather than fail in that case so the rest of the matrix still
+  # runs.
+  if [[ ${istio} == ambient ]] && ! istio_ambient_installed; then
+    log "SKIP: tls=${tls} auth=${auth} shard=${shard} rep=${rep} istio=${istio} (ztunnel not installed)"
+    skipped=$(( skipped + 1 ))
+    continue
+  fi
+
+  log "SCENARIO: tls=${tls} auth=${auth} shard=${shard} rep=${rep} istio=${istio}"
+  if "${HERE}/run-scenario.sh" "${tls}" "${auth}" "${shard}" "${rep}" "${istio}"; then
+    passed=$(( passed + 1 ))
+  else
+    failed=$(( failed + 1 ))
+    failures+=("tls=${tls} auth=${auth} shard=${shard} rep=${rep} istio=${istio}")
+  fi
+done
+
+# SCENARIOS is never empty, so reaching this point with nothing processed
+# means the (valid but contradictory) FILTER excluded every scenario.
+if (( passed + failed + skipped == 0 )); then
+  echo "no scenario matched FILTER='${FILTER:-}'" >&2
+  exit 2
+fi
+
+echo
+log "Matrix summary: ${passed} passed, ${failed} failed, ${skipped} skipped"
+if (( failed > 0 )); then
+  printf ' failed: %s\n' "${failures[@]}"
+  exit 1
+fi
+
+# Extra, non-matrix regressions (aclConfig+metrics, default-deny netpol,
+# cross-release MEET isolation, ambient validator footguns, Prometheus
+# scraping, etc.). Each one is independent of the tls/auth/shard/rep
+# combinations — folding them into the matrix would just pay the
+# install/teardown cost N times to exercise the same single assertion.
+# Skipped when FILTER is set: filters are matrix-scoped, so the extras
+# wouldn't match anyway and running them would be surprising.
+if [[ -z ${FILTER:-} ]]; then
+  "${HERE}/run-extra-scenarios.sh"
+fi
diff --git a/functional-tests/run-extra-scenarios.sh b/functional-tests/run-extra-scenarios.sh
new file mode 100755
index 00000000..87f0ef59
--- /dev/null
+++ b/functional-tests/run-extra-scenarios.sh
@@ -0,0 +1,1084 @@
+#!/usr/bin/env bash
+# Targeted regressions that don't fit the tls/auth/shard/rep/istio matrix.
+# Each scenario is self-contained: install, assert, uninstall.
+
+HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=lib.sh
+. "${HERE}/lib.sh"
+
+# One "PASS: ..." / "FAIL: ..." entry per finished scenario.
+RESULTS=()
+# Record a pass. Arguments: $1 - scenario name.
+pass() { RESULTS+=("PASS: $1"); }
+# Record a failure and return 1. Arguments: $1 - scenario name; $2 - reason.
+fail() { RESULTS+=("FAIL: $1: $2"); return 1; }
+
+# Uninstall the given release (ignoring "not found") and delete the PVCs
+# labelled with its instance name.
+# Arguments: $1 - release name.
+cleanup_named_release() {
+  local release=$1
+  hctl uninstall "${release}" 2>/dev/null || true
+  kctl delete pvc --selector="app.kubernetes.io/instance=${release}" --ignore-not-found >/dev/null
+}
+
+# Same as cleanup_named_release, for the shared ${RELEASE}.
+cleanup_release() {
+  cleanup_named_release "${RELEASE}"
+}
+
+# ---------------------------------------------------------------------------
+# Scenario: auth.enabled=true with aclConfig only (no aclUsers) and metrics
+# enabled. This used to CrashLoop the exporter with CreateContainerConfigError
+# because the chart pointed REDIS_PASSWORD at a key `default-password` that
+# only exists when there's an inline aclUsers.default.password. The fix is to
+# only wire REDIS_PASSWORD when a real key exists.
+#
+# Returns 0 after recording a pass, 1 after recording a failure. Every exit
+# path removes the alternate release together with its PVCs.
+# ---------------------------------------------------------------------------
+scenario_aclconfig_metrics() {
+  local name="aclConfig-only + metrics exporter must not crash"
+  log "SCENARIO: ${name}"
+  cleanup_release
+
+  # Use an alternate release name to avoid colliding with the shared
+  # fixture secret `valkey-auth` (managed by setup.sh, not Helm). The chart
+  # generates `${release}-auth`, so a different release ⇒ a different secret.
+  local release="${RELEASE}-aclcfg"
+  cleanup_named_release "${release}"
+
+  if ! hctl install "${release}" "${CHART_DIR}" \
+      --set=metrics.enabled=true \
+      --set=auth.enabled=true \
+      --set-string="auth.aclConfig=user default on >simplepass ~* &* +@all" \
+      --wait --timeout=180s >/dev/null; then
+    # `|| true` keeps the cleanup reachable even when errexit is active.
+    fail "${name}" "helm install failed" || true
+    cleanup_named_release "${release}"
+    return 1
+  fi
+
+  # Main container must be Running, metrics sidecar must be Ready. The bug
+  # made the metrics container stick in CreateContainerConfigError forever —
+  # no amount of probe-waiting would ever flip it to Ready.
+  local pod
+  pod=$(kctl get pod -l "app.kubernetes.io/instance=${release}" \
+    -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) || pod=""
+  if [[ -z ${pod} ]]; then
+    fail "${name}" "no pod found for release ${release}" || true
+    cleanup_named_release "${release}"
+    return 1
+  fi
+  if ! kctl wait "pod/${pod}" \
+      --for=condition=Ready --timeout=120s >/dev/null; then
+    local status
+    status=$(kctl get "pod/${pod}" \
+      -o jsonpath='{.status.containerStatuses[*].state}' 2>/dev/null) || status="unknown"
+    fail "${name}" "pod never became Ready (state=${status})" || true
+    cleanup_named_release "${release}"
+    return 1
+  fi
+
+  # Metrics endpoint actually responds. Use `kubectl port-forward` into a
+  # local port — lets us hit the exporter from the host with curl, without
+  # relying on either container having an HTTP client.
+  local pf_port=19121 pf_pid
+  kctl port-forward "pod/${pod}" "${pf_port}:9121" >/dev/null 2>&1 &
+  pf_pid=$!
+  # Give port-forward a moment to establish (up to 20 probes, 0.5s apart).
+  for _ in $(seq 1 20); do
+    if curl -sf --max-time 1 "http://127.0.0.1:${pf_port}/metrics" \
+        >/dev/null 2>&1; then
+      break
+    fi
+    sleep 0.5
+  done
+
+  local metrics_out
+  metrics_out=$(curl -sf --max-time 5 "http://127.0.0.1:${pf_port}/metrics" \
+    2>/dev/null || true)
+  kill "${pf_pid}" 2>/dev/null || true
+  wait "${pf_pid}" 2>/dev/null || true
+
+  if ! grep -q 'redis_exporter_' <<<"${metrics_out}"; then
+    fail "${name}" "/metrics did not serve redis_exporter_* counters" || true
+    cleanup_named_release "${release}"
+    return 1
+  fi
+
+  cleanup_named_release "${release}"
+  pass "${name}"
+}
+
+# ---------------------------------------------------------------------------
+# Scenario: default-deny NetworkPolicy. Previously `networkPolicy.ingress: []`
+# rendered an invalid policy (policyTypes: []), which the API accepts but is a
+# no-op. The fix gates on hasKey, so an empty list still opts in.
+# ---------------------------------------------------------------------------
+# Returns 0 after recording a pass; non-zero after recording a failure.
+scenario_default_deny_netpol() {
+  local name="networkPolicy.ingress=[] produces a real default-deny policy"
+  log "SCENARIO: ${name}"
+  cleanup_release
+
+  if ! hctl install "${RELEASE}" "${CHART_DIR}" \
+      --set-json='networkPolicy={"ingress":[]}' \
+      --wait --timeout=120s >/dev/null; then
+    fail "${name}" "helm install failed"
+    return 1
+  fi
+
+  # The original bug: `networkPolicy.ingress: []` rendered `policyTypes: []`,
+  # which Kubernetes treats as "no policy in either direction" — silently
+  # allowing all traffic despite the user clearly opting into default-deny.
+  # The fix is to gate on hasKey, not truthiness.
+  #
+  # Checking via the API alone is fragile (kube-apiserver drops empty lists
+  # on serialization), so:
+  #   1) Assert policyTypes contains Ingress.
+  #   2) Actually attempt a TCP connection from the testbench — a real
+  #      default-deny policy blocks it; a no-op policy lets it through.
+  local types
+  # A missing policy must surface as the failure below, not as an abort.
+  types=$(kctl get networkpolicy "${RELEASE}" \
+    -o jsonpath='{.spec.policyTypes[*]}') || types=""
+  if [[ ${types} != *Ingress* ]]; then
+    fail "${name}" "policyTypes=${types} (want to include Ingress)"
+    return 1
+  fi
+
+  # Positive control: the traffic check below treats ANY non-zero exit as
+  # "blocked", so first prove the testbench can run valkey-cli at all.
+  # Otherwise a missing pod or binary would look like a working policy.
+  if ! testbench_exec_in "${TESTBENCH_POD}" sh -c 'command -v valkey-cli' \
+      >/dev/null 2>&1; then
+    fail "${name}" "testbench ${TESTBENCH_POD} cannot run valkey-cli; traffic check would be meaningless"
+    return 1
+  fi
+
+  # Live traffic check. Use a short timeout — a default-deny policy drops
+  # SYN packets, so the testbench will sit in CONNECT until the timeout.
+  # The host is built from ${RELEASE} (the frontend Service carries the
+  # release name) so a VALKEY_RELEASE override still targets a Service that
+  # exists instead of failing on DNS and passing vacuously.
+  local rc=0
+  testbench_exec_in "${TESTBENCH_POD}" sh -c \
+    "timeout 5 valkey-cli -h ${RELEASE}.${NAMESPACE}.svc.cluster.local ping" \
+    >/dev/null 2>&1 || rc=$?
+  if (( rc == 0 )); then
+    fail "${name}" "ping succeeded — default-deny ingress policy is a no-op"
+    return 1
+  fi
+
+  cleanup_release
+  pass "${name}"
+}
+
+# ---------------------------------------------------------------------------
+# Scenario: frontend Service must never expose the cluster bus port. The bus
+# port is pod-to-pod gossip; routing it through a round-robin ClusterIP
+# misdirects clients to arbitrary nodes.
+# ---------------------------------------------------------------------------
+scenario_bus_port_hidden() {
+  local name="frontend service does not expose the cluster bus port"
+  local port_names='{.spec.ports[*].name}'
+  local -a chart_flags=(
+    --set=cluster.enabled=true
+    --set=cluster.persistence.size=100Mi
+    --set=cluster.shards=3
+    --set=cluster.replicasPerShard=0
+    --set=cluster.busPort=16379
+    --wait --timeout=300s
+  )
+
+  log "SCENARIO: ${name}"
+  cleanup_release
+
+  hctl install "${RELEASE}" "${CHART_DIR}" "${chart_flags[@]}" >/dev/null || {
+    fail "${name}" "helm install failed"
+    return
+  }
+
+  # Port names of the client-facing Service and of the headless Service.
+  local front headless
+  front=$(kctl get service "${RELEASE}" -o jsonpath="${port_names}")
+  headless=$(kctl get service "${RELEASE}-headless" -o jsonpath="${port_names}")
+
+  # tcp-bus must be absent from the frontend ...
+  ! grep -qw tcp-bus <<<"${front}" || {
+    fail "${name}" "frontend exposes tcp-bus (ports=${front})"
+    return
+  }
+  # ... and present on the headless Service.
+  grep -qw tcp-bus <<<"${headless}" || {
+    fail "${name}" "headless missing tcp-bus (ports=${headless})"
+    return
+  }
+
+  cleanup_release
+  pass "${name}"
+}
+
+# ---------------------------------------------------------------------------
+# Scenario: the valkey container must carry a readiness probe. With only
+# startup+liveness defined, a pod that lost server health but kept the TCP
+# socket open would keep receiving traffic.
+# ---------------------------------------------------------------------------
+scenario_readiness_probe_exists() {
+  local name="valkey container declares a readiness probe"
+  log "SCENARIO: ${name}"
+  cleanup_release
+
+  hctl install "${RELEASE}" "${CHART_DIR}" --wait --timeout=120s >/dev/null || {
+    fail "${name}" "helm install failed"
+    return
+  }
+
+  # Exec command of the first container's readiness probe (empty if unset).
+  local probe_cmd
+  probe_cmd=$(kctl get deployment "${RELEASE}" \
+    -o jsonpath='{.spec.template.spec.containers[0].readinessProbe.exec.command}')
+  [[ -n ${probe_cmd} ]] || {
+    fail "${name}" "readinessProbe is missing"
+    return
+  }
+  # The probe has to be the NOAUTH-tolerant flavour.
+  [[ ${probe_cmd} == *NOAUTH* ]] || {
+    fail "${name}" "readinessProbe does not tolerate NOAUTH (${probe_cmd})"
+    return
+  }
+
+  cleanup_release
+  pass "${name}"
+}
+
+# ---------------------------------------------------------------------------
+# Scenario: two independent Valkey clusters in the same namespace must stay
+# independent. Valkey's CLUSTER MEET has no auth, so a MEET issued by (or
+# forwarded through) a node in cluster A can merge cluster B into it. The
+# chart's cluster-isolation NetworkPolicy pins the bus port to same-release
+# pods; without it, a stray MEET wins.
+#
+# This test:
+#   1) installs `valkey-iso-a` and `valkey-iso-b` in the same namespace,
+#      cluster mode;
+#   2) issues CLUSTER MEET from a node in A targeting a node in B;
+#   3) waits for gossip to propagate;
+#   4) asserts A still has its original 3 nodes (not 6).
+#
+# Also runs a negative twin with `cluster.isolation.enabled=false` to prove
+# the assertion has teeth — if isolation is the thing keeping them apart,
+# disabling it must let the merge happen.
+# ---------------------------------------------------------------------------
+
+# Install one cluster-mode release with a given name and isolation flag.
+# Globals it expects: NAMESPACE, CHART_DIR, KUBE_CONTEXT.
+# Arguments: $1 - release name; $2 - value for cluster.isolation.enabled.
+# Returns:   helm's exit status.
+install_cluster() {
+  local release=$1 isolation=$2
+  hctl install "${release}" "${CHART_DIR}" \
+    --set=cluster.enabled=true \
+    --set=cluster.persistence.size=100Mi \
+    --set=cluster.shards=3 \
+    --set=cluster.replicasPerShard=0 \
+    --set="cluster.isolation.enabled=${isolation}" \
+    --wait --timeout=300s >/dev/null
+}
+
+# Block until the `<release>-cluster-init` Job has completed (max 300s).
+# Arguments: $1 - release name.
+wait_cluster_init() {
+  kctl wait --for=condition=complete "job/$1-cluster-init" --timeout=300s >/dev/null
+}
+
+# Count unique node IDs reported by `cluster nodes` on pod-0 of the given
+# release (first field of every non-blank line, de-duplicated).
+# Always prints exactly one non-negative integer; prints 0 when the query
+# fails or yields anything that is not a plain number ("indeterminate").
+count_cluster_nodes() {
+  local release=$1 count
+  count=$(kctl exec "${release}-0" -c "${release}" -- sh -c \
+    "valkey-cli cluster nodes 2>/dev/null | awk 'NF {print \$1}' | sort -u | wc -l" \
+    2>/dev/null | tr -d '[:space:]') || count=0
+  # Partial output followed by a failure must not leak through as garbage.
+  [[ ${count} =~ ^[0-9]+$ ]] || count=0
+  printf '%s\n' "${count}"
+}
+
+# Fire CLUSTER MEET from src_release pod-0 targeting dst_release pod-0.
+# Returns 1 when the destination pod IP cannot be determined (no MEET was
+# sent). A failure of the MEET command itself is deliberately ignored.
+poison_meet() {
+  local src_release=$1 dst_release=$2
+  local dst_ip
+  dst_ip=$(kctl get pod "${dst_release}-0" -o jsonpath='{.status.podIP}') || return 1
+  [[ -n ${dst_ip} ]] || return 1
+  kctl exec "${src_release}-0" -c "${src_release}" -- \
+    valkey-cli cluster meet "${dst_ip}" 6379 >/dev/null 2>&1 || true
+}
+
+# Remove both isolation-test releases and their PVCs.
+cleanup_pair() {
+  hctl uninstall valkey-iso-a 2>/dev/null || true
+  hctl uninstall valkey-iso-b 2>/dev/null || true
+  kctl delete pvc --selector='app.kubernetes.io/instance=valkey-iso-a' --ignore-not-found >/dev/null
+  kctl delete pvc --selector='app.kubernetes.io/instance=valkey-iso-b' --ignore-not-found >/dev/null
+}
+
+# Record a failure for an isolation-pair scenario and tear the pair down.
+# Always returns 1; call as `fail_pair ... || return 1`.
+# Arguments: $1 - scenario name; $2 - reason.
+fail_pair() {
+  fail "$1" "$2" || true
+  cleanup_pair
+  return 1
+}
+
+# Install the A/B pair with the given isolation flag, wait for both
+# cluster-init Jobs, and verify each cluster starts with exactly 3 nodes
+# (3 shards, 0 replicas). Waiting for the Job keeps the baseline from being
+# sampled before cluster formation has finished.
+# Arguments: $1 - scenario name; $2 - value for cluster.isolation.enabled.
+# Returns:   0 when the pair is ready; 1 after recording a failure.
+prepare_pair() {
+  local name=$1 isolation=$2 rel
+  for rel in valkey-iso-a valkey-iso-b; do
+    if ! install_cluster "${rel}" "${isolation}"; then
+      fail_pair "${name}" "install of ${rel} failed" || return 1
+    fi
+  done
+  for rel in valkey-iso-a valkey-iso-b; do
+    if ! wait_cluster_init "${rel}"; then
+      fail_pair "${name}" "cluster-init Job of ${rel} did not complete" || return 1
+    fi
+  done
+
+  local a_before b_before
+  a_before=$(count_cluster_nodes valkey-iso-a)
+  b_before=$(count_cluster_nodes valkey-iso-b)
+  if [[ ${a_before} != 3 || ${b_before} != 3 ]]; then
+    fail_pair "${name}" "baseline wrong (a=${a_before}, b=${b_before}; want 3+3)" || return 1
+  fi
+}
+
+scenario_two_clusters_isolated() {
+  local name="two cluster-mode releases in one namespace stay isolated"
+  log "SCENARIO: ${name}"
+  cleanup_pair
+
+  prepare_pair "${name}" true || return 1
+
+  # Try to merge B into A. If the MEET was never sent the assertion below
+  # would pass vacuously, so treat that as a failure.
+  if ! poison_meet valkey-iso-a valkey-iso-b; then
+    fail_pair "${name}" "could not resolve pod IP of valkey-iso-b-0; MEET not sent" || return 1
+  fi
+
+  # After a MEET, Valkey adds the peer to `cluster nodes` immediately as a
+  # handshake placeholder — so a count of 4 for a few seconds is EXPECTED
+  # whether or not the merge ultimately succeeds. The real signal is what
+  # happens *after* the handshake timeout: if bus connectivity exists, the
+  # node stays (count stays at 4+); if isolation blocks the bus, the
+  # handshake fails and the placeholder is evicted (count returns to 3).
+  #
+  # Cluster node-timeout defaults to 15s; give the failure detector
+  # multiple intervals to fire, then sample.
+  sleep 45
+
+  # After settling, the merge must NOT have stuck.
+  local a_after b_after
+  a_after=$(count_cluster_nodes valkey-iso-a)
+  b_after=$(count_cluster_nodes valkey-iso-b)
+
+  if [[ ${a_after} != 3 || ${b_after} != 3 ]]; then
+    fail_pair "${name}" "clusters merged (a=${a_after}, b=${b_after}; want 3+3 after settle)" || return 1
+  fi
+
+  cleanup_pair
+  pass "${name}"
+}
+
+# Negative twin: without isolation, the SAME MEET must succeed — otherwise
+# the positive test isn't proving what we think it's proving. It shares the
+# positive test's preparation (Job wait + 3/3 baseline) so that a count above
+# 3 afterwards can only come from the MEET.
+scenario_isolation_off_lets_merge_happen() {
+  local name="disabling isolation lets CLUSTER MEET actually merge (teeth check)"
+  log "SCENARIO: ${name}"
+  cleanup_pair
+
+  prepare_pair "${name}" false || return 1
+
+  if ! poison_meet valkey-iso-a valkey-iso-b; then
+    fail_pair "${name}" "could not resolve pod IP of valkey-iso-b-0; MEET not sent" || return 1
+  fi
+
+  # Mirror the positive test's 45-second settle window: we're asking the
+  # SAME question (has the handshake completed?) and need the same amount
+  # of time for the node-timeout to fire.
+  sleep 45
+
+  local a_after
+  a_after=$(count_cluster_nodes valkey-iso-a)
+  if (( a_after <= 3 )); then
+    fail_pair "${name}" "MEET did not merge even without isolation (a=${a_after}); positive test cannot prove isolation works" || return 1
+  fi
+
+  cleanup_pair
+  pass "${name}"
+}
+
+# ---------------------------------------------------------------------------
+# Ambient-only regressions. Each of these tests a behaviour that's
+# independent of the tls/auth/shard/rep dimensions, so it lives here
+# rather than inflating the matrix with 16 copies of the same assertion.
+# Each self-skips if the cluster lacks the ambient data plane.
+# ---------------------------------------------------------------------------
+
+# Install one ambient-mode, cluster-mode release (3 shards, 0 replicas) with
+# the cluster-isolation NetworkPolicy switched off.
+# Arguments: $1 - release name. Returns helm's exit status.
+install_ambient_cluster() {
+  local release=$1
+  hctl install "${release}" "${CHART_DIR}" \
+    --set=istio.enabled=true \
+    --set=istio.mode=ambient \
+    --set=cluster.enabled=true \
+    --set=cluster.persistence.size=100Mi \
+    --set=cluster.shards=3 \
+    --set=cluster.replicasPerShard=0 \
+    --set=cluster.isolation.enabled=false \
+    --wait --timeout=300s >/dev/null
+}
+
+# The node count and the MEET are issued the same way in ambient mode, so
+# these delegate to the shared helpers instead of carrying a second copy.
+count_cluster_nodes_ambient() {
+  count_cluster_nodes "$1"
+}
+
+poison_meet_ambient() {
+  poison_meet "$1" "$2"
+}
+
+# Remove both ambient-test releases and their PVCs.
+cleanup_ambient_pair() {
+  hctl uninstall valkey-amb-a 2>/dev/null || true
+  hctl uninstall valkey-amb-b 2>/dev/null || true
+  kctl delete pvc --selector='app.kubernetes.io/instance=valkey-amb-a' --ignore-not-found >/dev/null
+  kctl delete pvc --selector='app.kubernetes.io/instance=valkey-amb-b' --ignore-not-found >/dev/null
+}
+
+# Record a failure for the ambient pair scenario and tear the pair down.
+# Always returns 1; call as `fail_ambient_pair ... || return 1`.
+# Arguments: $1 - scenario name; $2 - reason.
+fail_ambient_pair() {
+  fail "$1" "$2" || true
+  cleanup_ambient_pair
+  return 1
+}
+
+# Cross-release CLUSTER MEET must be blocked by the ambient
+# AuthorizationPolicy. Analogous to scenario_two_clusters_isolated above
+# but driven at L4 via ztunnel rather than by NetworkPolicy (the
+# NetworkPolicy is intentionally skipped in ambient — it would drop
+# HBONE). The ONLY thing stopping the merge here is the AP, so we
+# disable cluster.isolation.enabled to force that.
+scenario_ambient_authz_blocks_cross_release_meet() {
+  local name="ambient: AuthorizationPolicy blocks cross-release CLUSTER MEET"
+  log "SCENARIO: ${name}"
+  if ! istio_ambient_installed; then
+    log "SKIP: ${name} (ztunnel not installed)"
+    return
+  fi
+  cleanup_ambient_pair
+
+  local rel
+  for rel in valkey-amb-a valkey-amb-b; do
+    if ! install_ambient_cluster "${rel}"; then
+      fail_ambient_pair "${name}" "install of ${rel} failed" || return 1
+    fi
+  done
+  # A Job that never completes is a recorded failure with cleanup, not an
+  # unexplained abort or a misleading "baseline wrong".
+  for rel in valkey-amb-a valkey-amb-b; do
+    if ! kctl wait --for=condition=complete "job/${rel}-cluster-init" \
+        --timeout=300s >/dev/null; then
+      fail_ambient_pair "${name}" "cluster-init Job of ${rel} did not complete" || return 1
+    fi
+  done
+
+  local a_before b_before
+  a_before=$(count_cluster_nodes_ambient valkey-amb-a)
+  b_before=$(count_cluster_nodes_ambient valkey-amb-b)
+  if [[ ${a_before} != 3 || ${b_before} != 3 ]]; then
+    fail_ambient_pair "${name}" "baseline wrong (a=${a_before}, b=${b_before}; want 3+3)" || return 1
+  fi
+
+  # If the MEET was never sent, "still 3+3" would prove nothing.
+  if ! poison_meet_ambient valkey-amb-a valkey-amb-b; then
+    fail_ambient_pair "${name}" "could not resolve pod IP of valkey-amb-b-0; MEET not sent" || return 1
+  fi
+
+  # Same rationale as the sidecar-mode isolation test: after the MEET,
+  # `cluster nodes` on A briefly shows 4 as a handshake placeholder.
+  # The real signal is post-settle. Node-timeout defaults to 15s; give
+  # it multiple intervals.
+  sleep 45
+
+  local a_after b_after
+  a_after=$(count_cluster_nodes_ambient valkey-amb-a)
+  b_after=$(count_cluster_nodes_ambient valkey-amb-b)
+  if [[ ${a_after} != 3 || ${b_after} != 3 ]]; then
+    fail_ambient_pair "${name}" "clusters merged despite AuthorizationPolicy (a=${a_after}, b=${b_after}; want 3+3)" || return 1
+  fi
+
+  cleanup_ambient_pair
+  pass "${name}"
+}
+
+# The chart must refuse to install in ambient+cluster mode when the
+# AuthorizationPolicy is explicitly disabled — dropping it leaves the bus
+# port with NO cross-release protection (the NetworkPolicy is also
+# skipped in ambient to avoid blocking HBONE). Fail-closed at template
+# time so nobody silently ships an open cluster.
+scenario_ambient_ap_disabled_refused() {
+  expect_ambient_install_refused \
+    "ambient: chart refuses install when authorizationPolicy.enabled=false + cluster" \
+    'cluster-bus port unprotected' \
+    --set=istio.authorizationPolicy.enabled=false
+}
+
+# Shared driver for the "chart must refuse to render" ambient scenarios:
+# dry-run an ambient + cluster-mode install with extra flags and require
+# that it fails AND that the error carries the expected text.
+# Arguments: $1 - scenario name
+#            $2 - text the error output must contain (matched literally)
+#            $3.. - extra helm flags that should trigger the refusal
+# Returns:   0 after recording a pass, 1 after recording a failure.
+expect_ambient_install_refused() {
+  local name=$1 want=$2
+  shift 2
+  log "SCENARIO: ${name}"
+  cleanup_release
+
+  # `|| rc=$?` captures helm's status without toggling errexit globally.
+  local out rc=0
+  out=$(hctl install "${RELEASE}" "${CHART_DIR}" \
+    --set=istio.enabled=true \
+    --set=istio.mode=ambient \
+    --set=cluster.enabled=true \
+    --set=cluster.shards=3 \
+    --set=cluster.replicasPerShard=0 \
+    --set=cluster.persistence.size=100Mi \
+    "$@" \
+    --dry-run 2>&1) || rc=$?
+
+  if (( rc == 0 )); then
+    fail "${name}" "dry-run succeeded but should have failed: ${out}"
+    return 1
+  fi
+  # -F: the expected text contains dots that must not act as wildcards.
+  if ! grep -qF -- "${want}" <<<"${out}"; then
+    fail "${name}" "got error without the expected message (rc=${rc}): ${out}"
+    return 1
+  fi
+  pass "${name}"
+}
+
+# The chart must refuse when ambient + cluster + serviceAccount.create=false
+# with no explicit name, because every release collapses to the namespace's
+# `default` SA and the AP can no longer distinguish releases. Repro'd live
+# during review: two clusters merged despite both having the AP rendered.
+scenario_ambient_shared_default_sa_refused() {
+  expect_ambient_install_refused \
+    "ambient: chart refuses install when serviceAccount defaults to namespace-wide 'default'" \
+    "serviceAccount.create=false AND serviceAccount.name empty" \
+    --set=serviceAccount.create=false
+}
+
+# Record a failure, uninstall the shared release, and return 1.
+# Call as `fail_release ... || return 1`.
+# Arguments: $1 - scenario name; $2 - reason.
+fail_release() {
+  fail "$1" "$2" || true
+  cleanup_release
+  return 1
+}
+
+# Custom trustDomain must propagate into the AuthorizationPolicy principal.
+# A cluster with `istio.trustDomain=my.mesh.example.com` whose AP still
+# emits `cluster.local/…` would self-deny: same-release callers present an
+# identity under the CUSTOM trust domain but the AP's ALLOW rule only
+# matches the hardcoded one, so the bus port default-denies even for its
+# own pods.
+# We don't actually reconfigure Istio's trust domain here — that's a
+# cluster-wide concern, not chart-level — so the install does NOT fully
+# converge. The test inspects the rendered AP to confirm the principal
+# string follows the override. That's the piece the chart owns.
+scenario_ambient_trustdomain_override() {
+  local name="ambient: AP principal follows istio.trustDomain override"
+  log "SCENARIO: ${name}"
+  if ! istio_ambient_installed; then
+    log "SKIP: ${name} (ztunnel not installed)"
+    return
+  fi
+  cleanup_release
+
+  if ! hctl install "${RELEASE}" "${CHART_DIR}" \
+      --set=istio.enabled=true \
+      --set=istio.mode=ambient \
+      --set=cluster.enabled=true \
+      --set=cluster.shards=3 \
+      --set=cluster.replicasPerShard=0 \
+      --set=cluster.persistence.size=100Mi \
+      --set=istio.trustDomain=my.mesh.example.com \
+      --wait --timeout=240s >/dev/null 2>&1; then
+    # Expected: install won't converge because the actual mesh trust
+    # domain is still cluster.local. We only need the AP rendered to
+    # verify the principal string.
+    :
+  fi
+
+  # A missing AP must reach the comparison below (as an empty string) so the
+  # failure is recorded instead of the lookup aborting the script.
+  local want="my.mesh.example.com/ns/${NAMESPACE}/sa/${RELEASE}"
+  local principals
+  principals=$(kctl get authorizationpolicy "${RELEASE}-cluster-bus" \
+    -o jsonpath='{.spec.rules[0].from[0].source.principals[*]}' 2>/dev/null) || principals=""
+  if [[ ${principals} != "${want}" ]]; then
+    # The half-converged release is removed on this path too.
+    fail_release "${name}" "AP principals=${principals}, want ${want}" || return 1
+  fi
+
+  cleanup_release
+  pass "${name}"
+}
+
+# Prometheus scraping the metrics exporter must work in ambient mode. The
+# AuthorizationPolicy is ALLOW-only, which triggers Istio default-deny for
+# any non-matching traffic — if the chart forgets to include the metrics
+# port in the open rule, production Prometheus stacks silently stop
+# seeing Valkey metrics the moment someone enables Istio.
+scenario_ambient_prometheus_scrape() {
+  local name="ambient: in-mesh Prometheus can scrape metrics exporter"
+  log "SCENARIO: ${name}"
+  if ! istio_ambient_installed; then
+    log "SKIP: ${name} (ztunnel not installed)"
+    return
+  fi
+  cleanup_release
+
+  if ! hctl install "${RELEASE}" "${CHART_DIR}" \
+      --set=istio.enabled=true \
+      --set=istio.mode=ambient \
+      --set=cluster.enabled=true \
+      --set=cluster.shards=3 \
+      --set=cluster.replicasPerShard=0 \
+      --set=cluster.persistence.size=100Mi \
+      --set=metrics.enabled=true \
+      --wait --timeout=300s >/dev/null; then
+    fail_release "${name}" "helm install failed" || return 1
+  fi
+  if ! kctl wait --for=condition=complete "job/${RELEASE}-cluster-init" \
+      --timeout=300s >/dev/null; then
+    fail_release "${name}" "cluster-init Job did not complete" || return 1
+  fi
+
+  # An ambient-enrolled curl pod simulates an in-mesh Prometheus.
+  local scraper="scrape-${RELEASE}-$$"
+  kctl delete pod "${scraper}" --ignore-not-found --wait=true >/dev/null
+  if ! kctl run "${scraper}" \
+      --image=curlimages/curl \
+      --labels='istio.io/dataplane-mode=ambient' \
+      --restart=Never \
+      --command -- sleep 300 >/dev/null \
+    || ! kctl wait --for=condition=Ready "pod/${scraper}" --timeout=120s >/dev/null; then
+    # Do not leave the scraper behind when it never became usable.
+    kctl delete pod "${scraper}" --ignore-not-found --wait=false >/dev/null || true
+    fail_release "${name}" "scraper pod ${scraper} did not become Ready" || return 1
+  fi
+
+  # curl appends an `HTTP=<status>` trailer line; a failed exec leaves `code`
+  # empty and is reported through the status check below.
+  local code out
+  out=$(kctl exec "${scraper}" -c "${scraper}" -- \
+    curl -sS --max-time 10 -w '\nHTTP=%{http_code}\n' \
+    "http://${RELEASE}-metrics.${NAMESPACE}.svc.cluster.local:9121/metrics" 2>&1) || true
+  code=$(awk -F= '/^HTTP=/{print $2}' <<<"${out}")
+
+  kctl delete pod "${scraper}" --ignore-not-found --wait=false >/dev/null || true
+
+  if [[ ${code} != "200" ]]; then
+    fail_release "${name}" "scrape returned HTTP=${code:-}, body was: ${out}" || return 1
+  fi
+  if ! grep -q '^redis_' <<<"${out}"; then
+    fail_release "${name}" "HTTP 200 but body lacks redis_* metrics" || return 1
+  fi
+
+  cleanup_release
+  pass "${name}"
+}
+
+# ---------------------------------------------------------------------------
+# Scenario: `kubectl rollout restart` on a replicated cluster must not cause
+# client-visible disruption. The preStop hook runs `CLUSTER FAILOVER` on
+# every primary before SIGTERM, so the shard already has a new primary by
+# the time the old pod terminates. We assert this by:
+#
+#   1) Installing cluster.shards=3, cluster.replicasPerShard=1 (6 pods).
+#   2) Recording each pod's role (master/slave) — this is our baseline.
+#   3) Writing a known key through any pod (cluster redirects handle placement).
+#   4) `kubectl rollout restart` the STS and waiting for the rollout.
+#   5) Re-checking cluster_state, master/slave counts, and the key's value.
+#   6) Comparing new roles to baseline: since every primary is asked to hand
+#      off to its own replica, every primary/replica pair should have flipped
+#      ordinals.
#      We assert that at least 3 of the 6 ordinals changed role (one per
#      original primary) — any weaker check would pass even if the hook
#      never ran and the cluster simply waited through node-timeout failovers.
#
# If the preStop hook is broken or absent, steps 5-6 still "work" in the sense
# that the cluster eventually self-heals via node-timeout, but:
#   - there's a 15s+ window of unavailability per primary,
#   - and the pod role stays the same after restart (the restarted pod
#     re-joins as primary because its nodes.conf persisted), so the role-flip
#     assertion catches it.
#
# Globals:   RELEASE, CHART_DIR (read)
# Arguments: none
# Outputs:   progress via log; the outcome is reported through fail/pass
# Returns:   status of the last helper executed (callers ignore it)
# ---------------------------------------------------------------------------
scenario_rollout_restart_orderly_failover() {
  local name="rollout restart performs orderly CLUSTER FAILOVER (no client-visible gap)"
  log "SCENARIO: ${name}"
  cleanup_release

  # nodeTimeout pinned high (3 min) so cluster-node-timeout auto-failover
  # CANNOT fire during the rollout — a normal per-pod restart takes ~10-30s
  # and the whole rollout ~2-3min, so with a 15s default timeout the
  # observed role-flip signal could be produced either by preStop OR by
  # auto-failover of an in-flight primary. Bumping to 180s guarantees any
  # observed flip is the work of preStop.
  if ! hctl install "${RELEASE}" "${CHART_DIR}" \
    --set=cluster.enabled=true \
    --set=cluster.persistence.size=100Mi \
    --set=cluster.shards=3 \
    --set=cluster.replicasPerShard=1 \
    --set=cluster.nodeTimeout=180000 \
    --wait --timeout=300s >/dev/null; then
    fail "${name}" "helm install failed"
    return
  fi
  # Checked explicitly: the scenario is invoked as `... || true`, which makes
  # bash ignore `set -e` in here, so an unchecked failure would be swallowed.
  if ! kctl wait --for=condition=complete "job/${RELEASE}-cluster-init" \
    --timeout=300s >/dev/null; then
    fail "${name}" "cluster-init job did not complete"
    cleanup_release
    return
  fi

  # Gossip convergence lags job completion: the init Job returns "done"
  # once `cluster create` is ACK'd, but `cluster_state:ok` requires every
  # node to have seen every other node's PING/PONG. Writing canary data
  # or triggering a rollout before that window closes lets the preStop
  # script's own `cluster_state != ok` early-exit fire, bypassing the
  # graceful FAILOVER and silently dropping in-memory writes when the
  # primary pod is replaced.
  local s
  for _ in $(seq 1 60); do
    s=$(kctl exec "${RELEASE}-0" -c "${RELEASE}" -- \
      valkey-cli cluster info 2>/dev/null \
      | awk -F: '/^cluster_state:/{print $2}' | tr -d '\r\n' || true)
    [[ ${s} == ok ]] && break
    sleep 2
  done
  if [[ ${s} != ok ]]; then
    fail "${name}" "cluster_state=${s:-} after install (want ok before rollout)"
    cleanup_release
    return
  fi

  # Capture the role of every pod. Emits one "<ordinal>=<role>" line per
  # pod so the before/after snapshots can be compared ordinal by ordinal.
  # A pod that cannot be queried yields an empty role.
  snapshot_roles() {
    local n=6 i role
    for i in $(seq 0 $((n - 1))); do
      role=$(kctl exec "${RELEASE}-${i}" -c "${RELEASE}" -- \
        valkey-cli info replication 2>/dev/null \
        | awk -F: '/^role:/{print $2}' | tr -d '\r\n' || true)
      printf '%s=%s\n' "${i}" "${role}"
    done
  }

  local before masters_before slaves_before
  before=$(snapshot_roles)
  # grep -c exits 1 on a zero count; `|| true` keeps the "0" it printed.
  masters_before=$(grep -c '=master' <<<"${before}" || true)
  slaves_before=$(grep -Ec '=(slave|replica)' <<<"${before}" || true)
  if [[ ${masters_before} != 3 || ${slaves_before} != 3 ]]; then
    fail "${name}" "baseline wrong: masters=${masters_before} slaves=${slaves_before} (want 3+3)"
    cleanup_release
    return
  fi

  # Write a canary key so we can prove data integrity after the rollout.
  # Must write through a CLUSTER-aware client so slot routing works —
  # valkey-cli -c follows MOVED redirects. The value contains shell
  # metacharacters for the same reason AUTH_PASSWORD does.
  local canary_key="prestop-canary-$$"
  local canary_val='rollout-ok $shell "quote" \back`tick`'
  if ! kctl exec "${RELEASE}-0" -c "${RELEASE}" -- \
    valkey-cli -c set "${canary_key}" "${canary_val}" >/dev/null 2>&1; then
    fail "${name}" "initial SET failed"
    cleanup_release
    return
  fi

  # The actual rollout. Default updateStrategy=RollingUpdate → pods
  # restart one at a time from highest ordinal (podManagementPolicy
  # controls creation/deletion parallelism, not rolling-update pacing).
  # Each primary-pod restart should trigger a preStop FAILOVER; each
  # replica-pod restart should no-op.
  log "triggering rollout restart"
  if ! kctl rollout restart "statefulset/${RELEASE}" >/dev/null; then
    fail "${name}" "rollout restart could not be triggered"
    cleanup_release
    return
  fi

  # Rollout must complete within terminationGracePeriodSeconds * 6 + a
  # little slack — each pod can take up to the grace period in the
  # worst case (preStop timeout + SIGTERM flush).
  if ! kctl rollout status "statefulset/${RELEASE}" --timeout=600s >/dev/null; then
    fail "${name}" "rollout status never converged"
    cleanup_release
    return
  fi

  # Give gossip a moment to settle post-rollout — cluster_state flips to
  # :ok only after every node sees every other node, and the last pod to
  # restart may still be converging when rollout status returns.
  local state
  for _ in $(seq 1 30); do
    state=$(kctl exec "${RELEASE}-0" -c "${RELEASE}" -- \
      valkey-cli cluster info 2>/dev/null \
      | awk -F: '/^cluster_state:/{print $2}' | tr -d '\r\n' || true)
    [[ ${state} == ok ]] && break
    sleep 2
  done
  if [[ ${state} != ok ]]; then
    fail "${name}" "cluster_state=${state:-} after rollout (want ok)"
    cleanup_release
    return
  fi

  # Still 3 masters / 3 slaves — i.e. the handovers completed and every
  # shard has the right shape.
  local after masters_after slaves_after
  after=$(snapshot_roles)
  masters_after=$(grep -c '=master' <<<"${after}" || true)
  slaves_after=$(grep -Ec '=(slave|replica)' <<<"${after}" || true)
  if [[ ${masters_after} != 3 || ${slaves_after} != 3 ]]; then
    fail "${name}" "post-rollout shape wrong: masters=${masters_after} slaves=${slaves_after} (want 3+3)"
    cleanup_release
    return
  fi

  # Canary key survives (via MOVED redirect if the slot moved to a
  # different primary).
  local got
  got=$(kctl exec "${RELEASE}-0" -c "${RELEASE}" -- \
    valkey-cli -c get "${canary_key}" 2>/dev/null || true)
  if [[ ${got} != "${canary_val}" ]]; then
    fail "${name}" "canary key lost: got='${got}' want='${canary_val}'"
    cleanup_release
    return
  fi

  # Expect every primary's ordinal to flip: the rollout restarts each pod
  # once, each primary-pod restart's preStop hands off to a replica, and
  # the ex-primary returns as replica. So of the 3 original primaries,
  # all 3 should now be replicas on those ordinals ⇒ at least 3 flips.
  # With nodeTimeout pinned high above, no other mechanism can produce
  # flips during the rollout window, so this is a precise signal.
  # A broken / missing preStop yields 0 flips (every pod persists its
  # role in nodes.conf and rejoins as that role).
  local flips=0 ordinal role_before role_after
  while IFS='=' read -r ordinal role_before; do
    [[ -n ${ordinal} ]] || continue
    role_after=$(awk -F= -v o="${ordinal}" '$1 == o {print $2}' <<<"${after}")
    if [[ ${role_before} != "${role_after}" ]]; then
      flips=$(( flips + 1 ))
    fi
  done <<<"${before}"
  if (( flips < 3 )); then
    fail "${name}" "only ${flips}/6 ordinals flipped — expected >=3 (every primary's preStop should hand off to a replica). before='${before}' after='${after}'"
    cleanup_release
    return
  fi
  log "roles flipped on ${flips}/6 pods — handover ran"

  cleanup_release
  pass "${name}"
}

# ---------------------------------------------------------------------------
# Scenario: cluster bus dials by IP, even with cluster-preferred-endpoint-type
# =hostname. After a rolling restart, a pod whose nodes.conf has only stale
# peer IPs becomes a stranded minority partition — every gossip attempt
# times out against dead IPs and it never gets the chance to learn fresh
# ones. The chart's init container re-resolves each peer's announced FQDN
# and rewrites stale IPs in /data/nodes.conf before valkey-server starts;
# this scenario proves that refresh works end-to-end.
#
# Reproduction:
#   1) Install cluster (replicasPerShard=1) and wait for cluster_state:ok.
#   2) Snapshot pod-0's nodes.conf to extract the real peer IPs.
#   3) Poison: replace every peer IP in pod-0's nodes.conf with TEST-NET-1
#      (192.0.2.0/24, RFC 5737 documentation range — guaranteed unroutable).
#   4) SIGKILL valkey-server (pid 1) so the shutdown handler can't rewrite
#      nodes.conf back to good state; the pod restarts via the StatefulSet
#      controller.
#   5) Wait for pod-0 to be Ready again. The init container's refresh
#      block should re-resolve every peer FQDN and rewrite the IPs back
#      to the real ones BEFORE valkey-server starts.
#   6) Assert: pod-0's nodes.conf no longer contains 192.0.2.99 and
#      cluster_state from pod-0's perspective is back to ok.
#
# Without the refresh: pod-0 boots, dials 192.0.2.99 on the bus, every
# connection times out, cluster_state stays fail forever. So the
# assertion has teeth — a regression that drops the refresh would leave
# the poisoned IPs in place and cluster_state would never recover.
# Globals:   RELEASE, CHART_DIR (read)
# Arguments: none
# Outputs:   progress via log; the outcome is reported through fail/pass
# Returns:   status of the last helper executed (callers ignore it)
# ---------------------------------------------------------------------------
scenario_nodes_conf_ip_refresh() {
  local name="cluster init refreshes stale nodes.conf IPs after pod restart"
  log "SCENARIO: ${name}"
  cleanup_release

  if ! hctl install "${RELEASE}" "${CHART_DIR}" \
    --set=cluster.enabled=true \
    --set=cluster.persistence.size=100Mi \
    --set=cluster.shards=3 \
    --set=cluster.replicasPerShard=1 \
    --wait --timeout=300s >/dev/null; then
    fail "${name}" "helm install failed"
    return
  fi
  # Checked explicitly: the scenario is invoked as `... || true`, which makes
  # bash ignore `set -e` in here, so an unchecked failure would be swallowed.
  if ! kctl wait --for=condition=complete "job/${RELEASE}-cluster-init" \
    --timeout=300s >/dev/null; then
    fail "${name}" "cluster-init job did not complete"
    cleanup_release
    return
  fi

  # Wait for gossip convergence — same rationale as the rollout
  # scenario: the init Job returning doesn't mean every node has seen
  # every PING/PONG yet, and we need cluster_state:ok before we can
  # meaningfully assert it recovers.
  local s
  for _ in $(seq 1 60); do
    s=$(kctl exec "${RELEASE}-0" -c "${RELEASE}" -- \
      valkey-cli cluster info 2>/dev/null \
      | awk -F: '/^cluster_state:/{print $2}' | tr -d '\r\n' || true)
    [[ ${s} == ok ]] && break
    sleep 2
  done
  if [[ ${s} != ok ]]; then
    fail "${name}" "cluster_state=${s:-} after install (need ok before poisoning)"
    cleanup_release
    return
  fi

  # Snapshot the original nodes.conf for diagnostics and to confirm
  # poisoning actually changes content.
  local orig
  orig=$(kctl exec "${RELEASE}-0" -c "${RELEASE}" -- cat /data/nodes.conf 2>/dev/null || true)
  if [[ -z ${orig} ]]; then
    fail "${name}" "failed to read /data/nodes.conf on ${RELEASE}-0"
    cleanup_release
    return
  fi

  # Poison: replace every peer's IP token with 192.0.2.99 (RFC 5737
  # documentation prefix — guaranteed unroutable). Critically, SIGSTOP
  # valkey-server BEFORE rewriting nodes.conf — otherwise the live
  # server's gossip tick (every cluster-node-timeout/2 ≈ 7.5 s) or any
  # incoming gossip event from a peer would rewrite nodes.conf back to
  # the real IPs, defeating the test. SIGSTOP freezes the process so
  # it can't write the file; the subsequent force-delete sends SIGKILL
  # which clears the STOP and tears the container down.
  #
  # Atomic file swap (write+mv) so a kill mid-write can't corrupt
  # anything; sync forces the page cache to disk so the new pod's
  # init container reads the poison from the PVC.
  #
  # NOTE: the awk program lives inside a single-quoted sh -c string, so
  # its comments must never contain an apostrophe.
  log "SIGSTOPping valkey-server and poisoning /data/nodes.conf on ${RELEASE}-0"
  # shellcheck disable=SC2016
  if ! kctl exec "${RELEASE}-0" -c "${RELEASE}" -- sh -c '
    kill -STOP 1 \
    && awk '"'"'
      # Pass through blank lines and the "vars currentEpoch ..." footer.
      /^$/ || /^vars / { print; next }
      # Field 2 is "ip:port@busport[,hostname,...]" — replace ONLY
      # the leading ip:port@busport, keep everything else. The
      # production bug had myself stale too, so we deliberately
      # poison the myself line: the refresh block must handle it.
      {
        # Split field 2 on commas: head is ip:port@busport, tail is rest.
        n = split($2, a, ",")
        head = a[1]
        tail = ""
        for (i = 2; i <= n; i++) tail = tail "," a[i]
        # Replace the IP only; preserve port and bus port.
        sub(/^[0-9.]+/, "192.0.2.99", head)
        $2 = head tail
        print
      }
    '"'"' /data/nodes.conf >/data/nodes.conf.poisoned \
    && mv /data/nodes.conf.poisoned /data/nodes.conf \
    && sync
  '; then
    fail "${name}" "failed to poison /data/nodes.conf on ${RELEASE}-0"
    cleanup_release
    return
  fi

  # Read the file back and compare it with the snapshot: if the rewrite was
  # a no-op, the "poison is gone" assertion below would pass without the
  # refresh ever being exercised. valkey-server is still SIGSTOPped here,
  # so it cannot race this read.
  local poisoned
  poisoned=$(kctl exec "${RELEASE}-0" -c "${RELEASE}" -- cat /data/nodes.conf 2>/dev/null || true)
  if [[ ${poisoned} == "${orig}" ]] || ! grep -q '192\.0\.2\.99' <<<"${poisoned}"; then
    fail "${name}" "poisoning did not change /data/nodes.conf on ${RELEASE}-0"
    cleanup_release
    return
  fi

  # Capture the current pod UID so we can detect the replacement.
  local old_uid
  old_uid=$(kctl get pod "${RELEASE}-0" -o jsonpath='{.metadata.uid}' 2>/dev/null || true)
  if [[ -z ${old_uid} ]]; then
    fail "${name}" "could not read UID of ${RELEASE}-0 before delete"
    cleanup_release
    return
  fi

  # Force-delete the pod to trigger pod RECREATION (not in-place
  # container restart). Init containers only run on new pods; SIGKILL
  # of pid 1 alone leaves the same pod object in place and kubelet
  # just restarts the container, skipping the init phase entirely.
  # Force + grace=0 also bypasses the preStop hook and the graceful-
  # shutdown handler, both of which would otherwise rewrite nodes.conf
  # back to a clean state and defeat the test.
  log "Force-deleting ${RELEASE}-0 to trigger pod recreation"
  kctl delete pod "${RELEASE}-0" --force --grace-period=0 \
    --wait=false >/dev/null 2>&1 || true

  # Wait for the StatefulSet controller to create a NEW pod with a
  # different UID (the old one may briefly persist in Terminating
  # state).
  log "Waiting for ${RELEASE}-0 to be recreated with a fresh UID"
  local new_uid
  for _ in $(seq 1 60); do
    new_uid=$(kctl get pod "${RELEASE}-0" -o jsonpath='{.metadata.uid}' 2>/dev/null || true)
    if [[ -n ${new_uid} && ${new_uid} != "${old_uid}" ]]; then
      break
    fi
    sleep 2
  done
  if [[ ${new_uid} == "${old_uid}" || -z ${new_uid} ]]; then
    fail "${name}" "${RELEASE}-0 was not recreated (UID still ${old_uid:-empty})"
    cleanup_release
    return
  fi

  # Now wait for the new pod to be Ready (init container ran, probe
  # passes — which only happens if cluster_state recovered, which only
  # happens if the refresh worked).
  log "Waiting for the new ${RELEASE}-0 (uid=${new_uid}) to be Ready"
  if ! kctl wait --for=condition=Ready "pod/${RELEASE}-0" --timeout=180s >/dev/null; then
    fail "${name}" "${RELEASE}-0 never became Ready after recreation"
    cleanup_release
    return
  fi

  # The post-restart nodes.conf must NOT contain the poison IP — the
  # init container's refresh step replaces it before valkey-server
  # boots. (Valkey itself only writes peers' IPs to nodes.conf as it
  # observes them via gossip; without our pre-boot refresh, the boot
  # would proceed against 192.0.2.99 and the file would stay poisoned.)
  # An unreadable file is a failure too: an empty string trivially
  # "lacks" the poison IP and would let the check pass vacuously.
  local after
  after=$(kctl exec "${RELEASE}-0" -c "${RELEASE}" -- cat /data/nodes.conf 2>/dev/null || true)
  if [[ -z ${after} ]]; then
    fail "${name}" "failed to read /data/nodes.conf on ${RELEASE}-0 after restart"
    cleanup_release
    return
  fi
  if grep -q '192\.0\.2\.99' <<<"${after}"; then
    fail "${name}" "nodes.conf still contains poison IP 192.0.2.99 after restart — refresh did not run. Content: ${after}"
    cleanup_release
    return
  fi

  # And the cluster must be functional from pod-0's view — the whole
  # point of the refresh is that it boots into a cluster it can talk
  # to. Poll because gossip needs a moment to re-converge after the
  # restart.
  for _ in $(seq 1 60); do
    s=$(kctl exec "${RELEASE}-0" -c "${RELEASE}" -- \
      valkey-cli cluster info 2>/dev/null \
      | awk -F: '/^cluster_state:/{print $2}' | tr -d '\r\n' || true)
    [[ ${s} == ok ]] && break
    sleep 2
  done
  if [[ ${s} != ok ]]; then
    fail "${name}" "cluster_state=${s:-} after refresh (want ok). nodes.conf was: ${after}"
    cleanup_release
    return
  fi

  cleanup_release
  pass "${name}"
}

# ---------------------------------------------------------------------------
# Scenario: probe LOADING-policy is wired correctly on the live workload.
#
# The chart applies a tri-state policy:
#   * startupProbe   — rejects LOADING (gate has teeth during initial RDB load)
#   * livenessProbe  — accepts LOADING (don't kill a replica mid-full-resync)
#   * readinessProbe — rejects LOADING (don't route traffic to a loading pod)
#
# Production regression that motivates this test: a replica in a 38 GB cluster
# hit `cluster_state:fail` after a replication break triggered a full resync;
# the post-resync in-memory load took ~57 s, and livenessProbe
# (failureThreshold=6 * periodSeconds=10s = 60 s) killed the pod just before
# load completed. The kill discarded the freshly-streamed RDB; the next pod
# incarnation triggered yet another full resync. Crash-loop until intervention.
#
# helm-unittest already locks the rendered command strings in via
# matchRegex; this functional test goes one layer further by asserting
# that the live API objects in the cluster carry the right policy. A
# template change that bypasses the helper would slip past unit tests
# but get caught here.
# ---------------------------------------------------------------------------
# Reads element [2] of the exec command of each probe on the first container
# of statefulset/${RELEASE} and checks for the literal token LOADING: it must
# be present in the liveness command and absent from startup and readiness.
#
# Globals:   RELEASE, CHART_DIR (read)
# Arguments: none
# Outputs:   progress via log; the outcome is reported through fail/pass
# Returns:   status of the last helper executed (callers ignore it)
scenario_probe_loading_policy() {
  local name="probes carry tri-state LOADING policy on live workload"
  log "SCENARIO: ${name}"
  cleanup_release

  if ! hctl install "${RELEASE}" "${CHART_DIR}" \
    --set=cluster.enabled=true \
    --set=cluster.persistence.size=100Mi \
    --set=cluster.shards=3 \
    --set=cluster.replicasPerShard=0 \
    --wait --timeout=300s >/dev/null; then
    fail "${name}" "helm install failed"
    return
  fi

  local startup liveness readiness
  startup=$(kctl get statefulset "${RELEASE}" \
    -o jsonpath='{.spec.template.spec.containers[0].startupProbe.exec.command[2]}')
  liveness=$(kctl get statefulset "${RELEASE}" \
    -o jsonpath='{.spec.template.spec.containers[0].livenessProbe.exec.command[2]}')
  readiness=$(kctl get statefulset "${RELEASE}" \
    -o jsonpath='{.spec.template.spec.containers[0].readinessProbe.exec.command[2]}')

  # An empty string trivially "does not mention LOADING", so a missing probe
  # (or a jsonpath that no longer matches the rendered command shape) would
  # let the two "must reject" checks pass vacuously. Require all three first.
  if [[ -z ${startup} || -z ${liveness} || -z ${readiness} ]]; then
    fail "${name}" "could not read probe commands from statefulset/${RELEASE}: startup='${startup}' liveness='${liveness}' readiness='${readiness}'"
    cleanup_release
    return
  fi

  if grep -q LOADING <<<"${startup}"; then
    fail "${name}" "startupProbe must reject LOADING but accepts it: ${startup}"
    cleanup_release
    return
  fi
  if ! grep -q LOADING <<<"${liveness}"; then
    fail "${name}" "livenessProbe must accept LOADING but rejects it: ${liveness}"
    cleanup_release
    return
  fi
  if grep -q LOADING <<<"${readiness}"; then
    fail "${name}" "readinessProbe must reject LOADING but accepts it: ${readiness}"
    cleanup_release
    return
  fi

  cleanup_release
  pass "${name}"
}

# Whatever path the script exits through, remove the release and the paired
# fixtures created by the scenarios.
trap 'cleanup_release; cleanup_pair; cleanup_ambient_pair' EXIT

# Each scenario reports its own outcome; `|| true` keeps one failing
# scenario from aborting the rest of the run under `set -e`. Note that it
# also makes bash ignore `set -e` INSIDE the scenario function.
scenario_aclconfig_metrics || true
scenario_default_deny_netpol || true
scenario_bus_port_hidden || true
scenario_readiness_probe_exists || true
scenario_two_clusters_isolated || true
scenario_isolation_off_lets_merge_happen || true
scenario_rollout_restart_orderly_failover || true
scenario_nodes_conf_ip_refresh || true
scenario_probe_loading_policy || true
scenario_ambient_authz_blocks_cross_release_meet || true
scenario_ambient_ap_disabled_refused || true
scenario_ambient_shared_default_sa_refused || true
scenario_ambient_trustdomain_override || true
scenario_ambient_prometheus_scrape || true

# Summary: print every recorded result and count the entries that start with
# "PASS:"; everything else is counted as a failure.
echo
log "Extra scenario summary"
passed=0
failed=0
for r in "${RESULTS[@]}"; do
  printf ' %s\n' "${r}"
  if [[ ${r} == PASS:* ]]; then
    passed=$(( passed + 1 ))
  else
    failed=$(( failed + 1 ))
  fi
done
echo
log "Extras: ${passed} passed, ${failed} failed"
# Last command: its status becomes the script's exit status.
(( failed == 0 ))
diff --git a/functional-tests/run-scenario.sh b/functional-tests/run-scenario.sh
new file mode 100755
index 00000000..4615e707
--- /dev/null
+++ b/functional-tests/run-scenario.sh
@@ -0,0 +1,366 @@
#!/usr/bin/env bash
# Run a single scenario of the Valkey functional matrix against the
# already-created kind cluster.
#
# Usage:
#   ./run-scenario.sh <tls> <auth> <shard> <rep> <istio>
#     tls/auth/shard/rep are on|off; istio is off|sidecar|ambient.
# Example:
#   ./run-scenario.sh off off on on ambient
#     drives the "TLS off, auth off, shard on, rep on, Istio ambient" scenario.

HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=lib.sh
. \
"${HERE}/lib.sh" + +if (( $# != 5 )); then + echo "usage: $0 " >&2 + echo " tls/auth/shard/rep: on|off" >&2 + echo " istio: off|sidecar|ambient" >&2 + exit 2 +fi + +on_or_off() { + case "$1" in + on|off) return 0 ;; + *) echo "expected 'on' or 'off', got: $1" >&2; return 1 ;; + esac +} +for v in "$1" "$2" "$3" "$4"; do on_or_off "${v}"; done +case "$5" in + off|sidecar|ambient) ;; + *) echo "expected istio=off|sidecar|ambient, got: $5" >&2; exit 2 ;; +esac + +TLS=$1; AUTH=$2; SHARD=$3; REP=$4; ISTIO=$5 +SCENARIO="tls=${TLS} auth=${AUTH} shard=${SHARD} rep=${REP} istio=${ISTIO}" + +is_on() { [[ $1 == on ]]; } +is_mesh() { [[ ${ISTIO} != off ]]; } +is_sidecar() { [[ ${ISTIO} == sidecar ]]; } +is_ambient() { [[ ${ISTIO} == ambient ]]; } + +# Pick a testbench that shares the right mesh participation with the chart +# workload — that's the only way the in-mesh connectivity checks reflect +# what an in-production client on the same mesh would experience. The three +# testbench flavours are launched once by setup.sh. +case "${ISTIO}" in + off) TESTBENCH=${TESTBENCH_POD} ;; + sidecar) TESTBENCH=${TESTBENCH_POD_INJECTED} ;; + ambient) TESTBENCH=${TESTBENCH_POD_AMBIENT} ;; +esac +testbench_exec() { testbench_exec_in "${TESTBENCH}" "$@"; } + +# --------------------------------------------------------------------------- +# Build helm flags for this scenario. +# --------------------------------------------------------------------------- +helm_flags=() + +if is_mesh; then + helm_flags+=( + --set=istio.enabled=true + "--set=istio.mode=${ISTIO}" + ) +fi +# istio=off needs no extra flags: the chart emits zero mesh labels when +# istio.enabled=false, and setup.sh leaves the namespace unlabelled so +# pods stay out of both data planes by default. 
+ +if is_on "${AUTH}"; then + helm_flags+=( + --set=auth.enabled=true + --set=auth.usersExistingSecret="${AUTH_SECRET}" + --set=auth.aclUsers.default.permissions='~* &* +@all' + ) +fi + +if is_on "${TLS}"; then + helm_flags+=( + --set=tls.enabled=true + --set=tls.existingSecret="${TLS_SECRET}" + ) +fi + +if is_on "${SHARD}"; then + helm_flags+=( + --set=cluster.enabled=true + --set=cluster.persistence.size=1Gi + --set=cluster.shards=3 + ) + if is_on "${REP}"; then + helm_flags+=(--set=cluster.replicasPerShard=1) + expected_node_count=6 + else + helm_flags+=(--set=cluster.replicasPerShard=0) + expected_node_count=3 + fi +elif is_on "${REP}"; then + helm_flags+=( + --set=replica.enabled=true + --set=replica.persistence.size=1Gi + ) + expected_node_count=0 # unused +else + expected_node_count=0 # unused +fi + +# --------------------------------------------------------------------------- +# Install. +# --------------------------------------------------------------------------- + +# Register cleanup BEFORE `helm install`. If the install itself fails +# (timeout, post-install hook never ready, etc.) Helm leaves a "failed" +# release in the cluster that blocks every subsequent scenario with a +# `cannot reuse a name that is still in use` error. Trap-before-install +# ensures we always clean up, even on install failure. +cleanup() { + local rc=$? + log "Cleaning up scenario: ${SCENARIO}" + hctl uninstall "${RELEASE}" 2>/dev/null || true + kctl delete pvc --selector="app.kubernetes.io/instance=${RELEASE}" --ignore-not-found + exit "${rc}" +} +trap cleanup EXIT + +# Also scrub anything left behind by a prior scenario that crashed hard +# (SIGKILL, harness panic) without running its trap. +hctl uninstall "${RELEASE}" 2>/dev/null || true + +log "Installing scenario: ${SCENARIO}" +hctl install "${RELEASE}" "${CHART_DIR}" "${helm_flags[@]}" + +# --------------------------------------------------------------------------- +# Wait for pods to become ready. 
+# --------------------------------------------------------------------------- +log "Waiting for workload to be ready" +if is_on "${SHARD}"; then + kctl rollout status "statefulset/${RELEASE}" --timeout=300s + # The cluster-init Job is a post-install hook; wait for it to complete. + kctl wait --for=condition=complete "job/${RELEASE}-cluster-init" --timeout=300s +elif is_on "${REP}"; then + kctl rollout status "statefulset/${RELEASE}" --timeout=300s +else + kctl rollout status "deployment/${RELEASE}" --timeout=300s +fi + +# --------------------------------------------------------------------------- +# Build the canonical "working" valkey-cli argv for this scenario. +# --------------------------------------------------------------------------- +cli_args_good=(valkey-cli -h "valkey.${NAMESPACE}.svc.cluster.local" --no-auth-warning) +if is_on "${AUTH}"; then + cli_args_good+=(-a "${AUTH_PASSWORD}") +fi +if is_on "${TLS}"; then + cli_args_good+=(--tls --cacert /tls/ca.crt) +fi + +# --------------------------------------------------------------------------- +# Assertions. +# --------------------------------------------------------------------------- +fail() { echo "FAIL: $*" >&2; exit 1; } + +assert_eq() { + local expected=$1 actual=$2 what=$3 + if [[ ${actual} != "${expected}" ]]; then + fail "${what}: expected '${expected}', got '${actual}'" + fi +} + +# Pick any chart pod so mode-specific checks can inspect live container / +# label state. The first matching pod is fine — all pods in a release +# share the same mesh participation shape. +pod=$(kctl get pod -l "app.kubernetes.io/instance=${RELEASE}" \ + -o jsonpath='{.items[0].metadata.name}') + +# Chart-owned Istio resources should be present iff istio is enabled. +# PeerAuthentication is mode-neutral (enforced by Envoy in sidecar, ztunnel +# in ambient). DestinationRule is sidecar-only — ambient's ztunnel HBONE +# supersedes it. AuthorizationPolicy renders only in cluster mode. 
+case "${ISTIO}" in + off) + log "Istio check: chart-owned resources must be absent" + if kctl get peerauthentication "${RELEASE}" >/dev/null 2>&1; then + fail "PeerAuthentication/${RELEASE} should not exist when istio=off" + fi + if kctl get destinationrule "${RELEASE}" >/dev/null 2>&1; then + fail "DestinationRule/${RELEASE} should not exist when istio=off" + fi + if kctl get authorizationpolicy "${RELEASE}-cluster-bus" >/dev/null 2>&1; then + fail "AuthorizationPolicy/${RELEASE}-cluster-bus should not exist when istio=off" + fi + # Pod must have no istio-proxy container. + if kctl get pod "${pod}" \ + -o jsonpath='{.spec.containers[*].name} {.spec.initContainers[*].name}' \ + | tr ' ' '\n' | grep -Fxq istio-proxy; then + fail "pod ${pod} has an istio-proxy container when istio=off" + fi + ;; + sidecar) + log "Istio check: sidecar-mode resources must exist" + kctl get peerauthentication "${RELEASE}" >/dev/null \ + || fail "PeerAuthentication/${RELEASE} missing in sidecar mode" + kctl get destinationrule "${RELEASE}" >/dev/null \ + || fail "DestinationRule/${RELEASE} missing in sidecar mode" + if is_on "${SHARD}" || is_on "${REP}"; then + kctl get destinationrule "${RELEASE}-headless" >/dev/null \ + || fail "DestinationRule/${RELEASE}-headless missing in sidecar mode" + fi + # Istio >=1.29 injects as a native sidecar (initContainer with + # restartPolicy=Always), so check both containers and initContainers. + if ! kctl get pod "${pod}" \ + -o jsonpath='{.spec.containers[*].name} {.spec.initContainers[*].name}' \ + | tr ' ' '\n' | grep -Fxq istio-proxy; then + fail "pod ${pod} has no istio-proxy container in sidecar mode" + fi + if is_on "${SHARD}"; then + # AP renders only in cluster mode, but it applies in BOTH sidecar + # and ambient. Verify once per mode so a sidecar-only regression + # (e.g. dropping the AP when !ambient) can't hide. 
+ kctl get authorizationpolicy "${RELEASE}-cluster-bus" >/dev/null \ + || fail "AuthorizationPolicy/${RELEASE}-cluster-bus missing in sidecar+cluster mode" + # The bus-port exclude annotations are sidecar-only (ambient has + # no Envoy to exclude ports from). + excl=$(kctl get statefulset "${RELEASE}" \ + -o jsonpath='{.spec.template.metadata.annotations.traffic\.sidecar\.istio\.io/excludeInboundPorts}') + if [[ ${excl} != "16379" ]]; then + fail "traffic.sidecar.istio.io/excludeInboundPorts=${excl:-}, want '16379' in sidecar+cluster" + fi + else + # AP is cluster-mode only. Don't render for standalone/replica. + if kctl get authorizationpolicy "${RELEASE}-cluster-bus" >/dev/null 2>&1; then + fail "AuthorizationPolicy/${RELEASE}-cluster-bus should not render outside cluster mode" + fi + fi + ;; + ambient) + log "Istio check: ambient-mode resources must exist" + kctl get peerauthentication "${RELEASE}" >/dev/null \ + || fail "PeerAuthentication/${RELEASE} missing in ambient mode" + # DestinationRule is sidecar-only; a DR in ambient requires a + # waypoint proxy and layers a second mTLS inside ztunnel's HBONE. + if kctl get destinationrule "${RELEASE}" >/dev/null 2>&1; then + fail "DestinationRule/${RELEASE} must not exist in ambient mode" + fi + if kctl get destinationrule "${RELEASE}-headless" >/dev/null 2>&1; then + fail "DestinationRule/${RELEASE}-headless must not exist in ambient mode" + fi + # Ambient has no sidecar — ztunnel handles HBONE at the node. If any + # chart pod picks one up, our inject=false label is being ignored. 
+ if kctl get pod "${pod}" \ + -o jsonpath='{.spec.containers[*].name} {.spec.initContainers[*].name}' \ + | tr ' ' '\n' | grep -Fxq istio-proxy; then + fail "pod ${pod} has an istio-proxy container in ambient mode" + fi + dpmode=$(kctl get pod "${pod}" -o jsonpath='{.metadata.labels.istio\.io/dataplane-mode}') + if [[ ${dpmode} != ambient ]]; then + fail "pod ${pod} has istio.io/dataplane-mode=${dpmode:-}, want ambient" + fi + if is_on "${SHARD}"; then + # Ambient skips the cluster-isolation NetworkPolicy (it would + # drop HBONE) and relies entirely on the AP at the ztunnel + # layer. Verify both halves of that swap. + kctl get authorizationpolicy "${RELEASE}-cluster-bus" >/dev/null \ + || fail "AuthorizationPolicy/${RELEASE}-cluster-bus missing in ambient+cluster mode" + if kctl get networkpolicy "${RELEASE}-cluster-isolation" >/dev/null 2>&1; then + fail "NetworkPolicy/${RELEASE}-cluster-isolation must not exist in ambient+cluster mode" + fi + # Sidecar-only exclude annotations must not leak through. + excl=$(kctl get statefulset "${RELEASE}" \ + -o jsonpath='{.spec.template.metadata.annotations.traffic\.sidecar\.istio\.io/excludeInboundPorts}') + if [[ -n ${excl} ]]; then + fail "traffic.sidecar.istio.io/excludeInboundPorts=${excl} leaked into ambient pod" + fi + # And the AP bus rule must be scoped to this release's SPIFFE + # principal, not a wildcard or missing `from` — that's the whole + # point of the ambient cross-release isolation promise. 
+ principals=$(kctl get authorizationpolicy "${RELEASE}-cluster-bus" \ + -o jsonpath='{.spec.rules[0].from[0].source.principals[*]}') + if [[ ${principals} != *"/sa/${RELEASE}" ]]; then + fail "AuthorizationPolicy principals=${principals}, want .../sa/${RELEASE}" + fi + else + if kctl get authorizationpolicy "${RELEASE}-cluster-bus" >/dev/null 2>&1; then + fail "AuthorizationPolicy/${RELEASE}-cluster-bus should not render outside cluster mode" + fi + fi + ;; +esac + +# Positive: the fully-correct invocation should succeed. +log "Positive check" +if is_on "${SHARD}"; then + # Even after the cluster-init Job completes, gossip needs a few seconds to converge + # — each node updates `cluster_state` only after it sees the others. Poll for that. + state=fail + for _ in $(seq 1 30); do + state=$(testbench_exec "${cli_args_good[@]}" cluster info | awk -F: '/^cluster_state:/{print $2}' | tr -d '\r\n') + [[ ${state} == ok ]] && break + sleep 2 + done + assert_eq "ok" "${state}" "cluster_state" + + # Inspect the topology: exact count + master/slave split. + nodes=$(testbench_exec "${cli_args_good[@]}" cluster nodes) + actual_nodes=$(printf '%s\n' "${nodes}" | sed '/^$/d' | wc -l | tr -d ' ') + assert_eq "${expected_node_count}" "${actual_nodes}" "cluster node count" + + master_count=$(printf '%s\n' "${nodes}" | grep -c 'master' || true) + assert_eq "3" "${master_count}" "master count" + + if is_on "${REP}"; then + slave_count=$(printf '%s\n' "${nodes}" | grep -c 'slave' || true) + assert_eq "3" "${slave_count}" "slave count" + fi +else + pong=$(testbench_exec "${cli_args_good[@]}" ping | tr -d '\r\n') + assert_eq "PONG" "${pong}" "ping" +fi + +# Negative — auth. No password should be rejected with NOAUTH. 
+if is_on "${AUTH}"; then
+    log "Negative check: missing password must be rejected"
+    cli_args_noauth=(valkey-cli -h "valkey.${NAMESPACE}.svc.cluster.local" --no-auth-warning)
+    if is_on "${TLS}"; then
+        cli_args_noauth+=(--tls --cacert /tls/ca.crt)
+    fi
+    if is_on "${SHARD}"; then
+        probe_cmd=(cluster info)
+    else
+        probe_cmd=(ping)
+    fi
+    # The probe is expected to fail: drop errexit around it so the exit
+    # status and output can be inspected instead of killing the script.
+    set +e
+    out=$(testbench_exec "${cli_args_noauth[@]}" "${probe_cmd[@]}" 2>&1)
+    rc=$?
+    set -e
+    if ! grep -qi 'NOAUTH' <<<"${out}"; then
+        fail "expected NOAUTH error, got (rc=${rc}): ${out}"
+    fi
+fi
+
+# Negative — TLS. No --tls at all, and --tls without the CA, must both fail.
+if is_on "${TLS}"; then
+    log "Negative check: plaintext client against TLS server must fail"
+    cli_args_plaintext=(valkey-cli -h "valkey.${NAMESPACE}.svc.cluster.local" --no-auth-warning)
+    if is_on "${AUTH}"; then cli_args_plaintext+=(-a "${AUTH_PASSWORD}"); fi
+    if is_on "${SHARD}"; then probe_cmd=(cluster info); else probe_cmd=(ping); fi
+    set +e
+    out=$(testbench_exec "${cli_args_plaintext[@]}" "${probe_cmd[@]}" 2>&1)
+    rc=$?
+    set -e
+    if (( rc == 0 )); then
+        fail "plaintext client should have failed but succeeded: ${out}"
+    fi
+
+    log "Negative check: TLS client without CA must fail to verify"
+    # Reuses probe_cmd chosen for the plaintext probe above.
+    cli_args_nocacert=(valkey-cli -h "valkey.${NAMESPACE}.svc.cluster.local" --tls --no-auth-warning)
+    if is_on "${AUTH}"; then cli_args_nocacert+=(-a "${AUTH_PASSWORD}"); fi
+    set +e
+    out=$(testbench_exec "${cli_args_nocacert[@]}" "${probe_cmd[@]}" 2>&1)
+    rc=$?
+    set -e
+    if (( rc == 0 )) || ! grep -qi 'certificate verify failed' <<<"${out}"; then
+        fail "expected 'certificate verify failed', got (rc=${rc}): ${out}"
+    fi
+fi
+
+log "PASS: ${SCENARIO}"
diff --git a/functional-tests/setup.sh b/functional-tests/setup.sh
new file mode 100755
index 00000000..3fe57025
--- /dev/null
+++ b/functional-tests/setup.sh
@@ -0,0 +1,168 @@
+#!/usr/bin/env bash
+# Bring up the kind cluster, install Istio (ambient profile), and create the
+# shared fixtures (auth secret, TLS secret, up to three testbench pods) used
+# by every scenario.
+
+HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=lib.sh
+. "${HERE}/lib.sh"
+
+log "Creating kind cluster ${CLUSTER_NAME}"
+if kind get clusters | grep -Fxq "${CLUSTER_NAME}"; then
+    echo "kind cluster '${CLUSTER_NAME}' already exists; reusing"
+else
+    # --name takes precedence over the `name:` in kind-config.yaml. Without
+    # it a VALKEY_KIND_CLUSTER override would still create the config's
+    # cluster name while KUBE_CONTEXT (kind-${CLUSTER_NAME}) and teardown
+    # target the overridden one.
+    kind create cluster --name "${CLUSTER_NAME}" \
+        --config "${HERE}/kind-config.yaml" --wait 120s
+fi
+
+log "Installing Istio (ambient profile)"
+if istio_installed; then
+    echo "istio-system namespace already exists; assuming Istio is installed"
+else
+    # `ambient` ships istiod + the ambient data plane (istio-cni DaemonSet
+    # for iptables redirection, ztunnel DaemonSet for node-local HBONE
+    # mTLS). It also installs the sidecar injection webhook, so classic
+    # sidecar-mode pods still work on the same cluster — we can run both
+    # the sidecar matrix and the ambient regressions against one install.
+    istioctl install --context="${KUBE_CONTEXT}" \
+        --set profile=ambient --skip-confirmation
+fi
+
+# Wait for the ambient data plane to be live before launching testbenches.
+# Without this, the first few ambient scenarios race ztunnel startup and
+# the testbench gets no HBONE wrapping.
+if istio_ambient_installed; then
+    log "Waiting for ztunnel DaemonSet to be ready"
+    kubectl --context="${KUBE_CONTEXT}" -n "${ISTIO_NAMESPACE}" \
+        rollout status daemonset/ztunnel --timeout=180s
+fi
+
+# Namespace-level Istio injection intentionally NOT set. 
The chart now
+# carries per-pod `sidecar.istio.io/inject` and `istio.io/dataplane-mode`
+# labels derived from `istio.enabled` + `istio.mode`, so every workload
+# opts in or out explicitly at the pod layer. Labelling the namespace
+# `istio-injection=enabled` on top would (a) pull every istio=off pod
+# into the sidecar data plane — since namespace injection is inherited
+# unless each pod stamps `sidecar.istio.io/inject=false` to veto it —
+# and (b) blur which layer is actually responsible for mesh capture
+# when troubleshooting. Keep the decision at the pod level, the same as
+# how the chart ships to real operators.
+log "Namespace ${NAMESPACE} left unlabelled — chart controls mesh opt-in at the pod level"
+# A trailing `-` on a label key asks kubectl to remove that label. Output is
+# discarded and failure ignored, so this is harmless on a namespace that
+# never carried either label.
+kubectl --context="${KUBE_CONTEXT}" label namespace "${NAMESPACE}" \
+    istio-injection- istio.io/dataplane-mode- 2>/dev/null || true
+
+log "Creating ${AUTH_SECRET} secret"
+# Delete-then-create so re-running setup against an existing cluster
+# replaces the secret instead of failing on "already exists".
+kctl delete secret "${AUTH_SECRET}" --ignore-not-found
+kctl create secret generic "${AUTH_SECRET}" \
+    --from-literal="default=${AUTH_PASSWORD}"
+
+log "Generating self-signed TLS material"
+# Key material lives in a private temp dir that is removed on any exit path.
+CERT_DIR=$(mktemp -d)
+trap 'rm -rf -- "${CERT_DIR}"' EXIT
+
+# NOTE(review): stderr of all three openssl calls below is discarded, so
+# under errexit a failure aborts setup without any diagnostic. `-addext`
+# and `-copy_extensions` are presumably unavailable in older
+# OpenSSL/LibreSSL builds — confirm the minimum supported version.
+
+# CA
+openssl req -x509 -nodes -days 365 -newkey rsa:2048 \
+    -keyout "${CERT_DIR}/valkey-ca.key" \
+    -out "${CERT_DIR}/valkey-ca.crt" \
+    -subj /CN=valkey-ca 2>/dev/null
+
+# Server CSR with SANs the chart's pods present on
+openssl req -nodes -newkey rsa:2048 \
+    -keyout "${CERT_DIR}/valkey-server.key" \
+    -out "${CERT_DIR}/valkey-server.csr" \
+    -subj "/CN=valkey.${NAMESPACE}.svc.cluster.local" \
+    -addext "subjectAltName=DNS:valkey.${NAMESPACE}.svc.cluster.local,DNS:valkey-headless.${NAMESPACE}.svc.cluster.local,DNS:*.valkey-headless.${NAMESPACE}.svc.cluster.local" \
+    2>/dev/null
+
+# Sign the CSR with the CA, carrying the CSR's SAN extension into the cert.
+openssl x509 -req \
+    -in "${CERT_DIR}/valkey-server.csr" \
+    -CA "${CERT_DIR}/valkey-ca.crt" \
+    -CAkey "${CERT_DIR}/valkey-ca.key" \
+    -CAcreateserial \
+    -out "${CERT_DIR}/valkey-server.crt" \
+    -days 365 \
+    -copy_extensions copyall \
+    2>/dev/null
+
+log "Creating ${TLS_SECRET} secret"
+kctl delete secret "${TLS_SECRET}" --ignore-not-found
+kctl create secret generic "${TLS_SECRET}" \
+    --from-file="server.crt=${CERT_DIR}/valkey-server.crt" \
+    --from-file="server.key=${CERT_DIR}/valkey-server.key" \
+    --from-file="ca.crt=${CERT_DIR}/valkey-ca.crt"
+
+# ---------------------------------------------------------------------------
+# Testbench pods. Three flavours, each expressing its mesh intent via
+# POD-level labels (the namespace is intentionally unlabelled — see the
+# comment at the sidecar-injection step above). The chart's Valkey pods
+# take the same pod-level approach, so the tests exercise the same opt-in
+# path operators use in production.
+#
+#   valkey-testbench          — out of both meshes. Used for istio=off
+#                               scenarios; no mesh labels emitted.
+#   valkey-testbench-injected — Envoy sidecar via per-pod inject=true.
+#                               Used for istio=on mode=sidecar.
+#   valkey-testbench-ambient  — ztunnel-wrapped via
+#                               istio.io/dataplane-mode=ambient.
+#                               Used for istio=on mode=ambient.
+# ---------------------------------------------------------------------------
+# (Re)create one testbench pod and block until wait_for_testbench returns.
+# $1: pod name
+# $2: flavour (plain|sidecar|ambient)
+# Returns 2 for an unknown flavour.
+launch_testbench() {
+    local pod=$1 flavour=$2 overrides labels
+    case "${flavour}" in
+        plain)
+            # No mesh labels: with the namespace unlabelled, the default is
+            # already "out of both meshes".
+            labels=''
+            ;;
+        sidecar)
+            labels='sidecar.istio.io/inject=true'
+            ;;
+        ambient)
+            labels='istio.io/dataplane-mode=ambient'
+            ;;
+        *)
+            echo "launch_testbench: unknown flavour ${flavour}" >&2
+            return 2
+            ;;
+    esac
+    # Pod spec override: mounts the TLS secret read-only at /tls so the
+    # scenarios can pass --cacert /tls/ca.crt.
+    overrides='{
+        "spec": {
+            "containers": [{
+                "name": "'"${pod}"'",
+                "image": "valkey/valkey:9.0.1",
+                "command": ["sleep", "infinity"],
+                "volumeMounts": [{"name": "tls", "mountPath": "/tls", "readOnly": true}]
+            }],
+            "volumes": [{
+                "name": "tls",
+                "secret": {"secretName": "'"${TLS_SECRET}"'"}
+            }]
+        }
+    }'
+    local label_args=()
+    if [[ -n ${labels} ]]; then
+        label_args=(--labels="${labels}")
+    fi
+    kctl delete pod "${pod}" --ignore-not-found --wait=true
+    # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array. A bare
+    # "${label_args[@]}" is an "unbound variable" error under `set -u` on
+    # bash older than 4.4, which the `plain` flavour would hit.
+    kctl run "${pod}" \
+        --image=valkey/valkey:9.0.1 \
+        ${label_args[@]+"${label_args[@]}"} \
+        --restart=Never \
+        --overrides="${overrides}" \
+        --command -- sleep infinity
+    wait_for_testbench "${pod}"
+}
+
+log "Launching ${TESTBENCH_POD} (no mesh)"
+launch_testbench "${TESTBENCH_POD}" plain
+
+log "Launching ${TESTBENCH_POD_INJECTED} (Envoy sidecar)"
+launch_testbench "${TESTBENCH_POD_INJECTED}" sidecar
+
+if istio_ambient_installed; then
+    log "Launching ${TESTBENCH_POD_AMBIENT} (ambient / ztunnel)"
+    launch_testbench "${TESTBENCH_POD_AMBIENT}" ambient
+else
+    log "Skipping ${TESTBENCH_POD_AMBIENT} — ambient data plane not installed"
+fi
+
+log "Setup complete"
diff --git a/functional-tests/teardown.sh b/functional-tests/teardown.sh
new file mode 100755
index 00000000..1349abad
--- /dev/null
+++ b/functional-tests/teardown.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+# Remove the shared fixtures and (optionally) the kind cluster itself.
+#
+# Usage:
+#   ./teardown.sh            # remove fixtures, keep cluster
+#   ./teardown.sh --cluster  # also delete the kind cluster
+
+HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=lib.sh
+. "${HERE}/lib.sh"
+
+DELETE_CLUSTER=0
+for arg in "$@"; do
+    case "${arg}" in
+        --cluster) DELETE_CLUSTER=1 ;;
+        *) echo "unknown arg: ${arg}" >&2; exit 2 ;;
+    esac
+done
+
+# Fixtures are only removed when the cluster exists; otherwise there is
+# nothing to talk to and the kubectl/helm calls would just error out.
+if kind get clusters | grep -Fxq "${CLUSTER_NAME}"; then
+    log "Removing fixtures from ${CLUSTER_NAME}"
+    # Best-effort: any lingering release + PVCs.
+    hctl uninstall "${RELEASE}" 2>/dev/null || true
+    kctl delete pvc --selector="app.kubernetes.io/instance=${RELEASE}" --ignore-not-found
+    kctl delete pod "${TESTBENCH_POD}" "${TESTBENCH_POD_INJECTED}" "${TESTBENCH_POD_AMBIENT}" --ignore-not-found
+    kctl delete secret "${AUTH_SECRET}" "${TLS_SECRET}" --ignore-not-found
+fi
+
+if (( DELETE_CLUSTER )); then
+    log "Deleting kind cluster ${CLUSTER_NAME}"
+    kind delete cluster --name "${CLUSTER_NAME}"
+fi
diff --git a/valkey/Chart.yaml b/valkey/Chart.yaml index 6d6c7114..ea8eaba4 100644 --- a/valkey/Chart.yaml +++ b/valkey/Chart.yaml @@ -3,7 +3,7 @@ name: valkey description: A Helm chart for Kubernetes type: application version: 0.9.4 -appVersion: "9.0.2" +appVersion: "9.1.0" home: https://valkey.io/valkey-helm/ sources: - https://github.com/valkey-io/valkey-helm.git diff --git a/valkey/README.md b/valkey/README.md index d069809a..7fea2332 100644 --- a/valkey/README.md +++ b/valkey/README.md @@ -58,6 +58,60 @@ replica: If fewer than `minReplicasToWrite` replicas are available, the master will reject write operations. 
+### Cluster Mode + +Deploy a sharded Valkey cluster for horizontal scaling and high availability: + +```bash +helm install valkey valkey/valkey --set cluster.enabled=true --set cluster.persistence.size=5Gi +``` + +**Architecture:** + +* Data is automatically sharded across multiple primary nodes (16384 hash slots distributed across shards) +* Each shard can have replicas for high availability within the shard +* Total nodes = `shards` × (1 + `replicasPerShard`) + +**Default Configuration (6 nodes):** + +```yaml +cluster: + enabled: true + shards: 3 # Minimum 3 shards required + replicasPerShard: 1 # 1 replica per shard + persistence: + size: 5Gi # Required +``` + +This creates 6 nodes: 3 primary shards + 3 replicas. + +**High Availability Configuration (15 nodes):** + +```yaml +cluster: + enabled: true + shards: 5 # 5 primary shards + replicasPerShard: 2 # 2 replicas per shard for extra redundancy + persistence: + size: 10Gi + storageClass: "fast-ssd" +``` + +**Services:** + +* `valkey`: Main service for client connections (routes to all nodes) +* `valkey-headless`: Headless service for pod discovery and cluster communication + +**Cluster Configuration Options:** + +```yaml +cluster: + nodeTimeout: 15000 # Milliseconds before a node is considered failed + requireFullCoverage: true # Require all hash slots covered to accept writes + allowReadsWhenDown: false # Allow reads when cluster is in down state + busPort: 16379 # Port for inter-node cluster communication +``` + ## Storage ### Standalone Storage @@ -93,6 +147,20 @@ replica: storageClass: "fast-ssd" # Optional ``` +### Cluster Storage + +Persistent storage is **mandatory** in cluster mode. Each node in the cluster maintains its own data partition and cluster state configuration. + +```yaml +cluster: + enabled: true + persistence: + size: 10Gi # Required + storageClass: "fast-ssd" # Optional + accessModes: + - ReadWriteOnce +``` + ## Authentication This chart supports ACL-based authentication for Valkey. 
@@ -174,6 +242,35 @@ replica: * This user MUST be defined in `auth.aclUsers` with appropriate permissions * Minimum permissions: `+psync +replconf +ping` +### Cluster with Authentication + +When using ACL authentication in cluster mode, nodes need credentials to authenticate with each other for cluster operations: + +```yaml +auth: + enabled: true + usersExistingSecret: "my-valkey-users" + aclUsers: + default: + permissions: "~* &* +@all" + cluster-user: + permissions: "+psync +replconf +ping" + +cluster: + enabled: true + shards: 3 + replicasPerShard: 1 + replicationUser: "cluster-user" # Must be defined in auth.aclUsers + persistence: + size: 5Gi +``` + +**Important Notes:** + +* `cluster.replicationUser` specifies which ACL user cluster nodes use to authenticate +* This user MUST be defined in `auth.aclUsers` with appropriate permissions +* Minimum permissions: `+psync +replconf +ping` + ## Metrics This chart supports Prometheus metrics collection using the [Redis exporter](https://github.com/oliver006/redis_exporter). 
@@ -349,6 +446,17 @@ tls: | replica.persistence.size | string | `""` | Required if replica is enabled | | replica.persistence.storageClass | string | `""` | | | replica.persistence.accessModes | list | `""` | | +| cluster.enabled | bool | `false` | Enable cluster mode (mutually exclusive with replica.enabled) | +| cluster.shards | int | `3` | Number of primary shards (minimum 3) | +| cluster.replicasPerShard | int | `1` | Number of replicas per shard | +| cluster.replicationUser | string | `"default"` | ACL user for cluster authentication (must be in auth.aclUsers) | +| cluster.nodeTimeout | int | `15000` | Milliseconds before node is considered failed | +| cluster.requireFullCoverage | bool | `true` | Require all slots covered to accept writes | +| cluster.allowReadsWhenDown | bool | `false` | Allow reads when cluster is down | +| cluster.busPort | int | `16379` | Port for inter-node cluster communication | +| cluster.persistence.size | string | `""` | Required if cluster is enabled | +| cluster.persistence.storageClass | string | `""` | | +| cluster.persistence.accessModes | list | `["ReadWriteOnce"]` | | | resources | object | `{}` | | | securityContext.capabilities.drop[0] | string | `"ALL"` | | | securityContext.readOnlyRootFilesystem | bool | `true` | |
diff --git a/valkey/scripts/cluster-init-script.sh b/valkey/scripts/cluster-init-script.sh
new file mode 100644
index 00000000..ff9a96f8
--- /dev/null
+++ b/valkey/scripts/cluster-init-script.sh
@@ -0,0 +1,201 @@
+#!/bin/sh
+set -eu
+
+# --- Configuration & Initial Checks ---
+if [ "${CLUSTER_NODE_COUNT}" -eq "1" ]; then
+    echo "Single node deployment. Skipping cluster initialization"
+    exit 0
+fi
+
+REPLICAS_PER_SHARD=${CLUSTER_REPLICAS_PER_SHARD:-1}
+# Integer division: every shard is one primary plus its replicas.
+PRIMARIES=$(( CLUSTER_NODE_COUNT / (1 + REPLICAS_PER_SHARD) ))
+
+{{- if and .Values.auth.enabled .Values.auth.aclUsers }}
+{{- $replUsername := .Values.cluster.replicationUser }}
+{{- $replUser := index .Values.auth.aclUsers $replUsername }}
+{{- $replPasswordKey := $replUser.passwordKey | default $replUsername }}
+{{- if .Values.auth.usersExistingSecret }}
+if [ -f "/valkey-users-secret/{{ $replPasswordKey }}" ]; then
+    REDISCLI_AUTH=$(cat "/valkey-users-secret/{{ $replPasswordKey }}")
+elif [ -f "/valkey-auth-secret/{{ $replUsername }}-password" ]; then
+    REDISCLI_AUTH=$(cat "/valkey-auth-secret/{{ $replUsername }}-password")
+else
+    echo "ERROR: No password found for cluster replication user {{ $replUsername }}" >&2
+    exit 1
+fi
+{{- else }}
+if [ -f "/valkey-auth-secret/{{ $replUsername }}-password" ]; then
+    REDISCLI_AUTH=$(cat "/valkey-auth-secret/{{ $replUsername }}-password")
+else
+    echo "ERROR: No password found for cluster replication user {{ $replUsername }}" >&2
+    exit 1
+fi
+{{- end }}
+# Valkey/Redis clients honour REDISCLI_AUTH, which avoids passing the password
+# on the command line (where it would leak via `ps` and trip over shell
+# metacharacters).
+# NOTE(review): only the password is exported and vcli passes no --user, so
+# valkey-cli presumably authenticates as the `default` ACL user. That fits
+# the default cluster.replicationUser ("default") — confirm the behaviour
+# for a replication user with its own name and password.
+export REDISCLI_AUTH
+{{- end }}
+
+# vcli: thin wrapper that inherits REDISCLI_AUTH and always adds TLS args when
+# configured. Callers pass only host/port/subcommand.
+vcli() {
+{{- if .Values.tls.enabled }}
+    valkey-cli --no-auth-warning --tls --cacert "/tls/{{ .Values.tls.caPublicKey }}" "$@"
+{{- else }}
+    valkey-cli --no-auth-warning "$@"
+{{- end }}
+}
+
+echo "Cluster init job starting. Total nodes: ${CLUSTER_NODE_COUNT}, Primaries: ${PRIMARIES}, Replicas per shard: ${REPLICAS_PER_SHARD}"
+
+HEADLESS_SVC="{{ include "valkey.headlessServiceName" . }}"
+NAMESPACE="{{ .Release.Namespace }}"
+CLUSTER_DOMAIN="{{ .Values.clusterDomain }}"
+PORT="{{ .Values.service.port }}"
+FULLNAME="{{ include "valkey.fullname" . }}"
+
+# node_host N: per-pod DNS name of StatefulSet ordinal N behind the headless
+# service.
+node_host() { echo "${FULLNAME}-$1.${HEADLESS_SVC}.${NAMESPACE}.svc.${CLUSTER_DOMAIN}"; }
+
+# --- Wait for all Valkey nodes to be ready ---
+# No upper bound here: the loop only ends once every node answers PING.
+for i in $(seq 0 $((CLUSTER_NODE_COUNT - 1))); do
+    NODE_HOST=$(node_host "${i}")
+    until vcli -h "${NODE_HOST}" -p "${PORT}" ping 2>/dev/null | grep -q "PONG"; do
+        echo "Waiting for ${NODE_HOST} to be ready..."
+        sleep 2
+    done
+    echo "Node ${NODE_HOST} is ready."
+done
+
+echo "All ${CLUSTER_NODE_COUNT} nodes are ready."
+
+# --- Discover Existing Cluster ---
+# The first node reporting cluster_state:ok becomes the reference node for
+# the scale-up path below; if none does, a new cluster is created.
+HEALTHY_NODE=""
+for i in $(seq 0 $((CLUSTER_NODE_COUNT - 1))); do
+    NODE_HOST=$(node_host "${i}")
+    if vcli -h "${NODE_HOST}" -p "${PORT}" cluster info 2>/dev/null | grep -q "cluster_state:ok"; then
+        HEALTHY_NODE="${NODE_HOST}"
+        echo "Found healthy cluster node: ${HEALTHY_NODE}"
+        break
+    fi
+done
+
+# --- Logic for Joining an Existing Cluster (scaling up) ---
+if [ -n "${HEALTHY_NODE}" ]; then
+    echo "Existing cluster found. Checking for new nodes to add..."
+
+    KNOWN_NODES=$(vcli -h "${HEALTHY_NODE}" -p "${PORT}" cluster nodes 2>/dev/null)
+
+    NEW_NODE_COUNT=0
+    for i in $(seq 0 $((CLUSTER_NODE_COUNT - 1))); do
+        NODE_HOST=$(node_host "${i}")
+        NODE_IP=$(getent hosts "${NODE_HOST}" | awk '{print $1}')
+        # POSIX sh has no pipefail, so a failed lookup yields an empty
+        # NODE_IP instead of an error. An empty IP would make the membership
+        # test below match every line and report the node as "already in
+        # cluster"; fail loudly instead.
+        if [ -z "${NODE_IP}" ]; then
+            echo "ERROR: could not resolve ${NODE_HOST}" >&2
+            exit 1
+        fi
+        # Field 2 of CLUSTER NODES is "ip:port@cport[,hostname]". Match it as
+        # a fixed string bounded by the field separator and the '@', so the
+        # dots are literal and 10.0.0.1 cannot match inside 110.0.0.1.
+        NODE_ADDR=" ${NODE_IP}:${PORT}@"
+
+        if echo "${KNOWN_NODES}" | grep -v "fail" | grep -qF -- "${NODE_ADDR}"; then
+            echo "Node ${NODE_HOST} (${NODE_IP}) already in cluster."
+            continue
+        fi
+
+        echo "New node found: ${NODE_HOST} (${NODE_IP}). Adding to cluster..."
+        NEW_NODE_COUNT=$((NEW_NODE_COUNT + 1))
+
+        # Forget any old, failed instance of this node
+        FAILED_NODE_ID=$(echo "${KNOWN_NODES}" | grep -F -- "${NODE_ADDR}" | grep "fail" | awk '{print $1}' || true)
+        if [ -n "${FAILED_NODE_ID}" ]; then
+            echo "Found node IP (${NODE_IP}) marked as failed with ID ${FAILED_NODE_ID}. Forgetting it..."
+            # Best-effort: a node that is already forgotten is not an error.
+            vcli --cluster call "${HEALTHY_NODE}:${PORT}" cluster forget "${FAILED_NODE_ID}" > /dev/null 2>&1 || true
+            sleep 3
+        fi
+
+        # Meet the cluster via the new node
+        HEALTHY_NODE_IP=$(getent hosts "${HEALTHY_NODE}" | awk '{print $1}')
+        echo "Sending CLUSTER MEET from ${NODE_HOST} to ${HEALTHY_NODE} (${HEALTHY_NODE_IP})"
+        vcli -h "${NODE_HOST}" -p "${PORT}" cluster meet "${HEALTHY_NODE_IP}" "${PORT}"
+    done
+
+    if [ "${NEW_NODE_COUNT}" -eq 0 ]; then
+        echo "No new nodes to add. Cluster is up to date."
+        exit 0
+    fi
+
+    # Give the MEET handshakes time to spread before reading topology.
+    sleep 5
+
+    # Assign roles to new nodes: find masters needing replicas
+    for i in $(seq 0 $((CLUSTER_NODE_COUNT - 1))); do
+        NODE_HOST=$(node_host "${i}")
+        NODE_ID=$(vcli -h "${NODE_HOST}" -p "${PORT}" cluster myid)
+
+        # Re-fetch cluster state from healthy node for current view
+        CURRENT_NODES=$(vcli -h "${HEALTHY_NODE}" -p "${PORT}" cluster nodes)
+
+        # Check if this node is a master with no slots (new node).
+        # grep matches the ID anywhere on the line, so replicas of this node
+        # (which carry its ID in field 4) are included as well. Slot ranges
+        # start at field 9 of a CLUSTER NODES line.
+        NODE_INFO=$(echo "${CURRENT_NODES}" | grep "${NODE_ID}")
+        IS_MASTER=$(echo "${NODE_INFO}" | grep -c "master" || true)
+        HAS_SLOTS=$(echo "${NODE_INFO}" | awk '{for(i=9;i<=NF;i++) print $i}' | head -1)
+
+        if [ "${IS_MASTER}" -gt 0 ] && [ -z "${HAS_SLOTS}" ]; then
+            echo "Node ${NODE_HOST} is an empty master. Searching for a master to replicate..."
+
+            # Pick any healthy master other than this node that has fewer
+            # than REPLICAS_PER_SHARD healthy replicas. awk's for-in order is
+            # unspecified, so the choice among candidates is arbitrary.
+            TARGET_MASTER_ID=$(echo "${CURRENT_NODES}" | awk -v replicas_needed="${REPLICAS_PER_SHARD}" -v my_id="${NODE_ID}" '
+                /master/ && !/fail/ { masters[$1] = 1 }
+                /slave/ && !/fail/ { master_replicas[$4]++ }
+                END {
+                    for (master_id in masters) {
+                        if ( master_id != my_id && (master_replicas[master_id] < replicas_needed || master_replicas[master_id] == "") ) {
+                            print master_id
+                            exit
+                        }
+                    }
+                }
+            ')
+
+            if [ -n "${TARGET_MASTER_ID}" ]; then
+                echo "Found target master ${TARGET_MASTER_ID} that needs a replica."
+                if vcli -h "${NODE_HOST}" -p "${PORT}" cluster replicate "${TARGET_MASTER_ID}"; then
+                    echo "Successfully configured ${NODE_HOST} as a replica for ${TARGET_MASTER_ID}."
+                else
+                    echo "WARNING: Failed to replicate master ${TARGET_MASTER_ID} from ${NODE_HOST}." >&2
+                fi
+            fi
+        fi
+    done
+
+    # Rebalance if needed
+    echo "Attempting cluster rebalance..."
+
+    # Wait (up to MAX_PROPAGATION_ATTEMPTS * 5s) for the reference node to
+    # report cluster_state:ok again before moving slots around.
+    PROPAGATION_ATTEMPTS=0
+    MAX_PROPAGATION_ATTEMPTS=60
+    CLUSTER_STATE=""
+    while [ ${PROPAGATION_ATTEMPTS} -lt ${MAX_PROPAGATION_ATTEMPTS} ]; do
+        CLUSTER_STATE=$(vcli -h "${HEALTHY_NODE}" -p "${PORT}" cluster info 2>/dev/null | grep "cluster_state:" | cut -d: -f2 | tr -d '\r\n')
+        if [ "${CLUSTER_STATE}" = "ok" ]; then
+            echo "Cluster state is OK. Proceeding with rebalance."
+            break
+        fi
+        echo "Cluster state is ${CLUSTER_STATE}. Waiting for propagation... (${PROPAGATION_ATTEMPTS}/${MAX_PROPAGATION_ATTEMPTS})"
+        PROPAGATION_ATTEMPTS=$((PROPAGATION_ATTEMPTS + 1))
+        sleep 5
+    done
+    if [ "${CLUSTER_STATE}" != "ok" ]; then
+        echo "WARNING: cluster state still '${CLUSTER_STATE}' after ${MAX_PROPAGATION_ATTEMPTS} attempts; attempting rebalance anyway." >&2
+    fi
+
+    # Best-effort: a failed rebalance does not fail the Job, but it is
+    # reported instead of being followed by an unconditional success line.
+    if vcli --cluster rebalance "${HEALTHY_NODE}:${PORT}" --cluster-use-empty-masters --cluster-yes; then
+        echo "Cluster update completed."
+    else
+        echo "WARNING: cluster rebalance exited non-zero; slot distribution may be unchanged." >&2
+        echo "Cluster update completed without a successful rebalance."
+    fi
+    exit 0
+fi
+
+# --- Create New Cluster ---
+echo "No existing cluster found. Creating new cluster..."
+NODES=""
+for i in $(seq 0 $((CLUSTER_NODE_COUNT - 1))); do
+    NODE_HOST=$(node_host "${i}")
+    NODES="${NODES} ${NODE_HOST}:${PORT}"
+done
+
+# Allow time for cluster-enabled nodes to fully initialize
+sleep 10
+
+echo "Creating cluster with nodes:${NODES}"
+# NODES is deliberately unquoted: it must word-split into one host:port
+# argument per node. "yes" answers the interactive confirmation prompt.
+# shellcheck disable=SC2086
+echo "yes" | vcli --cluster create ${NODES} --cluster-replicas "${REPLICAS_PER_SHARD}"
+echo "Cluster created successfully."
+
+exit 0
diff --git a/valkey/scripts/cluster-prestop-script.sh b/valkey/scripts/cluster-prestop-script.sh
new file mode 100644
index 00000000..308caf72
--- /dev/null
+++ b/valkey/scripts/cluster-prestop-script.sh
@@ -0,0 +1,168 @@
+#!/bin/sh
+# preStop hook for cluster-mode Valkey pods: orchestrate an orderly
+# CLUSTER FAILOVER before kubelet sends SIGTERM.
+#
+# Problem this solves
+# -------------------
+# A rollout restart (or any voluntary pod eviction) sends SIGTERM to Valkey
+# and — 30 seconds later by default — SIGKILL. 
Without a preStop hook, a +# primary pod dies with open client connections; the TCP sockets close +# abruptly, connection pools fill with dead handles, the app errors out on +# every pooled command, and the cluster takes up to cluster-node-timeout +# (15s default) to promote a replica. That is the behaviour the bug report +# describes. +# +# The fix: before the SIGTERM, detect if this pod is a primary; if so, ask +# one of its own replicas to run `CLUSTER FAILOVER`. Valkey then performs +# the canonical orderly handover — the primary pauses new writes, both +# sides sync replication offsets, the replica promotes, the old primary +# demotes to replica. Clients with cluster-topology refresh see the new +# primary immediately via MOVED; existing connections close cleanly as +# part of the demotion. No SIGTERM-during-write window, no pooled dead +# connections, no visible blip. +# +# No-op paths (deliberately best-effort — a failing preStop must never +# block pod shutdown; the old abrupt behaviour is still strictly better +# than hanging in Terminating): +# * This pod is already a replica — losing a replica is invisible to +# clients, no failover needed. +# * Shard has no replicas (cluster.replicasPerShard=0) — nothing to fail +# over to, accept the abrupt close as a topology choice. +# * This pod has no healthy replica of its own (all its replicas are +# marked fail) — skip; FAILOVER would target nothing. +# * Any vcli command fails — log and exit 0. +# +# Notably NOT a no-op path: cluster_state:fail. That state is expected +# mid-rollout (slots briefly uncovered between restarts). Skipping the +# hook there would perpetuate the degraded state by letting every +# subsequent primary also die abruptly. +# +# This script is templated at Helm render time so it can inline the same +# TLS/auth plumbing the cluster-init script uses. 
Keeping them separate
+# (rather than a shared sourced helper) is intentional: Helm's text-
+# template model makes shared sh includes fragile, the code is short, and
+# the two scripts evolve independently.
+set -eu
+
+# All diagnostics go to stderr with a fixed "preStop:" prefix.
+log() { echo "preStop: $*" >&2; }
+
+PORT="{{ .Values.service.port }}"
+# Upper bound, in seconds, on the wait for this node's demotion.
+TIMEOUT={{ .Values.cluster.preStopFailover.timeoutSeconds }}
+
+# Self-FQDN (matches what init_config.yaml announces via
+# cluster-announce-hostname). Using 127.0.0.1 would work for TCP but
+# break TLS SAN verification — the server cert's SAN lists the FQDN, not
+# the loopback. Same rationale applies to the replica endpoint below.
+# NOTE(review): relies on HOSTNAME being set in the container environment;
+# under `set -u` an unset HOSTNAME aborts the hook — confirm.
+SELF_FQDN="${HOSTNAME}.{{ include "valkey.headlessServiceName" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}"
+
+{{- if and .Values.auth.enabled .Values.auth.aclUsers }}
+{{- $replUsername := .Values.cluster.replicationUser }}
+{{- $replUser := index .Values.auth.aclUsers $replUsername }}
+{{- $replPasswordKey := $replUser.passwordKey | default $replUsername }}
+{{- if .Values.auth.usersExistingSecret }}
+# Password lookup order: the user-supplied secret first, then the
+# chart-managed one. A missing password exits 0 on purpose — the hook is
+# best-effort and must not block pod shutdown.
+if [ -f "/valkey-users-secret/{{ $replPasswordKey }}" ]; then
+    REDISCLI_AUTH=$(cat "/valkey-users-secret/{{ $replPasswordKey }}")
+elif [ -f "/valkey-auth-secret/{{ $replUsername }}-password" ]; then
+    REDISCLI_AUTH=$(cat "/valkey-auth-secret/{{ $replUsername }}-password")
+else
+    log "no password found for user {{ $replUsername }}; cannot authenticate preStop"
+    exit 0
+fi
+{{- else }}
+if [ -f "/valkey-auth-secret/{{ $replUsername }}-password" ]; then
+    REDISCLI_AUTH=$(cat "/valkey-auth-secret/{{ $replUsername }}-password")
+else
+    log "no password found for user {{ $replUsername }}; cannot authenticate preStop"
+    exit 0
+fi
+{{- end }}
+# NOTE(review): only the password is exported and vcli passes no --user, so
+# valkey-cli presumably authenticates as the `default` ACL user — confirm
+# the behaviour when cluster.replicationUser names a different user.
+export REDISCLI_AUTH
+{{- end }}
+
+# vcli: valkey-cli wrapper that adds the TLS arguments when TLS is enabled
+# at render time; callers pass host/port/subcommand only.
+vcli() {
+{{- if .Values.tls.enabled }}
+    valkey-cli --no-auth-warning --tls --cacert "/tls/{{ .Values.tls.caPublicKey }}" "$@"
+{{- else }}
+    valkey-cli --no-auth-warning "$@"
+{{- end }}
+}
+
+# We do NOT gate on cluster_state here. 
A rollout restarts pods one at a
+# time, and between restarts this node sees cluster_state:fail until
+# gossip observes the previous pod rejoin — exactly the window this
+# preStop is meant to close. Skipping FAILOVER there would defeat the
+# hook: without it, SIGTERM takes the primary's slots offline and the
+# next pod also sees cluster_state:fail, perpetuating the degraded state
+# for the rest of the rollout. We rely instead on CLUSTER FAILOVER's
+# own preconditions (a healthy, caught-up replica) to decide whether the
+# handover is safe.
+role=$(vcli -h "${SELF_FQDN}" -p "${PORT}" info replication 2>/dev/null | awk -F: '/^role:/{print $2}' | tr -d '\r\n' || true)
+case "${role}" in
+    master) ;;
+    slave|replica)
+        log "role=${role}; no failover needed"
+        exit 0
+        ;;
+    *)
+        log "unexpected role=${role:-}; not attempting failover"
+        exit 0
+        ;;
+esac
+
+my_id=$(vcli -h "${SELF_FQDN}" -p "${PORT}" cluster myid 2>/dev/null | tr -d '\r\n' || true)
+if [ -z "${my_id}" ]; then
+    log "cluster myid empty; not attempting failover"
+    exit 0
+fi
+
+# CLUSTER REPLICAS returns a subset of CLUSTER NODES, one line per
+# replica of this primary, in the same eight-field format. We want a live
+# (non-failing), online replica. Field 2 is the announce endpoint
+# "ip:port@busport[,hostname]": the part before '@' is the node's IP, and
+# the name set via cluster-announce-hostname follows the comma.
+# cluster-preferred-endpoint-type does not change this layout, so to reach
+# the replica by a DNS name that matches the TLS SAN the hostname suffix
+# has to be picked out explicitly; the IP is only a fallback for nodes
+# that announce no hostname.
+replica_line=$(vcli -h "${SELF_FQDN}" -p "${PORT}" cluster replicas "${my_id}" 2>/dev/null \
+    | awk '!/fail/ && NF' \
+    | head -n1 || true)
+if [ -z "${replica_line}" ]; then
+    log "no healthy replica for this primary; skipping failover"
+    exit 0
+fi
+
+addr=$(printf '%s\n' "${replica_line}" | awk '{print $2}')
+endpoint=${addr%%@*}            # ip:port
+replica_host=${endpoint%:*}     # shortest ':' suffix, so IPv6 stays intact
+replica_port=${endpoint##*:}
+case "${addr}" in
+    *@*,*)
+        # Everything after the first comma, minus any further
+        # comma-separated fields that may follow the hostname.
+        announced=${addr#*,}
+        announced=${announced%%,*}
+        if [ -n "${announced}" ]; then
+            replica_host=${announced}
+        fi
+        ;;
+esac
+
+if [ -z "${replica_host}" ] || [ -z "${replica_port}" ]; then
+    log "could not parse replica endpoint from '${replica_line}'; skipping failover"
+    exit 0
+fi
+
+log "primary ${my_id}; asking replica ${replica_host}:${replica_port} to take over"
+
+# Plain CLUSTER FAILOVER (no FORCE/TAKEOVER) is the graceful path: the
+# replica negotiates with the primary, waits for replication-offset sync,
+# then promotes. If the replica is too far behind or the primary is
+# unreachable, it returns an error — we then exit 0 and let SIGTERM run.
+if ! vcli -h "${replica_host}" -p "${replica_port}" cluster failover 2>/dev/null; then
+    log "CLUSTER FAILOVER rejected; proceeding with abrupt shutdown"
+    exit 0
+fi
+
+# CLUSTER FAILOVER returns OK as soon as the replica accepts the request;
+# the actual role flip is asynchronous. Poll our own INFO until we see
+# role=slave (or give up on TIMEOUT).
+deadline=$(( $(date +%s) + TIMEOUT ))
+# One probe per second until the wall clock reaches the deadline. Either
+# outcome exits 0: the hook is best-effort and never blocks shutdown.
+until [ "$(date +%s)" -ge "${deadline}" ]; do
+    observed=$(vcli -h "${SELF_FQDN}" -p "${PORT}" info replication 2>/dev/null | awk -F: '/^role:/{print $2}' | tr -d '\r\n' || true)
+    case "${observed}" in
+        slave|replica)
+            log "demoted to ${observed}; handover complete"
+            exit 0
+            ;;
+    esac
+    sleep 1
+done
+log "timed out after ${TIMEOUT}s waiting for demotion; proceeding with shutdown"
+exit 0
diff --git a/valkey/templates/NOTES.txt b/valkey/templates/NOTES.txt index 07ddb6dd..c5ff2bba 100644 --- a/valkey/templates/NOTES.txt +++ b/valkey/templates/NOTES.txt @@ -10,7 +10,59 @@ Namespace: {{ .Release.Namespace }} Chart: {{ .Chart.Name }} {{ .Chart.Version }} App version: {{ .Chart.AppVersion }} -{{- if .Values.replica.enabled }} +{{- if .Values.cluster.enabled }} +================================================================================ +🌐 CLUSTER MODE (Sharded) +================================================================================ + +Your Valkey deployment is running in CLUSTER mode: +- {{ .Values.cluster.shards }} Shard(s) (primary nodes) +- {{ .Values.cluster.replicasPerShard }} Replica(s) per shard +- {{ include "valkey.clusterNodeCount" . }} Total node(s) + +Hash slots (16384 total) are distributed across the {{ .Values.cluster.shards }} shards. + +Service: {{ include "valkey.fullname" . }} +Type: {{ .Values.service.type }} +Port: {{ .Values.service.port }} + +Bus port {{ .Values.cluster.busPort }} is reachable only through the headless +service — it carries cluster gossip + failover traffic between nodes, so it +must bypass the round-robin frontend service. + +1) In-cluster access + From another Pod: + $ valkey-cli -h {{ include "valkey.fullname" . }} -p {{ .Values.service.port }}{{ if .Values.tls.enabled }} --tls{{- end }} -c PING + + Note: Use the `-c` flag to enable cluster mode in valkey-cli. 
+ +2) Local access via kubectl port-forward + $ kubectl -n {{ .Release.Namespace }} port-forward svc/{{ include "valkey.fullname" . }} 6379:{{ .Values.service.port }} + In another terminal: + $ valkey-cli -h 127.0.0.1 -p 6379{{ if .Values.tls.enabled }} --tls{{- end }} -c PING +{{ if eq .Values.service.type "LoadBalancer" }} +3) External access (LoadBalancer) + $ export SERVICE_IP=$(kubectl -n {{ .Release.Namespace }} get svc {{ include "valkey.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + $ valkey-cli -h $SERVICE_IP -p {{ .Values.service.port }}{{ if .Values.tls.enabled }} --tls{{- end }} -c PING +{{ else if eq .Values.service.type "NodePort" }} +3) External access (NodePort) + $ export NODE_PORT=$(kubectl -n {{ .Release.Namespace }} get svc {{ include "valkey.fullname" . }} -o jsonpath='{.spec.ports[0].nodePort}') + $ export NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}') + $ valkey-cli -h $NODE_IP -p $NODE_PORT{{ if .Values.tls.enabled }} --tls{{- end }} -c PING +{{ end }} +Direct Pod Access (Headless Service): +{{- $shards := int .Values.cluster.shards }} +{{- $replicasPerShard := int .Values.cluster.replicasPerShard }} +{{- $totalNodes := mul $shards (add 1 $replicasPerShard) }} +{{- range $i := until (int $totalNodes) }} + {{ include "valkey.fullname" $ }}-{{ $i }}.{{ include "valkey.headlessServiceName" $ }}.{{ $.Release.Namespace }}.svc.{{ $.Values.clusterDomain }} +{{- end }} + +Cluster Info: + $ valkey-cli -h {{ include "valkey.fullname" . }} -p {{ .Values.service.port }}{{ if .Values.tls.enabled }} --tls{{- end }}{{ if .Values.auth.enabled }} --user <username> -a <password> {{ end }} cluster info + $ valkey-cli -h {{ include "valkey.fullname" . 
}} -p {{ .Values.service.port }}{{ if .Values.tls.enabled }} --tls{{- end }}{{ if .Values.auth.enabled }} --user <username> -a <password> {{ end }} cluster nodes + +{{- else if .Values.replica.enabled }} ================================================================================ 🔄 REPLICATION MODE ================================================================================ @@ -99,13 +151,29 @@ Port: {{ .Values.service.port }} {{ end }} ✅ Quick test +{{- if .Values.cluster.enabled }} +$ valkey-cli -h {{ include "valkey.fullname" . }} -p {{ .Values.service.port }}{{ if .Values.tls.enabled }} --tls{{- end }}{{ if .Values.auth.enabled }} --user <username> -a <password> {{ end }} -c +valkey> SET foo bar +valkey> GET foo +"bar" +valkey> CLUSTER INFO +{{- else }} $ valkey-cli -h {{ include "valkey.fullname" . }} -p {{ .Values.service.port }}{{ if .Values.tls.enabled }} --tls{{- end }}{{ if .Values.auth.enabled }} --user <username> -a <password> {{ end }} valkey> SET foo bar valkey> GET foo "bar" +{{- end }} 💾 Persistence -{{- if .Values.replica.enabled }} +{{- if .Values.cluster.enabled }} +- Persistence is ENABLED (required for cluster mode). Each node has its own volume. +- Size: {{ .Values.cluster.persistence.size }} +{{- if .Values.cluster.persistence.storageClass }} +- Storage class: {{ .Values.cluster.persistence.storageClass }} +{{- end }} +- To see PVCs: + $ kubectl -n {{ .Release.Namespace }} get pvc -l app.kubernetes.io/instance={{ .Release.Name }} +{{- else if .Values.replica.enabled }} - Persistence is ENABLED (required for replication mode). Each instance has its own volume. 
- Size: {{ .Values.replica.persistence.size }} {{- if .Values.replica.persistence.storageClass }} diff --git a/valkey/templates/_helpers.tpl b/valkey/templates/_helpers.tpl index 593cf77c..739ba844 100644 --- a/valkey/templates/_helpers.tpl +++ b/valkey/templates/_helpers.tpl @@ -82,19 +82,17 @@ Returns the Valkey exporter container image The common image function that renders the container image */}} {{- define "common.image" -}} -{{- $registryName := .image.registry }} -{{- $repositoryName := .image.repository }} -{{- $tag := .image.tag }} -{{- if .global }} - {{- if .global.imageRegistry }} - {{- $registryName = .global.imageRegistry }} - {{- end }} -{{- end }} -{{- if $registryName }} -{{- printf "%s/%s:%s" $registryName $repositoryName $tag }} -{{- else }} -{{- printf "%s:%s" $repositoryName $tag }} -{{ end }} +{{- $registryName := .image.registry -}} +{{- $repositoryName := .image.repository -}} +{{- $tag := .image.tag -}} +{{- if and .global .global.imageRegistry -}} +{{- $registryName = .global.imageRegistry -}} +{{- end -}} +{{- if $registryName -}} +{{- printf "%s/%s:%s" $registryName $repositoryName $tag -}} +{{- else -}} +{{- printf "%s:%s" $repositoryName $tag -}} +{{- end -}} {{- end -}} {{/* @@ -188,3 +186,249 @@ Validate replica authentication configuration {{- end }} {{- end -}} +{{/* +Validate cluster configuration +*/}} +{{- define "valkey.validateClusterConfig" -}} +{{- if .Values.cluster.enabled }} + {{- if .Values.replica.enabled }} + {{- fail "cluster.enabled and replica.enabled are mutually exclusive. Please enable only one mode." }} + {{- end }} + {{- if lt (int .Values.cluster.shards) 3 }} + {{- fail "Cluster mode requires at least 3 shards (cluster.shards >= 3) for proper cluster operation." }} + {{- end }} + {{- if not .Values.cluster.persistence.size }} + {{- fail "Cluster mode requires persistent storage. 
Please set cluster.persistence.size (e.g., '5Gi')" }} + {{- end }} +{{- end }} +{{- end -}} + +{{/* +Validate cluster authentication configuration +*/}} +{{- define "valkey.validateClusterAuth" -}} +{{- if and .Values.cluster.enabled .Values.auth.enabled }} + {{- if not (hasKey .Values.auth.aclUsers .Values.cluster.replicationUser) }} + {{- fail (printf "Cluster replication user '%s' (cluster.replicationUser) must be defined in auth.aclUsers. The chart requires this to retrieve the password for cluster authentication." .Values.cluster.replicationUser) }} + {{- end }} +{{- end }} +{{- end -}} + +{{/* +Calculate total number of nodes in the cluster +*/}} +{{- define "valkey.clusterNodeCount" -}} +{{- $shards := int .Values.cluster.shards -}} +{{- $replicasPerShard := int .Values.cluster.replicasPerShard -}} +{{- mul $shards (add 1 $replicasPerShard) -}} +{{- end -}} + +{{/* +Istio pod labels. Emits the labels that tell Istio exactly how to capture +this pod's traffic, so the chart works whether or not the namespace carries +`istio-injection=enabled` or `istio.io/dataplane-mode=ambient` — and, just +as importantly, so that toggling `istio.mode` on a dual-mode cluster moves +pods between data planes cleanly. + +Sidecar mode: + sidecar.istio.io/inject: "true" — force Envoy injection even if the + namespace lacks the injection label. + istio.io/dataplane-mode: none — veto ambient capture, so a cluster + that ALSO runs ambient (e.g. during + a sidecar→ambient migration) does + not double-redirect this pod. + +Ambient mode: + istio.io/dataplane-mode: ambient — ztunnel captures this pod's traffic. + sidecar.istio.io/inject: "false" — veto Envoy injection even if the + namespace has the injection label, + so the pod isn't simultaneously + sidecar'd (which double-redirects + and silently breaks mTLS, surfacing + as "Connection reset by peer" on + every request). 
+ +Either mode by itself is enough; emitting both (per mode) makes pod-level +intent the source of truth and eliminates the cluster-configuration +dependency that's easy to miss at install time. + +When istio.enabled is false this helper emits nothing so the user remains +free to pick their own opt-in/out via podLabels (see the istio=off +functional-tests path). +*/}} +{{- define "valkey.istioPodLabels" -}} +{{- if .Values.istio.enabled -}} +{{- if eq (.Values.istio.mode | default "sidecar") "ambient" -}} +istio.io/dataplane-mode: ambient +sidecar.istio.io/inject: "false" +{{- else -}} +sidecar.istio.io/inject: "true" +istio.io/dataplane-mode: none +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Compute the merged pod labels map: selector + common + chart-computed mesh +labels + user podLabels (user wins on collision). Emits the merged dict as +YAML so the rendered output has no duplicate keys, even when a user sets +e.g. `sidecar.istio.io/inject=false` via podLabels alongside +`istio.enabled=true`. +*/}} +{{- define "valkey.podLabels" -}} +{{- $selector := fromYaml (include "valkey.selectorLabels" .) -}} +{{- $common := .Values.commonLabels | default dict -}} +{{- $mesh := fromYaml (include "valkey.istioPodLabels" .) | default dict -}} +{{- $user := .Values.podLabels | default dict -}} +{{- toYaml (mergeOverwrite $selector $common $mesh $user) -}} +{{- end -}} + +{{/* +Job-pod labels: same merge as valkey.podLabels with one extra layer for +`cluster.initJob.podLabels` applied last (so it wins). Lets operators +veto a globally-injected metrics/observability sidecar on the cluster- +init Job — which is a short-lived, exit-on-success batch task — without +having to disable the same injector for the long-running data pods. +mergeOverwrite handles the deep-merge and the no-duplicate-keys +guarantee just like the data-pod helper. +*/}} +{{- define "valkey.initJobPodLabels" -}} +{{- $selector := fromYaml (include "valkey.selectorLabels" .) 
-}} +{{- $common := .Values.commonLabels | default dict -}} +{{- $mesh := fromYaml (include "valkey.istioPodLabels" .) | default dict -}} +{{- $user := .Values.podLabels | default dict -}} +{{- $jobUser := (.Values.cluster.initJob).podLabels | default dict -}} +{{- toYaml (mergeOverwrite $selector $common $mesh $user $jobUser) -}} +{{- end -}} + +{{/* +Job-pod annotations: same shape as the global .Values.podAnnotations, +with `cluster.initJob.podAnnotations` merged on top so it wins on +collision. Same opt-out rationale as valkey.initJobPodLabels — some +sidecar injectors read annotations rather than labels. + +Emits nothing when the merged map is empty so the Job's metadata block +collapses cleanly (Helm/`with` semantics expect an absent key, not an +empty mapping, to skip). +*/}} +{{- define "valkey.initJobPodAnnotations" -}} +{{- $global := .Values.podAnnotations | default dict -}} +{{- $job := (.Values.cluster.initJob).podAnnotations | default dict -}} +{{- $merged := mergeOverwrite (deepCopy $global) $job -}} +{{- if $merged -}} +{{- toYaml $merged -}} +{{- end -}} +{{- end -}} + +{{/* +Probe shell command. Returns the "sh -c" argument that pings valkey-server +locally; the set of replies that count as healthy is parameterised. + +Args (passed as a dict): + ctx — the parent context (.) so we can read .Values.tls + acceptLoading — whether to treat 'LOADING' as healthy + +Replies to PING are one of: + PONG — fully up, dataset loaded + NOAUTH … — up, requires auth (treat as proof of liveness — the + server is fully serving, we just lack credentials) + LOADING … — TCP listener is up but the dataset is being read from + RDB/AOF; the server cannot serve traffic yet + +The three probes have different jobs and therefore different LOADING +policies: + + startupProbe (acceptLoading=false): the gate that holds liveness and + readiness off until the pod is actually serving. 
If startupProbe + accepted LOADING it would pass the moment the TCP listener opens, + kubelet would switch to liveness/readiness immediately, and the + gate would do nothing useful. Operators with multi-GB RDBs bump + `startupProbe.failureThreshold` to extend the load window — the + canonical Kubernetes pattern for slow loaders. + + livenessProbe (acceptLoading=true): runs only AFTER startupProbe + passes. After that point, LOADING almost always means a full-resync + from primary is in progress (replica fell behind, replication + backlog overflowed, etc.). Killing the pod here loses the in-flight + download work and forces yet another full resync, perpetuating the + very condition the kill was supposed to escape. A pod stuck loading + forever is rare and harmless compared to the kill-loop, so accept + LOADING and let the load complete. + + readinessProbe (acceptLoading=false): decides whether the pod is in + the Service endpoint set. A LOADING pod can't serve traffic, so it + must be removed from the rotation until it's truly ready. This + leaves the pod 'Running 0/1' during full-resync — exactly right. +*/}} +{{- define "valkey.probeShellCommand" -}} +{{- $ctx := .ctx -}} +{{- $pingCmd := "valkey-cli ping" -}} +{{- if $ctx.Values.tls.enabled -}} +{{- $pingCmd = printf "valkey-cli --tls --cacert /tls/%s ping" $ctx.Values.tls.caPublicKey -}} +{{- end -}} +{{- $accepted := "PONG|NOAUTH" -}} +{{- if .acceptLoading -}} +{{- $accepted = "PONG|NOAUTH|LOADING" -}} +{{- end -}} +{{- printf "%s 2>&1 | grep -qE '%s'" $pingCmd $accepted -}} +{{- end -}} + +{{/* +The valkey ServiceAccount name as an Istio SPIFFE principal. +Used by the AuthorizationPolicy to pin the cluster-bus port to same-release +pods cryptographically rather than by pod-selector IP. +*/}} +{{- define "valkey.istioPrincipal" -}} +{{- $trustDomain := .Values.istio.trustDomain | default "cluster.local" -}} +{{- printf "%s/ns/%s/sa/%s" $trustDomain .Release.Namespace (include "valkey.serviceAccountName" .) 
-}} +{{- end -}} + +{{/* +Validate istio configuration. Runs regardless of istio.enabled so a typo in +istio.mode (e.g. `mode: ambiet` buried in a GitOps values file) surfaces at +template time instead of silently rendering the sidecar-only code paths. +*/}} +{{- define "valkey.validateIstioConfig" -}} +{{- if hasKey .Values.istio "mode" }} + {{- if not (or (eq .Values.istio.mode "sidecar") (eq .Values.istio.mode "ambient")) }} + {{- fail (printf "istio.mode must be 'sidecar' or 'ambient', got: %s" .Values.istio.mode) }} + {{- end }} +{{- end }} +{{- /* +Guard against the silent-no-protection footgun for the cluster bus port: +when istio is enabled in ambient mode AND cluster mode is on, dropping BOTH +the NetworkPolicy (skipped for ambient) AND the AuthorizationPolicy leaves +the bus port open to any pod that can route to it. The feature's whole +point is cross-release isolation; failing closed is the only safe default. +There is deliberately no opt-out in ambient mode: `cluster.isolation.enabled` +has no effect there (its NetworkPolicy is never rendered for ambient), so +`istio.authorizationPolicy.enabled` must stay true. Users who would rather +rely on the NetworkPolicy have to switch to `istio.mode=sidecar`, which is +also what the failure message below tells them to do. +*/}} +{{- if and .Values.istio.enabled (eq .Values.istio.mode "ambient") .Values.cluster.enabled }} + {{- if not .Values.istio.authorizationPolicy.enabled }} + {{- fail "istio.authorizationPolicy.enabled=false in ambient mode + cluster mode leaves the cluster-bus port unprotected: the NetworkPolicy is skipped for ambient (it would block HBONE), and disabling the AuthorizationPolicy removes the only remaining cross-release isolation layer. Re-enable istio.authorizationPolicy.enabled, or switch to istio.mode=sidecar if you intend to rely on the NetworkPolicy." }} + {{- end }} +{{- end }} +{{- /* +Guard against the shared-ServiceAccount footgun. 
The AuthorizationPolicy +uses the SPIFFE principal `<trust-domain>/ns/<namespace>/sa/<service-account>` to scope the bus +port to same-release pods. If two releases in the same namespace share a SA +(e.g. both use `serviceAccount.create=false` with the namespace default, or +both explicitly set the same `serviceAccount.name`), their APs encode the +SAME principal — cross-release MEET passes the identity check and the +clusters silently merge. The chart cannot detect other releases at template +time, but it can surface the risk: refuse the obviously-unsafe case +(`serviceAccount.create=false` with no explicit name, i.e. the shared +`default` SA) whenever the AP is rendered. Users who deliberately share +a named SA across releases can still do so; they just have to type it. +*/}} +{{- if and .Values.istio.enabled .Values.istio.authorizationPolicy.enabled .Values.cluster.enabled }} + {{- if and (not .Values.serviceAccount.create) (not .Values.serviceAccount.name) }} + {{- fail "istio.authorizationPolicy gives cross-release cluster-bus isolation by scoping the bus port to a SPIFFE principal built from the pod's ServiceAccount. With serviceAccount.create=false AND serviceAccount.name empty, the chart falls back to the namespace's 'default' ServiceAccount — which every other release using the same fallback ALSO maps to, so the AuthorizationPolicy cannot distinguish them and cross-release CLUSTER MEET succeeds. Either set serviceAccount.create=true (per-release SA) or serviceAccount.name=<unique-name>." }} + {{- end }} +{{- end }} +{{- end -}} + + diff --git a/valkey/templates/cluster-init-job.yaml b/valkey/templates/cluster-init-job.yaml new file mode 100644 index 00000000..c875ef54 --- /dev/null +++ b/valkey/templates/cluster-init-job.yaml @@ -0,0 +1,120 @@ +{{- if .Values.cluster.enabled }} +{{- include "valkey.validateAuthConfig" . }} +{{- include "valkey.validateClusterConfig" . }} +{{- include "valkey.validateClusterAuth" . 
}} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "valkey.fullname" . 
}}-cluster-init + labels: + {{- include "valkey.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": post-install,post-upgrade + "helm.sh/hook-weight": "0" + "helm.sh/hook-delete-policy": before-hook-creation +spec: + backoffLimit: 6 + ttlSecondsAfterFinished: {{ .Values.cluster.initJob.ttlSecondsAfterFinished }} + template: + metadata: + labels: + {{- /* + Job-scoped label set (see valkey.initJobPodLabels helper). Same + layering as the data-pod helper, plus `cluster.initJob.podLabels` + on top so operators can opt the short-lived batch Job out of + global label-driven injectors (sidecar metrics agents, etc.) + without affecting the long-running data pods. In ambient mode + the dataplane-mode label is still emitted automatically so + ztunnel captures the Job's outbound connections; in sidecar mode + sidecar.istio.io/inject=true is emitted so the Job works on + namespaces that don't carry the injection label. + */}} + {{- include "valkey.initJobPodLabels" . | nindent 8 }} + {{- $annotations := include "valkey.initJobPodAnnotations" . }} + {{- with $annotations }} + annotations: + {{- . | nindent 8 }} + {{- end }} + spec: + {{- (include "valkey.imagePullSecrets" .) | nindent 6 }} + automountServiceAccountToken: false + serviceAccountName: {{ include "valkey.serviceAccountName" . }} + restartPolicy: OnFailure + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName | quote }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: cluster-init + image: {{ include "valkey.image" . }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + command: [ "/bin/sh", "/cluster-script/init-cluster.sh" ] + env: + - name: CLUSTER_NODE_COUNT + value: {{ include "valkey.clusterNodeCount" . 
| quote }} + - name: CLUSTER_REPLICAS_PER_SHARD + value: {{ .Values.cluster.replicasPerShard | quote }} + {{- with .Values.initResources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - name: cluster-script + mountPath: /cluster-script + {{- if .Values.tls.enabled }} + - name: {{ include "valkey.fullname" . }}-tls + mountPath: /tls + {{- end }} + {{- if .Values.auth.enabled }} + {{- if .Values.auth.usersExistingSecret }} + - name: valkey-users-secret + mountPath: /valkey-users-secret + readOnly: true + {{- end }} + {{- if or (include "valkey.hasInlinePasswords" . | eq "true") .Values.auth.aclConfig }} + - name: valkey-auth-secret + mountPath: /valkey-auth-secret + readOnly: true + {{- end }} + {{- end }} + volumes: + - name: cluster-script + configMap: + name: {{ include "valkey.fullname" . }}-cluster-script + defaultMode: 0555 + {{- if .Values.tls.enabled }} + - name: {{ include "valkey.fullname" . }}-tls + secret: + secretName: {{ required "An existing secret is required to enable TLS" .Values.tls.existingSecret }} + defaultMode: 0400 + {{- end }} + {{- if .Values.auth.enabled }} + {{- if .Values.auth.usersExistingSecret }} + - name: valkey-users-secret + secret: + secretName: {{ .Values.auth.usersExistingSecret }} + defaultMode: 0400 + {{- end }} + {{- if or (include "valkey.hasInlinePasswords" . | eq "true") .Values.auth.aclConfig }} + - name: valkey-auth-secret + secret: + secretName: {{ include "valkey.fullname" . }}-auth + defaultMode: 0400 + {{- end }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} +{{- end }} diff --git a/valkey/templates/cluster-isolation-netpol.yaml b/valkey/templates/cluster-isolation-netpol.yaml new file mode 100644 index 00000000..3b51f42d --- /dev/null +++ b/valkey/templates/cluster-isolation-netpol.yaml @@ -0,0 +1,83 @@ +{{- /* +Cluster-bus isolation NetworkPolicy. + +Valkey's gossip/cluster-bus protocol has no authentication of its own: a pod +that can open a TCP connection to a node's bus port (default 16379) can send +CLUSTER MEET and merge into the cluster. When two independent Valkey clusters +share a Kubernetes cluster (or even a namespace), nothing in Valkey itself +stops an accidental or malicious MEET from fusing them. + +This policy pins the bus port INBOUND to same-release traffic only, by +matching on `app.kubernetes.io/instance`. Blocking the receiving side of +the MEET handshake is sufficient: the handshake is bidirectional, so with +the receiver refusing connections, the placeholder node is evicted by the +cluster-node-timeout and the two clusters stay separate. + +Client (6379) and metrics (9121) ports stay open — they're application-level +and have their own auth (ACL/TLS). + +We deliberately do NOT set an Egress policyType. Adding Egress here would +require enumerating every destination a Valkey pod legitimately needs to +reach (kube-dns, Istio's xDS on istiod:15012, Envoy's health port, JWKS +endpoints for Istio AuthorizationPolicy, and so on); getting that wrong +breaks Istio sidecar bootstrap. Users who want egress isolation on top of +this should add an Istio AuthorizationPolicy (when they have Istio) or a +separate NetworkPolicy targeting `valkey.selectorLabels` — Kubernetes +combines those additively with this one. + +Kubernetes policies are additive: adding this one alongside the user-defined +`networkPolicy` value still allows the user's ingress/egress rules to match. + +Running on a CNI that doesn't enforce NetworkPolicy (plain Flannel, the +in-tree kubenet, etc.) 
makes this rendered policy a no-op. There is no +namespace-based fallback — pod-to-pod traffic crosses namespaces freely +unless something actually enforces policy at the data plane. On such a +cluster there is no way to prevent a cross-release CLUSTER MEET from the +chart alone; either switch to a policy-enforcing CNI, add an Istio +AuthorizationPolicy at layer 7, or run each Valkey cluster in its own +Kubernetes cluster. + +Ambient mesh caveat: in ambient mode, ztunnel wraps all pod-to-pod traffic +in HBONE on port 15008, then unwraps it at the destination and re-delivers +to the pod-local port. A NetworkPolicy that only allows ingress on 6379 / +16379 / 9121 drops the inbound HBONE — the client port gets blocked at +the policy layer and every connection fails with "Connection reset by +peer". The chart-owned AuthorizationPolicy already provides equivalent +(and stronger, identity-based) bus-port scoping for ambient, so we skip +this NetworkPolicy entirely when istio.mode=ambient. Users who still want +a belt-and-braces IP-level NetworkPolicy in ambient can add their own via +.Values.networkPolicy (rendered by netpolicy.yaml) and include port 15008. +*/}} +{{- if and .Values.cluster.enabled .Values.cluster.isolation.enabled (not (and .Values.istio.enabled (eq .Values.istio.mode "ambient"))) }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "valkey.fullname" . }}-cluster-isolation + labels: + {{- include "valkey.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + {{- include "valkey.selectorLabels" . | nindent 6 }} + policyTypes: + - Ingress + ingress: + # Bus port: only other pods of THIS release. + - from: + - podSelector: + matchLabels: + {{- include "valkey.selectorLabels" . | nindent 14 }} + ports: + - protocol: TCP + port: {{ .Values.cluster.busPort }} + # Client port: anyone. ACL + TLS guard it above the network layer. 
+ - ports: + - protocol: TCP + port: {{ .Values.service.port }} + {{- if .Values.metrics.enabled }} + # Metrics sidecar: anyone (typically Prometheus). + - ports: + - protocol: TCP + port: {{ .Values.metrics.exporter.port }} + {{- end }} +{{- end }} diff --git a/valkey/templates/cluster-script.yaml b/valkey/templates/cluster-script.yaml new file mode 100644 index 00000000..bd9fcdd7 --- /dev/null +++ b/valkey/templates/cluster-script.yaml @@ -0,0 +1,13 @@ +{{- if .Values.cluster.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "valkey.fullname" . }}-cluster-script + labels: + {{- include "valkey.labels" . | nindent 4 }} +data: + init-cluster.sh: |- +{{ tpl (.Files.Get "scripts/cluster-init-script.sh") . | indent 4 }} + prestop.sh: |- +{{ tpl (.Files.Get "scripts/cluster-prestop-script.sh") . | indent 4 }} +{{- end }} diff --git a/valkey/templates/cluster-statefulset.yaml b/valkey/templates/cluster-statefulset.yaml new file mode 100644 index 00000000..5dad0344 --- /dev/null +++ b/valkey/templates/cluster-statefulset.yaml @@ -0,0 +1,375 @@ +{{- if .Values.cluster.enabled }} +{{- include "valkey.validateAuthConfig" . }} +{{- include "valkey.validateClusterConfig" . }} +{{- include "valkey.validateClusterAuth" . }} +{{- include "valkey.validateIstioConfig" . }} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ include "valkey.fullname" . }} + labels: + {{- include "valkey.labels" . | nindent 4 }} + {{- with .Values.workloadAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + serviceName: {{ include "valkey.fullname" . }}-headless + replicas: {{ include "valkey.clusterNodeCount" . }} + podManagementPolicy: Parallel + {{- if .Values.cluster.persistentVolumeClaimRetentionPolicy }} + persistentVolumeClaimRetentionPolicy: + {{- toYaml .Values.cluster.persistentVolumeClaimRetentionPolicy | nindent 4 }} + {{- end }} + selector: + matchLabels: + {{- include "valkey.selectorLabels" . 
| nindent 6 }} + volumeClaimTemplates: + - metadata: + name: valkey-data + spec: + accessModes: {{ toYaml .Values.cluster.persistence.accessModes | nindent 8 }} + {{- if .Values.cluster.persistence.storageClass }} + storageClassName: {{ .Values.cluster.persistence.storageClass | quote }} + {{- end }} + resources: + requests: + storage: {{ .Values.cluster.persistence.size | quote }} + template: + metadata: + labels: + {{- /* + Single merged label set: selector + commonLabels + chart-computed + mesh labels + user podLabels (user wins on collision). Keeps the + rendered YAML free of duplicate keys when e.g. a user sets + sidecar.istio.io/inject=false via podLabels alongside + istio.enabled=true. + */}} + {{- include "valkey.podLabels" . | nindent 8 }} + annotations: + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + checksum/initconfig: {{ include (print $.Template.BasePath "/init_config.yaml") . | sha256sum | trunc 32 | quote }} + {{- if .Values.valkeyConfig }} + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum | trunc 32 | quote }} + {{- end }} + {{- /* + The cluster-bus port (16379 by default) carries raw Valkey gossip: a + binary, long-lived, bidirectional protocol that neither Envoy nor + ztunnel can proxy sensibly. The chart keeps it unproxied in both + modes, but the mechanics differ: + + sidecar — explicit: Envoy sees the port in its iptables rules, so + we emit traffic.sidecar.istio.io/exclude*Ports to take + it out. The AuthorizationPolicy (rendered separately) + does the cross-release enforcement via Envoy-terminated + mTLS on the OTHER ports. + + ambient — implicit: ztunnel only captures traffic for pods it + recognises, and the AuthorizationPolicy's ALLOW rules + only bind the client/metrics ports. That leaves the bus + port outside ztunnel's HBONE tunnel; pod-to-pod TCP on + 16379 takes the direct CNI path. No annotation needed + (they're sidecar-only). 
+ */}} + {{- /* Unset istio.mode means sidecar, the same default valkey.istioPodLabels applies. */ -}}{{- if and .Values.istio.enabled (eq (.Values.istio.mode | default "sidecar") "sidecar") }} + traffic.sidecar.istio.io/excludeInboundPorts: {{ .Values.cluster.busPort | quote }} + traffic.sidecar.istio.io/excludeOutboundPorts: {{ .Values.cluster.busPort | quote }} + {{- end }} + spec: + {{- (include "valkey.imagePullSecrets" .) | nindent 6 }} + automountServiceAccountToken: {{ .Values.serviceAccount.automount }} + serviceAccountName: {{ include "valkey.serviceAccountName" . }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName | quote }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + initContainers: + - name: {{ include "valkey.fullname" . }}-init + image: {{ include "valkey.image" . }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + command: [ "/scripts/init.sh" ] + env: + - name: POD_INDEX + valueFrom: + fieldRef: + fieldPath: metadata.labels['apps.kubernetes.io/pod-index'] + - name: CLUSTER_SHARDS + value: {{ .Values.cluster.shards | quote }} + - name: CLUSTER_REPLICAS_PER_SHARD + value: {{ .Values.cluster.replicasPerShard | quote }} + volumeMounts: + - name: valkey-data + mountPath: /data + - name: scripts + mountPath: /scripts + {{- if .Values.valkeyConfig }} + - name: valkey-config + mountPath: /usr/local/etc/valkey/valkey.conf + subPath: valkey.conf + {{- end }} + {{- if .Values.auth.enabled }} + - name: valkey-acl + mountPath: /etc/valkey + {{- if .Values.auth.usersExistingSecret }} + - name: valkey-users-secret + mountPath: /valkey-users-secret + readOnly: true + {{- end }} + {{- if or (include "valkey.hasInlinePasswords" . 
| eq "true") .Values.auth.aclConfig }} + - name: valkey-auth-secret + mountPath: /valkey-auth-secret + readOnly: true + {{- end }} + {{- end }} + {{- with .Values.initResources }} + resources: + {{- toYaml . 
| nindent 12 }} + {{- end }} + {{- with .Values.extraInitContainers }} + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ include "valkey.fullname" . }} + image: {{ include "valkey.image" . }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: [ "valkey-server" ] + args: [ "/data/conf/valkey.conf" ] + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + env: + {{- range $key, $val := .Values.env }} + - name: {{ $key }} + value: "{{ $val }}" + {{- end }} + - name: VALKEY_LOGLEVEL + value: "{{ .Values.valkeyLogLevel }}" + ports: + - name: tcp + containerPort: {{ .Values.service.port }} + protocol: TCP + - name: tcp-bus + containerPort: {{ .Values.cluster.busPort }} + protocol: TCP + {{- $strictCmd := include "valkey.probeShellCommand" (dict "ctx" . "acceptLoading" false) }} + {{- $loadCmd := include "valkey.probeShellCommand" (dict "ctx" . "acceptLoading" true) }} + startupProbe: + exec: + command: [ "sh", "-c", {{ $strictCmd | quote }} ] + initialDelaySeconds: 5 + periodSeconds: {{ .Values.cluster.startupProbe.periodSeconds }} + timeoutSeconds: {{ .Values.cluster.startupProbe.timeoutSeconds }} + failureThreshold: {{ .Values.cluster.startupProbe.failureThreshold }} + livenessProbe: + exec: + command: [ "sh", "-c", {{ $loadCmd | quote }} ] + periodSeconds: {{ .Values.cluster.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.cluster.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.cluster.livenessProbe.failureThreshold }} + readinessProbe: + exec: + command: [ "sh", "-c", {{ $strictCmd | quote }} ] + periodSeconds: {{ .Values.cluster.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.cluster.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.cluster.readinessProbe.failureThreshold }} + {{- if and (gt (int .Values.cluster.replicasPerShard) 0) .Values.cluster.preStopFailover.enabled }} + lifecycle: + # Graceful CLUSTER FAILOVER on primary-pod shutdown. 
Gated on + # replicasPerShard>0 (no replica to hand over to otherwise — + # the hook would no-op and just eat grace-period budget). The + # script itself is best-effort and never blocks SIGTERM. + preStop: + exec: + command: [ "/bin/sh", "/cluster-script/prestop.sh" ] + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + - name: valkey-data + mountPath: /data + {{- if and (gt (int .Values.cluster.replicasPerShard) 0) .Values.cluster.preStopFailover.enabled }} + - name: cluster-script + mountPath: /cluster-script + {{- end }} + {{- if .Values.tls.enabled }} + - name: {{ include "valkey.fullname" . }}-tls + mountPath: /tls + {{- end }} + {{- if .Values.auth.enabled }} + - name: valkey-acl + mountPath: /etc/valkey + {{- if .Values.auth.usersExistingSecret }} + - name: valkey-users-secret + mountPath: /valkey-users-secret + readOnly: true + {{- end }} + {{- if or (include "valkey.hasInlinePasswords" . | eq "true") .Values.auth.aclConfig }} + - name: valkey-auth-secret + mountPath: /valkey-auth-secret + readOnly: true + {{- end }} + {{- end }} + {{- range $secret := .Values.extraValkeySecrets }} + - name: {{ $secret.name }}-valkey + mountPath: {{ $secret.mountPath }} + {{- end }} + {{- range $config := .Values.extraValkeyConfigs }} + - name: {{ $config.name }}-valkey + mountPath: {{ $config.mountPath }} + {{- end }} + {{- with .Values.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- if .Values.metrics.enabled }} + - name: metrics + image: {{ include "valkey.metrics.exporter.image" . }} + imagePullPolicy: {{ .Values.metrics.exporter.image.pullPolicy | quote }} + {{- with .Values.metrics.exporter.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.metrics.exporter.command }} + command: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.metrics.exporter.args }} + args: + {{- toYaml . 
| nindent 12 }} + {{- end }} + ports: + - name: metrics + containerPort: {{ .Values.metrics.exporter.port }} + startupProbe: + tcpSocket: + port: metrics + livenessProbe: + tcpSocket: + port: metrics + readinessProbe: + httpGet: + path: / + port: metrics + {{- with .Values.metrics.exporter.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.metrics.exporter.extraVolumeMounts }} + volumeMounts: + {{- toYaml . | nindent 12 }} + {{- end }} + env: + - name: REDIS_ALIAS + value: {{ include "valkey.fullname" . }} + {{- if .Values.auth.enabled }} + {{- $defaultUser := get (.Values.auth.aclUsers | default dict) "default" | default dict }} + {{- $hasInlineDefaultPassword := hasKey $defaultUser "password" }} + {{- if .Values.auth.usersExistingSecret }} + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.auth.usersExistingSecret }} + key: {{ $defaultUser.passwordKey | default "default" }} + {{- else if $hasInlineDefaultPassword }} + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: {{ include "valkey.fullname" . }}-auth + key: default-password + {{- end }} + {{- end }} + {{- range $key, $val := .Values.metrics.exporter.extraEnvs }} + - name: {{ $key }} + value: "{{ $val }}" + {{- end }} + {{- end }} + {{- with .Values.extraContainers }} + {{- toYaml . | nindent 8 }} + {{- end }} + terminationGracePeriodSeconds: {{ .Values.cluster.terminationGracePeriodSeconds }} + volumes: + - name: scripts + configMap: + name: {{ include "valkey.fullname" . }}-init-scripts + defaultMode: 0555 + {{- if and (gt (int .Values.cluster.replicasPerShard) 0) .Values.cluster.preStopFailover.enabled }} + - name: cluster-script + configMap: + name: {{ include "valkey.fullname" . }}-cluster-script + defaultMode: 0555 + {{- end }} + {{- if .Values.auth.enabled }} + - name: valkey-acl + emptyDir: + medium: Memory + {{- end }} + {{- if .Values.valkeyConfig }} + - name: valkey-config + configMap: + name: {{ include "valkey.fullname" . 
}}-config + {{- end }} + {{- range .Values.extraValkeySecrets }} + - name: {{ .name }}-valkey + secret: + secretName: {{ .name }} + defaultMode: {{ .defaultMode | default 0440 }} + {{- end }} + {{- if .Values.tls.enabled }} + - name: {{ include "valkey.fullname" . }}-tls + secret: + secretName: {{ required "An existing secret is required to enable TLS" .Values.tls.existingSecret }} + defaultMode: 0400 + {{- end }} + {{- range .Values.extraValkeyConfigs }} + - name: {{ .name }}-valkey + configMap: + name: {{ .name }} + defaultMode: {{ .defaultMode | default 0440 }} + {{- end }} + {{- if .Values.metrics.enabled }} + {{- range .Values.metrics.exporter.extraExporterSecrets }} + - name: {{ .name }}-exporter + secret: + secretName: {{ .name }} + defaultMode: {{ .defaultMode | default 0440 }} + {{- end }} + {{- end }} + {{- if .Values.auth.enabled }} + {{- if .Values.auth.usersExistingSecret }} + - name: valkey-users-secret + secret: + secretName: {{ .Values.auth.usersExistingSecret }} + defaultMode: 0400 + {{- end }} + {{- if or (include "valkey.hasInlinePasswords" . | eq "true") .Values.auth.aclConfig }} + - name: valkey-auth-secret + secret: + secretName: {{ include "valkey.fullname" . }}-auth + defaultMode: 0400 + {{- end }} + {{- end }} + {{- with .Values.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.topologySpreadConstraints }} + topologySpreadConstraints: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} +{{- end }} diff --git a/valkey/templates/deploy_valkey.yaml b/valkey/templates/deploy_valkey.yaml index 7bc9a5a8..64501320 100644 --- a/valkey/templates/deploy_valkey.yaml +++ b/valkey/templates/deploy_valkey.yaml @@ -1,8 +1,9 @@ -{{- if not .Values.replica.enabled }} +{{- if not (or .Values.replica.enabled .Values.cluster.enabled) }} {{- $fullname := include "valkey.fullname" . }} {{- $storage := .Values.dataStorage }} {{- $createPVC := and $storage.enabled (not (empty $storage.requestedSize)) (empty $storage.persistentVolumeClaimName) }} {{- include "valkey.validateAuthConfig" . }} +{{- include "valkey.validateIstioConfig" . }} apiVersion: apps/v1 kind: Deployment metadata: @@ -23,13 +24,14 @@ spec: template: metadata: labels: - {{- include "valkey.selectorLabels" . | nindent 8 }} - {{- with .Values.commonLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.podLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} + {{- /* + Single merged label set: selector + commonLabels + chart-computed + mesh labels + user podLabels (user wins on collision). Keeps the + rendered YAML free of duplicate keys when e.g. a user sets + sidecar.istio.io/inject=false via podLabels alongside + istio.enabled=true. + */}} + {{- include "valkey.podLabels" . | nindent 8 }} annotations: {{- with .Values.podAnnotations }} {{- toYaml . | nindent 8 }} @@ -69,10 +71,6 @@ spec: mountPath: /usr/local/etc/valkey/valkey.conf subPath: valkey.conf {{- end }} - {{- if .Values.extraSecretValkeyConfigs }} - - name: extravalkeyconfigs-volume - mountPath: /extravalkeyconfigs - {{- end }} {{- if .Values.auth.enabled }} - name: valkey-acl mountPath: /etc/valkey @@ -116,20 +114,26 @@ spec: - name: tcp containerPort: {{ .Values.service.port }} protocol: TCP + {{- $strictCmd := include "valkey.probeShellCommand" (dict "ctx" . "acceptLoading" false) }} + {{- $loadCmd := include "valkey.probeShellCommand" (dict "ctx" . 
"acceptLoading" true) }} startupProbe: exec: - {{- if .Values.tls.enabled }} - command: [ "sh", "-c", "valkey-cli --cacert /tls/{{ .Values.tls.caPublicKey }} --tls ping" ] - {{- else }} - command: [ "sh", "-c", "valkey-cli ping" ] - {{- end }} + command: [ "sh", "-c", {{ $strictCmd | quote }} ] + periodSeconds: {{ .Values.startupProbe.periodSeconds }} + timeoutSeconds: {{ .Values.startupProbe.timeoutSeconds }} + failureThreshold: {{ .Values.startupProbe.failureThreshold }} livenessProbe: exec: - {{- if .Values.tls.enabled }} - command: [ "sh", "-c", "valkey-cli --cacert /tls/{{ .Values.tls.caPublicKey }} --tls ping" ] - {{- else }} - command: [ "sh", "-c", "valkey-cli ping" ] - {{- end }} + command: [ "sh", "-c", {{ $loadCmd | quote }} ] + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + readinessProbe: + exec: + command: [ "sh", "-c", {{ $strictCmd | quote }} ] + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} resources: {{- toYaml .Values.resources | nindent 12 }} volumeMounts: @@ -198,18 +202,21 @@ spec: - name: REDIS_ALIAS value: {{ include "valkey.fullname" . 
}} {{- if .Values.auth.enabled }} + {{- $defaultUser := get (.Values.auth.aclUsers | default dict) "default" | default dict }} + {{- $hasInlineDefaultPassword := hasKey $defaultUser "password" }} + {{- if .Values.auth.usersExistingSecret }} - name: REDIS_PASSWORD valueFrom: secretKeyRef: - {{- if .Values.auth.usersExistingSecret }} - {{- $defaultUser := index .Values.auth.aclUsers "default" | default dict }} - {{- $passwordKey := $defaultUser.passwordKey | default "default" }} name: {{ .Values.auth.usersExistingSecret }} - key: {{ $passwordKey }} - {{- else }} + key: {{ $defaultUser.passwordKey | default "default" }} + {{- else if $hasInlineDefaultPassword }} + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: name: {{ include "valkey.fullname" . }}-auth key: default-password - {{- end }} + {{- end }} {{- end }} {{- range $key, $val := .Values.metrics.exporter.extraEnvs }} - name: {{ $key }} diff --git a/valkey/templates/init_config.yaml b/valkey/templates/init_config.yaml index 9b0337e5..4d11e327 100644 --- a/valkey/templates/init_config.yaml +++ b/valkey/templates/init_config.yaml @@ -53,7 +53,10 @@ data: fi {{- end }} - echo "$password" + # printf is byte-safe; dash's `echo` quietly interprets backslash + # escapes (\b, \t, \\, etc.), corrupting any password that contains a + # backslash before it's hashed into the ACL. + printf '%s' "$password" } {{- end }} @@ -123,8 +126,9 @@ data: # User: {{ $username }} PASSWORD=$(get_user_password "{{ $username }}" "{{ $passwordKey }}") || exit 1 - # Hash the password and write ACL entry - PASSHASH=$(echo -n "$PASSWORD" | sha256sum | cut -f 1 -d " ") + # Hash the password and write ACL entry. printf (not echo -n) is POSIX — + # echo -n is implementation-defined and quietly emits `-n\n` under some shells. 
+ PASSHASH=$(printf '%s' "$PASSWORD" | sha256sum | cut -f 1 -d " ") echo "user {{ $username }} on #$PASSHASH {{ $user.permissions }}" >> /etc/valkey/users.acl {{- end }} @@ -190,8 +194,10 @@ data: {{- $replPasswordKey := $replUser.passwordKey | default $replUsername }} REPL_PASSWORD=$(get_user_password "{{ $replUsername }}" "{{ $replPasswordKey }}") || exit 1 - # Write masterauth configuration - echo "masterauth $REPL_PASSWORD" >>"$VALKEY_CONFIG" + # Write masterauth configuration. Quote + backslash-escape so passwords + # containing quotes/backslashes survive valkey.conf parsing. + REPL_PASSWORD_ESC=$(printf '%s' "$REPL_PASSWORD" | sed 's/\\/\\\\/g; s/"/\\"/g') + printf 'masterauth "%s"\n' "$REPL_PASSWORD_ESC" >>"$VALKEY_CONFIG" echo "masteruser {{ $replUsername }}" >>"$VALKEY_CONFIG" log "Configured masterauth with user {{ $replUsername }}" {{- end }} @@ -219,13 +225,157 @@ data: {{- end }} {{- end }} + {{- if .Values.cluster.enabled }} + # Cluster mode configuration + log "Configuring cluster mode" + + # Use POD_INDEX from Kubernetes metadata + POD_INDEX=${POD_INDEX:-0} + + # Configure cluster-enabled settings + { + echo "" + echo "# Cluster Configuration" + echo "cluster-enabled yes" + echo "cluster-config-file /data/nodes.conf" + echo "cluster-node-timeout {{ .Values.cluster.nodeTimeout }}" + {{- if not .Values.cluster.requireFullCoverage }} + echo "cluster-require-full-coverage no" + {{- end }} + {{- if .Values.cluster.allowReadsWhenDown }} + echo "cluster-allow-reads-when-down yes" + {{- end }} + echo "" + echo "# Cluster node announcement" + echo "cluster-announce-hostname {{ include "valkey.fullname" . }}-$POD_INDEX.{{ include "valkey.headlessServiceName" . 
}}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}" + echo "cluster-announce-port {{ .Values.service.port }}" + echo "cluster-announce-bus-port {{ .Values.cluster.busPort }}" + echo "cluster-preferred-endpoint-type hostname" + } >>"$VALKEY_CONFIG" + + log "Cluster node $POD_INDEX configured with announce hostname" + + {{- if .Values.auth.enabled }} + # Configure cluster authentication + {{- $replUsername := .Values.cluster.replicationUser }} + {{- $replUser := index .Values.auth.aclUsers $replUsername }} + {{- $replPasswordKey := $replUser.passwordKey | default $replUsername }} + REPL_PASSWORD=$(get_user_password "{{ $replUsername }}" "{{ $replPasswordKey }}") || exit 1 + REPL_PASSWORD_ESC=$(printf '%s' "$REPL_PASSWORD" | sed 's/\\/\\\\/g; s/"/\\"/g') + + { + echo "" + echo "# Cluster authentication" + printf 'masterauth "%s"\n' "$REPL_PASSWORD_ESC" + echo "masteruser {{ $replUsername }}" + } >>"$VALKEY_CONFIG" + log "Configured cluster authentication with user {{ $replUsername }}" + {{- end }} + + {{- if .Values.tls.enabled }} + # TLS for cluster + { + echo "" + echo "# TLS for cluster" + echo "tls-replication yes" + echo "tls-cluster yes" + } >>"$VALKEY_CONFIG" + log "Enabled TLS for cluster communication" + {{- end }} + + # ---------------------------------------------------------------------- + # Refresh stale IPs in /data/nodes.conf before valkey-server starts. + # + # Why: cluster bus gossip (port 16379) is dialled by raw IP, even when + # cluster-preferred-endpoint-type=hostname (the hostname is announced + # over the bus, not used to establish it). After a rolling restart pod + # IPs change; if the first pod we restart on a given node also took + # the longest to load its RDB, by the time it comes back ALL its peers + # have new IPs and its own nodes.conf has none of them. The pod is + # then a stranded minority partition and never recovers without + # operator intervention.
+ # + # Fix: on every cluster pod start, re-resolve each peer's announced + # FQDN (already on-disk in nodes.conf as the second comma-separated + # token of field 2) and rewrite the IP in place. Valkey reads + # nodes.conf at startup and uses those IPs as its initial gossip + # targets — fresh IPs in, fresh gossip out, no stranded pod. + # + # No-ops cleanly when: + # * nodes.conf doesn't exist (first boot — there's nothing to refresh, + # and CLUSTER MEET from cluster-init-script.sh will populate it); + # * a peer's FQDN doesn't resolve (peer is also mid-restart) — we + # leave that line as-is and let Valkey's normal retry/gossip + # reconcile it once the peer's pod IP shows up in DNS. + # ---------------------------------------------------------------------- + NODES_CONF=/data/nodes.conf + if [ -f "$NODES_CONF" ]; then + log "Refreshing IPs in $NODES_CONF against current DNS" + # Write the temp file in /data (the PVC) rather than $TMPDIR — the + # init container runs with readOnlyRootFilesystem=true, which + # leaves /tmp read-only. /data is the only RW mount we have, and + # it's the same filesystem as the destination so the final mv is + # atomic (rename(2) within one mount point). + TMP=$(mktemp /data/nodes.conf.XXXXXX) + changed=0 + kept=0 + missing=0 + # Read line-by-line. Format per line: + # NODE_ID IP:PORT@BUSPORT,FQDN[,k=v,...] flags ... [slots ...] + # The 'vars' line at EOF has no comma so we pass it through unchanged. + while IFS= read -r line || [ -n "$line" ]; do + case "$line" in + ''|vars\ *) + printf '%s\n' "$line" >>"$TMP" + continue + ;; + esac + # Field 2 is endpoint+metadata, field 1 is node id. + endpoint=$(printf '%s' "$line" | awk '{print $2}') + # Skip lines we can't parse (defensive — preserve verbatim). + case "$endpoint" in + *,*) ;; + *) printf '%s\n' "$line" >>"$TMP"; continue ;; + esac + # Pull out the host portion of "ip:port@busport" and the announced FQDN. Split host from port on the LAST colon: an IPv6 host contains colons itself, so a first-colon split would truncate it and corrupt the rewritten endpoint.
+ addr=${endpoint%%,*} + rest=${endpoint#*,} + fqdn=${rest%%,*} + old_ip=${addr%:*} + port_and_bus=${addr##*:} + # Skip if the announced FQDN looks empty (older nodes.conf shapes). + if [ -z "$fqdn" ]; then + printf '%s\n' "$line" >>"$TMP"; continue + fi + new_ip=$(getent hosts "$fqdn" 2>/dev/null | awk '{print $1; exit}') + if [ -z "$new_ip" ]; then + missing=$(( missing + 1 )) + printf '%s\n' "$line" >>"$TMP" + continue + fi + if [ "$new_ip" = "$old_ip" ]; then + kept=$(( kept + 1 )) + printf '%s\n' "$line" >>"$TMP" + continue + fi + # Rewrite the endpoint token in field 2; everything else verbatim. + new_endpoint="${new_ip}:${port_and_bus},${rest}" + # Replace ONLY the first whitespace-separated token after the ID. + # Using awk to avoid sed quoting/regex hazards when fqdn contains dots. + printf '%s\n' "$line" | awk -v new="$new_endpoint" '{$2 = new; print}' >>"$TMP" + changed=$(( changed + 1 )) + done <"$NODES_CONF" + # Atomic swap so a kill mid-rewrite can't corrupt nodes.conf. + mv "$TMP" "$NODES_CONF" + log "nodes.conf refresh: ${changed} updated, ${kept} unchanged, ${missing} unresolved" + else + log "$NODES_CONF absent — first boot, nothing to refresh" + fi + {{- end }} + # Append extra configs if present if [ -f /usr/local/etc/valkey/valkey.conf ]; then log "Appending /usr/local/etc/valkey/valkey.conf" cat /usr/local/etc/valkey/valkey.conf >>"$VALKEY_CONFIG" fi - if [ -d /extravalkeyconfigs ]; then - log "Appending files in /extravalkeyconfigs/" - cat /extravalkeyconfigs/* >>"$VALKEY_CONFIG" - fi diff --git a/valkey/templates/istio-authorization-policy.yaml b/valkey/templates/istio-authorization-policy.yaml new file mode 100644 index 00000000..4b983ded --- /dev/null +++ b/valkey/templates/istio-authorization-policy.yaml @@ -0,0 +1,82 @@ +{{- /* +AuthorizationPolicy for the cluster-bus port. + +Valkey's CLUSTER MEET has no authentication of its own: a pod that can open +a TCP connection to a node's bus port can merge into the cluster.
The chart +already ships a NetworkPolicy that pins the bus port to same-release pods by +IP (cluster-isolation-netpol.yaml), but that only works on a CNI that +enforces NetworkPolicy. + +An Istio AuthorizationPolicy is the belt-and-braces: it matches on SPIFFE +principal (the caller's ServiceAccount identity), not IP, so a pod that +spoofs its way onto the right IP range still fails the check. It also works +regardless of CNI — the enforcement point is the sidecar Envoy (sidecar +mode) or the node-local ztunnel (ambient mode), both of which terminate +mTLS and have the peer's identity. + +Rendered only in cluster mode — no bus port to protect otherwise. + +Both L4 enforcement points (sidecar Envoy, ambient ztunnel) apply ALLOW/DENY on +principal+port, so a single policy shape works for both modes. Ambient's +ztunnel does NOT enforce L7 rules (HTTP method, path, etc.) — those need a +waypoint — but we only need L4 here. +*/}} +{{- if and .Values.istio.enabled .Values.istio.authorizationPolicy.enabled .Values.cluster.enabled }} +{{- include "valkey.validateIstioConfig" . }} +apiVersion: security.istio.io/v1 +kind: AuthorizationPolicy +metadata: + name: {{ include "valkey.fullname" . }}-cluster-bus + labels: + {{- include "valkey.labels" . | nindent 4 }} + {{- with .Values.istio.authorizationPolicy.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.istio.authorizationPolicy.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + {{- include "valkey.selectorLabels" . | nindent 6 }} + action: ALLOW + rules: + # Same-release pods (identified by SPIFFE principal) may reach the bus port. + - from: + - source: + principals: + - {{ include "valkey.istioPrincipal" . | quote }} + to: + - operation: + ports: + - {{ .Values.cluster.busPort | quote }} + # Client and metrics ports stay wide open at this layer — they have + # their own auth (ACL, TLS) above the mesh.
A separate deny rule on the + # bus port is unnecessary: this policy is ALLOW-only, and because at + # least one AuthorizationPolicy now targets these pods, Istio applies + # default-deny to anything not matched — i.e. the bus port for + # non-same-release principals. + # + # Istio-managed ports (15020 merged-stats, 15021 Envoy readiness, 15090 + # Envoy admin) are intentionally NOT listed: + # sidecar mode — Istio auto-excludes these via iptables so they never + # hit Envoy's authz stack; the AP has no bearing on + # them. Verified on a live kind+Istio 1.29 install: + # 15021/15090 reachable from in-mesh pods without an + # explicit allow rule; 15020 is bound to pilot-agent + # outside Envoy's path. + # ambient mode — no Envoy exists, so none of these ports have analogues + # (ztunnel metrics live on the NODE, not the pod). + # Prometheus scrapes this chart via the shipped Service/PodMonitor on + # the app-level 9121 port, so this is the only port Prometheus cares + # about here. Scrapers that rely on Istio's Envoy-merged 15020 path + # hit pilot-agent directly and aren't gated by this AP. + - to: + - operation: + ports: + - {{ .Values.service.port | quote }} + {{- if .Values.metrics.enabled }} + - {{ .Values.metrics.exporter.port | quote }} + {{- end }} +{{- end }} diff --git a/valkey/templates/istio-destination-rule.yaml b/valkey/templates/istio-destination-rule.yaml new file mode 100644 index 00000000..88e7f6f2 --- /dev/null +++ b/valkey/templates/istio-destination-rule.yaml @@ -0,0 +1,50 @@ +{{- /* +DestinationRule wraps outbound connections in ISTIO_MUTUAL mTLS. This is a +sidecar-mode concept — an outbound Envoy sees the DR and upgrades the TLS. +In ambient mode the ztunnel already wraps every pod-to-pod hop in HBONE mTLS +transparently, so a DR on top would layer a second mTLS (Envoy-in-ztunnel) +— double crypto for no security gain, and it requires a waypoint proxy to +even take effect. Skip it. 
+*/}} +{{- if and .Values.istio.enabled (eq .Values.istio.mode "sidecar") }} +{{- include "valkey.validateIstioConfig" . }} +apiVersion: networking.istio.io/v1 +kind: DestinationRule +metadata: + name: {{ include "valkey.fullname" . }} + labels: + {{- include "valkey.labels" . | nindent 4 }} + {{- with .Values.istio.destinationRule.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.istio.destinationRule.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + host: {{ include "valkey.fullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }} + trafficPolicy: + tls: + mode: {{ .Values.istio.destinationRule.mode }} +{{- if or .Values.replica.enabled .Values.cluster.enabled }} +--- +apiVersion: networking.istio.io/v1 +kind: DestinationRule +metadata: + name: {{ include "valkey.headlessServiceName" . }} + labels: + {{- include "valkey.labels" . | nindent 4 }} + {{- with .Values.istio.destinationRule.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.istio.destinationRule.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + host: {{ include "valkey.headlessServiceName" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }} + trafficPolicy: + tls: + mode: {{ .Values.istio.destinationRule.mode }} +{{- end }} +{{- end }} diff --git a/valkey/templates/istio-peer-authentication.yaml b/valkey/templates/istio-peer-authentication.yaml new file mode 100644 index 00000000..d04670f3 --- /dev/null +++ b/valkey/templates/istio-peer-authentication.yaml @@ -0,0 +1,26 @@ +{{- /* +PeerAuthentication applies in both sidecar and ambient mode — Envoy enforces +in sidecar, ztunnel enforces in ambient. The CRD shape is the same for both. +*/}} +{{- if .Values.istio.enabled }} +{{- include "valkey.validateIstioConfig" . }} +apiVersion: security.istio.io/v1 +kind: PeerAuthentication +metadata: + name: {{ include "valkey.fullname" . }} + labels: + {{- include "valkey.labels" . 
| nindent 4 }} + {{- with .Values.istio.peerAuthentication.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.istio.peerAuthentication.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + {{- include "valkey.selectorLabels" . | nindent 6 }} + mtls: + mode: {{ .Values.istio.peerAuthentication.mode }} +{{- end }} diff --git a/valkey/templates/netpolicy.yaml b/valkey/templates/netpolicy.yaml index f65c504d..a4272636 100644 --- a/valkey/templates/netpolicy.yaml +++ b/valkey/templates/netpolicy.yaml @@ -1,4 +1,11 @@ {{- with .Values.networkPolicy }} +{{- /* +Gate on `hasKey` rather than truthiness: an empty list still counts as +the user declaring a policy (e.g. `ingress: []` for default-deny). +Otherwise an empty array would produce a NetworkPolicy with `policyTypes: []` +which the API server accepts but does nothing useful. +*/}} +{{- if or (hasKey . "ingress") (hasKey . "egress") }} apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: @@ -16,18 +23,25 @@ spec: matchLabels: {{- include "valkey.selectorLabels" $ | nindent 6 }} policyTypes: - {{- if .ingress }} + {{- if hasKey . "ingress" }} - Ingress {{- end }} - {{- if .egress }} + {{- if hasKey . "egress" }} - Egress {{- end }} - {{- with .ingress }} + {{- if hasKey . "ingress" }} ingress: - {{- toYaml . | nindent 4 }} + {{- if .ingress }} + {{- toYaml .ingress | nindent 4 }} + {{- else }} [] + {{- end }} {{- end }} - {{- with .egress }} + {{- if hasKey . "egress" }} egress: - {{- toYaml . 
| nindent 4 }} + {{- if .egress }} + {{- toYaml .egress | nindent 4 }} + {{- else }} [] + {{- end }} {{- end }} {{- end }} +{{- end }} diff --git a/valkey/templates/poddisruptionbudget.yaml b/valkey/templates/poddisruptionbudget.yaml index ff123525..00430f71 100644 --- a/valkey/templates/poddisruptionbudget.yaml +++ b/valkey/templates/poddisruptionbudget.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.podDisruptionBudget.enabled .Values.replica.enabled }} +{{- if and .Values.podDisruptionBudget.enabled (or .Values.replica.enabled .Values.cluster.enabled) }} apiVersion: policy/v1 kind: PodDisruptionBudget metadata: diff --git a/valkey/templates/pvc.yaml b/valkey/templates/pvc.yaml index aa20859b..9f25edf8 100644 --- a/valkey/templates/pvc.yaml +++ b/valkey/templates/pvc.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.dataStorage.enabled (not .Values.replica.enabled) (not (empty .Values.dataStorage.requestedSize)) (empty .Values.dataStorage.persistentVolumeClaimName) }} +{{- if and .Values.dataStorage.enabled (not .Values.replica.enabled) (not .Values.cluster.enabled) (not (empty .Values.dataStorage.requestedSize)) (empty .Values.dataStorage.persistentVolumeClaimName) }} apiVersion: v1 kind: PersistentVolumeClaim metadata: diff --git a/valkey/templates/service-headless.yaml b/valkey/templates/service-headless.yaml index 733ca683..796ccd90 100644 --- a/valkey/templates/service-headless.yaml +++ b/valkey/templates/service-headless.yaml @@ -1,4 +1,4 @@ -{{- if .Values.replica.enabled }} +{{- if or .Values.replica.enabled .Values.cluster.enabled }} apiVersion: v1 kind: Service metadata: @@ -15,6 +15,12 @@ spec: port: {{ .Values.service.port }} targetPort: tcp protocol: TCP + {{- if .Values.cluster.enabled }} + - name: tcp-bus + port: {{ .Values.cluster.busPort }} + targetPort: tcp-bus + protocol: TCP + {{- end }} selector: {{- include "valkey.selectorLabels" . 
| nindent 4 }} {{- end }} diff --git a/valkey/templates/service-read.yaml b/valkey/templates/service-read.yaml index 49ec54e7..de84466d 100644 --- a/valkey/templates/service-read.yaml +++ b/valkey/templates/service-read.yaml @@ -18,8 +18,9 @@ spec: {{- if .Values.replica.service.loadBalancerClass }} loadBalancerClass: {{ .Values.replica.service.loadBalancerClass }} {{- end }} - {{- if .Values.replica.service.loadBalancerSourceRanges }} - loadBalancerSourceRanges: {{ .Values.replica.service.loadBalancerSourceRanges }} + {{- with .Values.replica.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- toYaml . | nindent 4 }} {{- end }} ports: - name: tcp diff --git a/valkey/templates/service.yaml b/valkey/templates/service.yaml index 1e786826..353375c2 100644 --- a/valkey/templates/service.yaml +++ b/valkey/templates/service.yaml @@ -17,8 +17,9 @@ spec: {{- if .Values.service.loadBalancerClass }} loadBalancerClass: {{ .Values.service.loadBalancerClass }} {{- end }} - {{- if .Values.service.loadBalancerSourceRanges }} - loadBalancerSourceRanges: {{ .Values.service.loadBalancerSourceRanges }} + {{- with .Values.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- toYaml . | nindent 4 }} {{- end }} ports: - port: {{ .Values.service.port }} @@ -36,3 +37,4 @@ spec: {{- if .Values.replica.enabled }} statefulset.kubernetes.io/pod-name: {{ include "valkey.fullname" . }}-0 {{- end }} + {{- /* In cluster mode, the service routes to all nodes; clients handle redirections */}} diff --git a/valkey/templates/statefulset.yaml b/valkey/templates/statefulset.yaml index 4a8d4caa..b7bd1ff6 100644 --- a/valkey/templates/statefulset.yaml +++ b/valkey/templates/statefulset.yaml @@ -2,6 +2,7 @@ {{- include "valkey.validateAuthConfig" . }} {{- include "valkey.validateReplicaPersistence" . }} {{- include "valkey.validateReplicaAuth" . }} +{{- include "valkey.validateIstioConfig" . 
}} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -37,13 +38,14 @@ spec: template: metadata: labels: - {{- include "valkey.selectorLabels" . | nindent 8 }} - {{- with .Values.commonLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.podLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} + {{- /* + Single merged label set: selector + commonLabels + chart-computed + mesh labels + user podLabels (user wins on collision). Keeps the + rendered YAML free of duplicate keys when e.g. a user sets + sidecar.istio.io/inject=false via podLabels alongside + istio.enabled=true. + */}} + {{- include "valkey.podLabels" . | nindent 8 }} annotations: {{- with .Values.podAnnotations }} {{- toYaml . | nindent 8 }} @@ -85,10 +87,6 @@ spec: mountPath: /usr/local/etc/valkey/valkey.conf subPath: valkey.conf {{- end }} - {{- if .Values.extraSecretValkeyConfigs }} - - name: extravalkeyconfigs-volume - mountPath: /extravalkeyconfigs - {{- end }} {{- if .Values.auth.enabled }} - name: valkey-acl mountPath: /etc/valkey @@ -119,10 +117,6 @@ spec: securityContext: {{- toYaml .Values.securityContext | nindent 12 }} env: - - name: POD_INDEX - valueFrom: - fieldRef: - fieldPath: metadata.labels['apps.kubernetes.io/pod-index'] {{- range $key, $val := .Values.env }} - name: {{ $key }} value: "{{ $val }}" @@ -133,20 +127,26 @@ spec: - name: tcp containerPort: {{ .Values.service.port }} protocol: TCP + {{- $strictCmd := include "valkey.probeShellCommand" (dict "ctx" . "acceptLoading" false) }} + {{- $loadCmd := include "valkey.probeShellCommand" (dict "ctx" . 
"acceptLoading" true) }} startupProbe: exec: - {{- if .Values.tls.enabled }} - command: [ "sh", "-c", "valkey-cli --cacert /tls/{{ .Values.tls.caPublicKey }} --tls ping" ] - {{- else }} - command: [ "sh", "-c", "valkey-cli ping" ] - {{- end }} + command: [ "sh", "-c", {{ $strictCmd | quote }} ] + periodSeconds: {{ .Values.replica.startupProbe.periodSeconds }} + timeoutSeconds: {{ .Values.replica.startupProbe.timeoutSeconds }} + failureThreshold: {{ .Values.replica.startupProbe.failureThreshold }} livenessProbe: exec: - {{- if .Values.tls.enabled }} - command: [ "sh", "-c", "valkey-cli --cacert /tls/{{ .Values.tls.caPublicKey }} --tls ping" ] - {{- else }} - command: [ "sh", "-c", "valkey-cli ping" ] - {{- end }} + command: [ "sh", "-c", {{ $loadCmd | quote }} ] + periodSeconds: {{ .Values.replica.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.replica.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.replica.livenessProbe.failureThreshold }} + readinessProbe: + exec: + command: [ "sh", "-c", {{ $strictCmd | quote }} ] + periodSeconds: {{ .Values.replica.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.replica.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.replica.readinessProbe.failureThreshold }} resources: {{- toYaml .Values.resources | nindent 12 }} volumeMounts: @@ -212,18 +212,21 @@ spec: - name: REDIS_ALIAS value: {{ include "valkey.fullname" . 
}} {{- if .Values.auth.enabled }} + {{- $defaultUser := get (.Values.auth.aclUsers | default dict) "default" | default dict }} + {{- $hasInlineDefaultPassword := hasKey $defaultUser "password" }} + {{- if .Values.auth.usersExistingSecret }} - name: REDIS_PASSWORD valueFrom: secretKeyRef: - {{- if .Values.auth.usersExistingSecret }} - {{- $defaultUser := index .Values.auth.aclUsers "default" | default dict }} - {{- $passwordKey := $defaultUser.passwordKey | default "default" }} name: {{ .Values.auth.usersExistingSecret }} - key: {{ $passwordKey }} - {{- else }} + key: {{ $defaultUser.passwordKey | default "default" }} + {{- else if $hasInlineDefaultPassword }} + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: name: {{ include "valkey.fullname" . }}-auth key: default-password - {{- end }} + {{- end }} {{- end }} {{- range $key, $val := .Values.metrics.exporter.extraEnvs }} - name: {{ $key }} diff --git a/valkey/templates/tests/auth.yaml b/valkey/templates/tests/auth.yaml index b289bb98..833d365a 100644 --- a/valkey/templates/tests/auth.yaml +++ b/valkey/templates/tests/auth.yaml @@ -19,9 +19,13 @@ metadata: "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded spec: restartPolicy: Never + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 4 }} containers: - name: test-auth image: {{ include "valkey.image" . | quote }} + securityContext: + {{- toYaml .Values.securityContext | nindent 8 }} command: - sh - -c @@ -95,9 +99,13 @@ metadata: "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded spec: restartPolicy: Never + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 4 }} containers: - name: test-auth image: {{ include "valkey.image" . 
| quote }} + securityContext: + {{- toYaml .Values.securityContext | nindent 8 }} command: - sh - -c diff --git a/valkey/tests/cluster_isolation_netpol_test.yaml b/valkey/tests/cluster_isolation_netpol_test.yaml new file mode 100644 index 00000000..52aa8903 --- /dev/null +++ b/valkey/tests/cluster_isolation_netpol_test.yaml @@ -0,0 +1,167 @@ +suite: cluster isolation network policy +templates: + - templates/cluster-isolation-netpol.yaml +tests: + - it: should not render when cluster mode is disabled + set: + cluster.enabled: false + asserts: + - hasDocuments: + count: 0 + + - it: should not render in replica mode + set: + replica.enabled: true + replica.persistence.size: "5Gi" + asserts: + - hasDocuments: + count: 0 + + - it: should not render when isolation is explicitly disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.isolation.enabled: false + asserts: + - hasDocuments: + count: 0 + + - it: should render by default in cluster mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - isKind: + of: NetworkPolicy + - equal: + path: metadata.name + value: RELEASE-NAME-valkey-cluster-isolation + + - it: should select only pods of this release + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - equal: + path: spec.podSelector.matchLabels["app.kubernetes.io/name"] + value: valkey + - equal: + path: spec.podSelector.matchLabels["app.kubernetes.io/instance"] + value: RELEASE-NAME + + # Egress is intentionally NOT restricted here — locking it down breaks + # Istio sidecar bootstrap (xDS to istiod) and any DNS-heavy flow. Users + # who want egress isolation should add their own NetworkPolicy on top. + - it: should restrict ingress only + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - equal: + path: spec.policyTypes + value: + - Ingress + - notExists: + path: spec.egress + + # The core guarantee: the bus port inbound is scoped to same-instance pods. 
+ - it: bus port ingress must be scoped to same-release pods + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.busPort: 16379 + asserts: + - contains: + path: spec.ingress + content: + from: + - podSelector: + matchLabels: + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: RELEASE-NAME + ports: + - protocol: TCP + port: 16379 + + # The client port must NOT be scoped — arbitrary clients need to reach it. + # If a future change accidentally restricts it to same-release pods, every + # client outside the chart will lose access. + - it: client port ingress must not require the same-release selector + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - contains: + path: spec.ingress + content: + ports: + - protocol: TCP + port: 6379 + + - it: should include metrics port ingress only when metrics enabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + metrics.enabled: true + metrics.exporter.port: 9121 + asserts: + - contains: + path: spec.ingress + content: + ports: + - protocol: TCP + port: 9121 + + - it: should not include metrics port ingress when metrics disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + metrics.enabled: false + asserts: + - notContains: + path: spec.ingress + content: + ports: + - protocol: TCP + port: 9121 + + # --- Istio ambient mesh interaction --- + # In ambient mode, ztunnel wraps all pod-to-pod hops in HBONE (port 15008) + # then re-delivers to the pod-local port. A NetworkPolicy that only lists + # 6379/16379/9121 drops the inbound HBONE and every connection breaks with + # "Connection reset by peer". The chart-owned AuthorizationPolicy gives + # equivalent (and cryptographically stronger) isolation at the ztunnel + # layer, so we render NO NetworkPolicy when ambient is on. Bus-port + # protection still exists — just at a different layer. 
+ - it: should not render in ambient mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.enabled: true + istio.mode: ambient + asserts: + - hasDocuments: + count: 0 + + - it: should still render in sidecar mode (Envoy's iptables capture is + per-pod and leaves the chart's pod-selector-based netpol correct) + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.enabled: true + istio.mode: sidecar + asserts: + - hasDocuments: + count: 1 + - isKind: + of: NetworkPolicy + + - it: should still render when istio is disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.enabled: false + asserts: + - hasDocuments: + count: 1 + diff --git a/valkey/tests/cluster_test.yaml b/valkey/tests/cluster_test.yaml new file mode 100644 index 00000000..07a2d720 --- /dev/null +++ b/valkey/tests/cluster_test.yaml @@ -0,0 +1,1496 @@ +suite: cluster configuration +templates: + - templates/cluster-statefulset.yaml + - templates/cluster-script.yaml + - templates/cluster-init-job.yaml + - templates/service-headless.yaml + - templates/service.yaml + - templates/init_config.yaml +tests: + # Validation tests + - it: should fail when cluster enabled but no persistence size provided + set: + cluster.enabled: true + cluster.persistence.size: "" + template: templates/cluster-statefulset.yaml + asserts: + - failedTemplate: + errorPattern: "Cluster mode requires persistent storage.*" + + - it: should fail when cluster enabled with less than 3 shards + set: + cluster.enabled: true + cluster.shards: 2 + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - failedTemplate: + errorPattern: "Cluster mode requires at least 3 shards.*" + + - it: should fail when both cluster and replica are enabled + set: + cluster.enabled: true + replica.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - failedTemplate: + errorPattern: "cluster.enabled and 
replica.enabled are mutually exclusive.*" + + # StatefulSet tests + - it: should create StatefulSet when cluster is enabled + set: + cluster.enabled: true + cluster.shards: 3 + cluster.replicasPerShard: 1 + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - isKind: + of: StatefulSet + - equal: + path: spec.replicas + value: 6 # 3 shards * (1 + 1 replica) = 6 nodes + + - it: should create StatefulSet with 3 shards and 0 replicas (3 nodes total) + set: + cluster.enabled: true + cluster.shards: 3 + cluster.replicasPerShard: 0 + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - isKind: + of: StatefulSet + - equal: + path: spec.replicas + value: 3 + + - it: should create StatefulSet with 5 shards and 2 replicas (15 nodes total) + set: + cluster.enabled: true + cluster.shards: 5 + cluster.replicasPerShard: 2 + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - isKind: + of: StatefulSet + - equal: + path: spec.replicas + value: 15 # 5 shards * (1 + 2 replicas) = 15 nodes + + - it: should use Parallel pod management policy for cluster mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.podManagementPolicy + value: Parallel + + - it: should configure PVC with correct storage settings + set: + cluster.enabled: true + cluster.persistence.size: "10Gi" + cluster.persistence.storageClass: "fast-ssd" + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.volumeClaimTemplates[0].spec.resources.requests.storage + value: "10Gi" + - equal: + path: spec.volumeClaimTemplates[0].spec.storageClassName + value: "fast-ssd" + + - it: should expose both tcp and tcp-bus ports in cluster mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.busPort: 16379 + template: templates/cluster-statefulset.yaml + asserts: + - 
contains: + path: spec.template.spec.containers[0].ports + content: + name: tcp + containerPort: 6379 + protocol: TCP + - contains: + path: spec.template.spec.containers[0].ports + content: + name: tcp-bus + containerPort: 16379 + protocol: TCP + + # StatefulSet runs valkey-server directly (no background init script) + - it: should run valkey-server directly without background init script + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.spec.containers[0].command + value: [ "valkey-server" ] + - equal: + path: spec.template.spec.containers[0].args + value: [ "/data/conf/valkey.conf" ] + + # Cluster-script is consumed on the STS side exclusively by the preStop + # CLUSTER FAILOVER hook: no replicas in the shard ⇒ no failover target + # ⇒ no need for the script on the main container. + - it: should not mount cluster-script volume in StatefulSet container when no replicas + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.replicasPerShard: 0 + template: templates/cluster-statefulset.yaml + asserts: + - notContains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: cluster-script + mountPath: /cluster-script + + - it: should not define cluster-script volume in StatefulSet when no replicas + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.replicasPerShard: 0 + template: templates/cluster-statefulset.yaml + asserts: + - notContains: + path: spec.template.spec.volumes + content: + name: cluster-script + any: true + + - it: should not mount cluster-script volume when preStopFailover disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.replicasPerShard: 1 + cluster.preStopFailover.enabled: false + template: templates/cluster-statefulset.yaml + asserts: + - notContains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: cluster-script + mountPath: 
/cluster-script + - notContains: + path: spec.template.spec.volumes + content: + name: cluster-script + any: true + + - it: should mount cluster-script volume in StatefulSet container when replicas>=1 (default) + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.replicasPerShard: 1 + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: cluster-script + mountPath: /cluster-script + - contains: + path: spec.template.spec.volumes + content: + name: cluster-script + configMap: + name: RELEASE-NAME-valkey-cluster-script + defaultMode: 365 # 0555 + + # --- preStop CLUSTER FAILOVER hook --- + - it: should render preStop CLUSTER FAILOVER hook when replicas>=1 (default) + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.replicasPerShard: 1 + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.spec.containers[0].lifecycle.preStop.exec.command + value: [ "/bin/sh", "/cluster-script/prestop.sh" ] + + - it: should NOT render preStop hook when replicasPerShard=0 + # Nothing to hand over to — the hook would be a no-op that just eats + # grace-period budget. 
+ set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.replicasPerShard: 0 + template: templates/cluster-statefulset.yaml + asserts: + - notExists: + path: spec.template.spec.containers[0].lifecycle + + - it: should NOT render preStop hook when preStopFailover explicitly disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.replicasPerShard: 1 + cluster.preStopFailover.enabled: false + template: templates/cluster-statefulset.yaml + asserts: + - notExists: + path: spec.template.spec.containers[0].lifecycle + + - it: should set terminationGracePeriodSeconds from cluster.terminationGracePeriodSeconds + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.terminationGracePeriodSeconds: 120 + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.spec.terminationGracePeriodSeconds + value: 120 + + - it: should default terminationGracePeriodSeconds to 60 (enough for default preStop timeout of 40s + SIGTERM flush) + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.spec.terminationGracePeriodSeconds + value: 60 + + - it: cluster-script ConfigMap should contain prestop.sh with CLUSTER FAILOVER + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-script.yaml + asserts: + - isNotNull: + path: data["prestop.sh"] + - matchRegex: + path: data["prestop.sh"] + pattern: "cluster failover" + + - it: prestop.sh should inline TLS args when tls.enabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + tls.enabled: true + tls.existingSecret: valkey-tls + template: templates/cluster-script.yaml + asserts: + - matchRegex: + path: data["prestop.sh"] + pattern: "--tls --cacert" + + - it: prestop.sh should NOT inline TLS args when tls disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: 
templates/cluster-script.yaml + asserts: + - notMatchRegex: + path: data["prestop.sh"] + pattern: "--tls --cacert" + + - it: prestop.sh should source REDISCLI_AUTH when auth.enabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.aclUsers: + default: + permissions: "~* &* +@all" + password: "secretpass" + template: templates/cluster-script.yaml + asserts: + - matchRegex: + path: data["prestop.sh"] + pattern: "REDISCLI_AUTH" + + - it: prestop.sh timeout should follow cluster.preStopFailover.timeoutSeconds + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.preStopFailover.timeoutSeconds: 25 + template: templates/cluster-script.yaml + asserts: + - matchRegex: + path: data["prestop.sh"] + pattern: "TIMEOUT=25" + + # Init container tests + - it: should have init container with cluster environment variables + set: + cluster.enabled: true + cluster.shards: 4 + cluster.replicasPerShard: 2 + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.initContainers[0].env + content: + name: CLUSTER_SHARDS + value: "4" + - contains: + path: spec.template.spec.initContainers[0].env + content: + name: CLUSTER_REPLICAS_PER_SHARD + value: "2" + + # Service headless tests + - it: should create headless service with bus port in cluster mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.busPort: 16379 + template: templates/service-headless.yaml + asserts: + - isKind: + of: Service + - equal: + path: spec.clusterIP + value: None + - contains: + path: spec.ports + content: + name: tcp + port: 6379 + targetPort: tcp + protocol: TCP + - contains: + path: spec.ports + content: + name: tcp-bus + port: 16379 + targetPort: tcp-bus + protocol: TCP + + # Main service tests + - it: should not expose the bus port on the frontend service in cluster mode + # Bus port is pod-to-pod only; clients reach nodes via the headless 
service. + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.busPort: 16379 + template: templates/service.yaml + asserts: + - isKind: + of: Service + # any: true makes these partial matches; without it notContains compares whole port entries and can never fail. + - notContains: + path: spec.ports + content: + name: tcp-bus + any: true + - notContains: + path: spec.ports + content: + port: 16379 + any: true + + # Cluster init script ConfigMap tests + - it: should create cluster-script ConfigMap when cluster is enabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-script.yaml + asserts: + - isKind: + of: ConfigMap + - equal: + path: metadata.name + value: RELEASE-NAME-valkey-cluster-script + + - it: cluster-script ConfigMap should contain init-cluster.sh + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-script.yaml + asserts: + - isNotNull: + path: data["init-cluster.sh"] + - matchRegex: + path: data["init-cluster.sh"] + pattern: "CLUSTER MEET" + + - it: cluster-script should contain cluster create logic + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-script.yaml + asserts: + - matchRegex: + path: data["init-cluster.sh"] + pattern: "--cluster create" + + # --- Cluster Init Job tests --- + - it: should create cluster-init Job when cluster is enabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-init-job.yaml + asserts: + - isKind: + of: Job + - equal: + path: metadata.name + value: RELEASE-NAME-valkey-cluster-init + + - it: Job should have Helm hook annotations for post-install and post-upgrade + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: metadata.annotations["helm.sh/hook"] + value: "post-install,post-upgrade" + - equal: + path: metadata.annotations["helm.sh/hook-weight"] + value: "0" + - equal: + path: metadata.annotations["helm.sh/hook-delete-policy"] + value: "before-hook-creation" + + - it: Job
should have backoffLimit of 6 + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.backoffLimit + value: 6 + + - it: Job should use OnFailure restart policy + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.spec.restartPolicy + value: OnFailure + + - it: Job should run init-cluster.sh from cluster-script ConfigMap + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.spec.containers[0].command + value: [ "/bin/sh", "/cluster-script/init-cluster.sh" ] + + - it: Job should mount cluster-script volume + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-init-job.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: cluster-script + mountPath: /cluster-script + - contains: + path: spec.template.spec.volumes + content: + name: cluster-script + configMap: + name: RELEASE-NAME-valkey-cluster-script + defaultMode: 365 + + - it: Job should have CLUSTER_NODE_COUNT and CLUSTER_REPLICAS_PER_SHARD env vars + set: + cluster.enabled: true + cluster.shards: 4 + cluster.replicasPerShard: 2 + cluster.persistence.size: "5Gi" + template: templates/cluster-init-job.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: CLUSTER_NODE_COUNT + value: "12" + - contains: + path: spec.template.spec.containers[0].env + content: + name: CLUSTER_REPLICAS_PER_SHARD + value: "2" + + - it: Job should use same image as StatefulSet + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + image.registry: "myregistry.io" + image.repository: "valkey/valkey" + image.tag: "7.0.0" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: 
spec.template.spec.containers[0].image + value: "myregistry.io/valkey/valkey:7.0.0" + + - it: Job should use pod security context + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + podSecurityContext: + fsGroup: 1000 + runAsUser: 1000 + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.spec.securityContext.fsGroup + value: 1000 + - equal: + path: spec.template.spec.securityContext.runAsUser + value: 1000 + + - it: Job should use container security context + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + securityContext: + allowPrivilegeEscalation: false + runAsNonRoot: true + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.spec.containers[0].securityContext.allowPrivilegeEscalation + value: false + - equal: + path: spec.template.spec.containers[0].securityContext.runAsNonRoot + value: true + + - it: Job should use initResources when set + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + initResources: + limits: + cpu: 200m + memory: 256Mi + requests: + cpu: 100m + memory: 128Mi + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.spec.containers[0].resources.limits.cpu + value: 200m + - equal: + path: spec.template.spec.containers[0].resources.requests.memory + value: 128Mi + + - it: Job should use service account + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + serviceAccount.create: true + serviceAccount.name: "my-sa" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.spec.serviceAccountName + value: "my-sa" + + - it: Job should not automount service account token + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.spec.automountServiceAccountToken + value: false + + - it: Job should include common labels + set: + cluster.enabled: true + 
cluster.persistence.size: "5Gi" + template: templates/cluster-init-job.yaml + asserts: + - isNotNull: + path: metadata.labels["helm.sh/chart"] + - isNotNull: + path: metadata.labels["app.kubernetes.io/name"] + + - it: Job should include pod labels and annotations when set + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + podLabels: + custom-label: my-value + podAnnotations: + custom-annotation: my-annotation + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.metadata.labels["custom-label"] + value: my-value + - equal: + path: spec.template.metadata.annotations["custom-annotation"] + value: my-annotation + + # --- cluster.initJob.podLabels / .podAnnotations override surface --- + # Lets operators veto a globally-injected sidecar (metrics agent, mesh + # proxy via namespace label, policy webhook, etc.) on the short-lived + # cluster-init Job without affecting the long-running data pods. + + - it: Job should NOT have annotations key when none are set + # Important for Helm/`with` semantics: an empty mapping is not the + # same as an absent key; preserving the absent-key shape keeps the + # rendered manifest identical to its pre-feature state when no + # overrides are configured. 
+ set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-init-job.yaml + asserts: + - notExists: + path: spec.template.metadata.annotations + + - it: cluster.initJob.podLabels should land on the Job pod + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.initJob.podLabels: + my-injector/skip: "true" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.metadata.labels["my-injector/skip"] + value: "true" + + - it: cluster.initJob.podAnnotations should land on the Job pod + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.initJob.podAnnotations: + sidecar-injector.example.com/skip: "true" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.metadata.annotations["sidecar-injector.example.com/skip"] + value: "true" + + - it: cluster.initJob.podLabels should NOT leak onto the data StatefulSet + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.initJob.podLabels: + my-injector/skip: "true" + template: templates/cluster-statefulset.yaml + asserts: + - notExists: + path: spec.template.metadata.labels["my-injector/skip"] + + - it: cluster.initJob.podAnnotations should NOT leak onto the data StatefulSet + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.initJob.podAnnotations: + sidecar-injector.example.com/skip: "true" + template: templates/cluster-statefulset.yaml + asserts: + - notExists: + path: spec.template.metadata.annotations["sidecar-injector.example.com/skip"] + + - it: cluster.initJob.podLabels should win over global podLabels on key collision + # Same key, different values. The Job-scoped layer must override the + # global one so an opt-out can be expressed Job-only. 
+ set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + podLabels: + observe.example.com/inject: "true" + cluster.initJob.podLabels: + observe.example.com/inject: "false" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.metadata.labels["observe.example.com/inject"] + value: "false" + + - it: cluster.initJob.podAnnotations should win over global podAnnotations on key collision + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + podAnnotations: + sidecar.example.com/inject: "true" + cluster.initJob.podAnnotations: + sidecar.example.com/inject: "false" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.metadata.annotations["sidecar.example.com/inject"] + value: "false" + + - it: cluster.initJob.podLabels should be able to veto chart-emitted istio sidecar inject label + # Real-world repro: in sidecar mode the chart emits + # sidecar.istio.io/inject=true on every pod. Operators may want the + # Job to skip injection (e.g. their own scrape-only inspector + # mid-rollout, or to avoid the Job-hangs-on-sidecar problem). The + # override must sit at the END of the merge so it can replace a + # chart-computed mesh label, not just a user-supplied one. + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.initJob.podLabels: + sidecar.istio.io/inject: "false" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.metadata.labels["sidecar.istio.io/inject"] + value: "false" + + - it: cluster.initJob overrides do NOT veto the chart-emitted mesh label on the data StatefulSet + # Symmetric guard for the test above — proves the veto is Job-only, + # so flipping it in the Job can't accidentally take the data pods + # out of the mesh. 
+ set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.initJob.podLabels: + sidecar.istio.io/inject: "false" + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.metadata.labels["sidecar.istio.io/inject"] + value: "true" + + - it: Job should default ttlSecondsAfterFinished to 300 (auto-cleanup window) + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.ttlSecondsAfterFinished + value: 300 + + - it: cluster.initJob.ttlSecondsAfterFinished should be configurable + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.initJob.ttlSecondsAfterFinished: 60 + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.ttlSecondsAfterFinished + value: 60 + + - it: cluster.initJob.ttlSecondsAfterFinished=0 deletes immediately on completion + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.initJob.ttlSecondsAfterFinished: 0 + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.ttlSecondsAfterFinished + value: 0 + + - it: global podLabels still flow through to the Job when initJob.podLabels does not collide + # Don't shadow the existing pre-feature behaviour: a user who has set + # only the global podLabels gets them on the Job too. 
+ set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + podLabels: + team: platform + cluster.initJob.podLabels: + my-injector/skip: "true" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.metadata.labels.team + value: platform + - equal: + path: spec.template.metadata.labels["my-injector/skip"] + value: "true" + + - it: Job should include node selector when set + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + nodeSelector: + kubernetes.io/os: linux + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.spec.nodeSelector["kubernetes.io/os"] + value: linux + + - it: Job should include tolerations when set + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + tolerations: + - key: "dedicated" + operator: "Equal" + value: "valkey" + effect: "NoSchedule" + template: templates/cluster-init-job.yaml + asserts: + - contains: + path: spec.template.spec.tolerations + content: + key: "dedicated" + operator: "Equal" + value: "valkey" + effect: "NoSchedule" + + - it: Job should include affinity when set + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-type + operator: In + values: + - cache + template: templates/cluster-init-job.yaml + asserts: + - isNotNull: + path: spec.template.spec.affinity.nodeAffinity + + - it: Job should include priority class name when set + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + priorityClassName: "high-priority" + template: templates/cluster-init-job.yaml + asserts: + - equal: + path: spec.template.spec.priorityClassName + value: "high-priority" + + # --- Job TLS tests --- + - it: Job should mount TLS volume when TLS is enabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + tls.enabled: true + tls.existingSecret: "valkey-tls-secret" + template: 
templates/cluster-init-job.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: RELEASE-NAME-valkey-tls + mountPath: /tls + - contains: + path: spec.template.spec.volumes + content: + name: RELEASE-NAME-valkey-tls + secret: + secretName: valkey-tls-secret + defaultMode: 256 + + - it: Job should not mount TLS volume when TLS is disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + tls.enabled: false + template: templates/cluster-init-job.yaml + asserts: + - notContains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: RELEASE-NAME-valkey-tls + mountPath: /tls + + # --- Job Authentication tests --- + - it: Job should mount valkey-users-secret when usersExistingSecret is set + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.usersExistingSecret: "my-valkey-users" + auth.aclUsers: + default: + permissions: "~* &* +@all" + template: templates/cluster-init-job.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: valkey-users-secret + mountPath: /valkey-users-secret + readOnly: true + - contains: + path: spec.template.spec.volumes + content: + name: valkey-users-secret + secret: + secretName: my-valkey-users + defaultMode: 256 + + - it: Job should mount valkey-auth-secret when inline passwords are used + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.aclUsers: + default: + password: "testpass" + permissions: "~* &* +@all" + template: templates/cluster-init-job.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: valkey-auth-secret + mountPath: /valkey-auth-secret + readOnly: true + - contains: + path: spec.template.spec.volumes + content: + name: valkey-auth-secret + secret: + secretName: RELEASE-NAME-valkey-auth + defaultMode: 256 + + - it: Job should mount both auth secrets when both are 
configured + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.usersExistingSecret: "my-valkey-users" + auth.aclUsers: + default: + permissions: "~* &* +@all" + password: "fallback" + template: templates/cluster-init-job.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: valkey-users-secret + mountPath: /valkey-users-secret + readOnly: true + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: valkey-auth-secret + mountPath: /valkey-auth-secret + readOnly: true + + - it: Job should not mount auth secrets when auth is disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: false + template: templates/cluster-init-job.yaml + asserts: + - notContains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: valkey-users-secret + any: true + - notContains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: valkey-auth-secret + any: true + + # --- Cluster init script password retrieval tests --- + - it: cluster-script should read password from valkey-users-secret when usersExistingSecret is set + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.usersExistingSecret: "my-valkey-users" + auth.aclUsers: + default: + permissions: "~* &* +@all" + template: templates/cluster-script.yaml + asserts: + - matchRegex: + path: data["init-cluster.sh"] + pattern: '/valkey-users-secret/' + - notMatchRegex: + path: data["init-cluster.sh"] + pattern: '/etc/valkey/users.acl' + + - it: cluster-script should read password from valkey-auth-secret when inline passwords are used + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.aclUsers: + default: + password: "testpass" + permissions: "~* &* +@all" + template: templates/cluster-script.yaml + asserts: + - matchRegex: + path: data["init-cluster.sh"] + pattern: 
'/valkey-auth-secret/default-password' + - notMatchRegex: + path: data["init-cluster.sh"] + pattern: '/etc/valkey/users.acl' + + - it: cluster-script should use custom passwordKey when configured + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.usersExistingSecret: "my-valkey-users" + auth.aclUsers: + default: + permissions: "~* &* +@all" + passwordKey: "default-pwd" + template: templates/cluster-script.yaml + asserts: + - matchRegex: + path: data["init-cluster.sh"] + pattern: '/valkey-users-secret/default-pwd' + + - it: cluster-script should use custom replicationUser for auth + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.replicationUser: "clusteruser" + auth.enabled: true + auth.aclUsers: + default: + password: "defaultpass" + permissions: "~* &* +@all" + clusteruser: + password: "clusterpass" + permissions: "~* &* +@all" + template: templates/cluster-script.yaml + asserts: + - matchRegex: + path: data["init-cluster.sh"] + pattern: '/valkey-auth-secret/clusteruser-password' + + - it: cluster-script should NOT parse password hash from ACL file + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.aclUsers: + default: + password: "testpass" + permissions: "~* &* +@all" + template: templates/cluster-script.yaml + asserts: + # Ensure we don't try to extract the hash from the ACL file + - notMatchRegex: + path: data["init-cluster.sh"] + pattern: 'grep.*users\.acl' + + # Authentication tests (StatefulSet) + - it: should fail when cluster auth enabled but replication user not in aclUsers + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + cluster.replicationUser: "clusteruser" + auth.aclUsers: + default: + password: "test" + permissions: "~* &* +@all" + template: templates/cluster-statefulset.yaml + asserts: + - failedTemplate: + errorPattern: "Cluster replication user 'clusteruser'.*must be defined in auth.aclUsers.*" + + - 
it: should succeed when cluster auth is properly configured + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + cluster.replicationUser: "default" + auth.aclUsers: + default: + password: "testpass" + permissions: "~* &* +@all" + template: templates/cluster-statefulset.yaml + asserts: + - isKind: + of: StatefulSet + + # TLS tests (StatefulSet) + - it: should configure TLS volume mount in cluster mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + tls.enabled: true + tls.existingSecret: "valkey-tls-secret" + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: RELEASE-NAME-valkey-tls + mountPath: /tls + + # Init config tests (cluster mode config generation) + - it: should generate cluster config in init script + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.nodeTimeout: 20000 + template: templates/init_config.yaml + asserts: + - matchRegex: + path: data["init.sh"] + pattern: "cluster-enabled yes" + - matchRegex: + path: data["init.sh"] + pattern: "cluster-config-file /data/nodes.conf" + - matchRegex: + path: data["init.sh"] + pattern: "cluster-node-timeout 20000" + + - it: should configure cluster-require-full-coverage when disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.requireFullCoverage: false + template: templates/init_config.yaml + asserts: + - matchRegex: + path: data["init.sh"] + pattern: "cluster-require-full-coverage no" + + - it: should configure cluster-allow-reads-when-down when enabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.allowReadsWhenDown: true + template: templates/init_config.yaml + asserts: + - matchRegex: + path: data["init.sh"] + pattern: "cluster-allow-reads-when-down yes" + + # Cluster auth secret mount tests (StatefulSet - main container still needs ACL for Valkey server) + - it: should mount 
valkey-users-secret to main container when auth.usersExistingSecret is set + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.usersExistingSecret: "my-valkey-users" + auth.aclUsers: + default: + permissions: "~* &* +@all" + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: valkey-users-secret + mountPath: /valkey-users-secret + readOnly: true + + - it: should mount valkey-auth-secret to main container when inline passwords are used + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.aclUsers: + default: + password: "testpass" + permissions: "~* &* +@all" + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: valkey-auth-secret + mountPath: /valkey-auth-secret + readOnly: true + + - it: should mount both auth secrets to main container when both are configured + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.usersExistingSecret: "my-valkey-users" + auth.aclUsers: + default: + permissions: "~* &* +@all" + password: "fallback" + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: valkey-users-secret + mountPath: /valkey-users-secret + readOnly: true + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: valkey-auth-secret + mountPath: /valkey-auth-secret + readOnly: true + + # Regression: probes must accept NOAUTH as proof of liveness. 
+ - it: should use a PONG|NOAUTH-tolerant probe on the valkey container + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + auth.enabled: true + auth.aclUsers: + default: + permissions: "~* &* +@all" + password: "p" + template: templates/cluster-statefulset.yaml + asserts: + - matchRegex: + path: spec.template.spec.containers[0].startupProbe.exec.command[2] + pattern: "PONG\\|NOAUTH" + - matchRegex: + path: spec.template.spec.containers[0].livenessProbe.exec.command[2] + pattern: "PONG\\|NOAUTH" + - matchRegex: + path: spec.template.spec.containers[0].readinessProbe.exec.command[2] + pattern: "PONG\\|NOAUTH" + + - it: should define a readiness probe + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - exists: + path: spec.template.spec.containers[0].readinessProbe + + # --- Tri-state LOADING policy --- + # startupProbe: rejects LOADING — must keep the gate honest so a slow + # RDB load doesn't pass-through to liveness, which would then kill + # the still-loading container. Operators bump + # cluster.startupProbe.failureThreshold for slow loaders. + # livenessProbe: accepts LOADING — after startup passes, LOADING means + # a full-resync from primary is in progress; killing the pod loses + # the in-flight download and triggers another full resync, + # perpetuating the kill-loop. + # readinessProbe: rejects LOADING — a LOADING pod can't serve traffic + # and must drop out of the Service endpoint set until ready. 
+ - it: cluster startupProbe must reject LOADING + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - notMatchRegex: + path: spec.template.spec.containers[0].startupProbe.exec.command[2] + pattern: "LOADING" + + - it: cluster livenessProbe must accept LOADING (full-resync tolerance) + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - matchRegex: + path: spec.template.spec.containers[0].livenessProbe.exec.command[2] + pattern: "PONG\\|NOAUTH\\|LOADING" + + - it: cluster readinessProbe must reject LOADING + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/cluster-statefulset.yaml + asserts: + - notMatchRegex: + path: spec.template.spec.containers[0].readinessProbe.exec.command[2] + pattern: "LOADING" + + # Tuning knobs flow through to the rendered probe. Operators with + # large datasets bump cluster.startupProbe.failureThreshold to extend + # the load window without affecting steady-state probes. + - it: cluster.startupProbe overrides should land on the startupProbe + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.startupProbe.periodSeconds: 10 + cluster.startupProbe.timeoutSeconds: 8 + cluster.startupProbe.failureThreshold: 240 + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.spec.containers[0].startupProbe.periodSeconds + value: 10 + - equal: + path: spec.template.spec.containers[0].startupProbe.timeoutSeconds + value: 8 + - equal: + path: spec.template.spec.containers[0].startupProbe.failureThreshold + value: 240 + # And the overrides must be probe-scoped: liveness/readiness keep + # their defaults. 
+ - equal: + path: spec.template.spec.containers[0].livenessProbe.periodSeconds + value: 10 + - equal: + path: spec.template.spec.containers[0].livenessProbe.failureThreshold + value: 6 + + - it: cluster.livenessProbe overrides should land on the livenessProbe only + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.livenessProbe.failureThreshold: 12 + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.spec.containers[0].livenessProbe.failureThreshold + value: 12 + - equal: + path: spec.template.spec.containers[0].startupProbe.failureThreshold + value: 30 + - equal: + path: spec.template.spec.containers[0].readinessProbe.failureThreshold + value: 3 + + # Regression: extraContainers and extraVolumes were unwired in cluster mode. + - it: should wire extraContainers and extraVolumes through in cluster mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + extraContainers: + - name: sidecar + image: busybox:1.36 + extraVolumes: + - name: extra + emptyDir: {} + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.containers + content: + name: sidecar + image: busybox:1.36 + - contains: + path: spec.template.spec.volumes + content: + name: extra + emptyDir: {} + + # Regression: REDIS_PASSWORD should be wired through in cluster mode too. 
+ - it: should wire REDIS_PASSWORD to the metrics exporter from the generated auth secret + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + metrics.enabled: true + auth.enabled: true + auth.aclUsers: + default: + permissions: "~* &* +@all" + password: "p" + template: templates/cluster-statefulset.yaml + asserts: + - contains: + path: spec.template.spec.containers[1].env + content: + name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: RELEASE-NAME-valkey-auth + key: default-password + + # --- Istio ambient mode (cluster) --- + - it: should add ambient dataplane-mode label when istio.mode=ambient + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.enabled: true + istio.mode: ambient + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.metadata.labels["istio.io/dataplane-mode"] + value: ambient + + - it: should NOT emit traffic.sidecar.istio.io exclude annotations in ambient mode + # Ambient has no pod-local Envoy; the exclude* annotations are sidecar-only + # and meaningless to ztunnel. ztunnel leaves the bus port unproxied by + # default (nothing in the AuthorizationPolicy's ALLOW set binds it). 
+ set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.enabled: true + istio.mode: ambient + template: templates/cluster-statefulset.yaml + asserts: + - notExists: + path: spec.template.metadata.annotations["traffic.sidecar.istio.io/excludeInboundPorts"] + - notExists: + path: spec.template.metadata.annotations["traffic.sidecar.istio.io/excludeOutboundPorts"] + + - it: should emit traffic.sidecar.istio.io exclude annotations in sidecar mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.enabled: true + istio.mode: sidecar + cluster.busPort: 16379 + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.metadata.annotations["traffic.sidecar.istio.io/excludeInboundPorts"] + value: "16379" + - equal: + path: spec.template.metadata.annotations["traffic.sidecar.istio.io/excludeOutboundPorts"] + value: "16379" + + - it: should NOT emit traffic.sidecar.istio.io annotations when istio is disabled + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.enabled: false + template: templates/cluster-statefulset.yaml + asserts: + - notExists: + path: spec.template.metadata.annotations["traffic.sidecar.istio.io/excludeInboundPorts"] + + - it: should emit sidecar-mode mesh labels on the cluster statefulset + # Sidecar mode is self-sufficient now: we pin dataplane-mode=none AND + # sidecar.istio.io/inject=true on the pod, so injection works whether + # or not the namespace is labelled, and ztunnel stays out of the way + # on dual-mode clusters. Regression: we used to emit neither, leaning + # on namespace labels exclusively. 
+ set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.enabled: true + istio.mode: sidecar + template: templates/cluster-statefulset.yaml + asserts: + - equal: + path: spec.template.metadata.labels["istio.io/dataplane-mode"] + value: none + - equal: + path: spec.template.metadata.labels["sidecar.istio.io/inject"] + value: "true" + + - it: should emit no mesh labels when istio is disabled (cluster) + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.enabled: false + template: templates/cluster-statefulset.yaml + asserts: + - notExists: + path: spec.template.metadata.labels["istio.io/dataplane-mode"] + - notExists: + path: spec.template.metadata.labels["sidecar.istio.io/inject"] + diff --git a/valkey/tests/deployment_test.yaml b/valkey/tests/deployment_test.yaml index 28c2653d..6368699b 100644 --- a/valkey/tests/deployment_test.yaml +++ b/valkey/tests/deployment_test.yaml @@ -3,6 +3,42 @@ templates: - templates/deploy_valkey.yaml - templates/init_config.yaml tests: + - it: should not create Deployment when replica.enabled is true + set: + replica.enabled: true + template: templates/deploy_valkey.yaml + asserts: + - hasDocuments: + count: 0 + + - it: should not create Deployment when cluster.enabled is true + set: + cluster.enabled: true + template: templates/deploy_valkey.yaml + asserts: + - hasDocuments: + count: 0 + + - it: should not create Deployment when both replica.enabled and cluster.enabled are true + set: + replica.enabled: true + cluster.enabled: true + template: templates/deploy_valkey.yaml + asserts: + - hasDocuments: + count: 0 + + - it: should create Deployment when both replica.enabled and cluster.enabled are false + set: + replica.enabled: false + cluster.enabled: false + template: templates/deploy_valkey.yaml + asserts: + - hasDocuments: + count: 1 + - isKind: + of: Deployment + - it: should not have auth volumes when auth disabled set: auth.enabled: false @@ -428,3 +464,235 @@ tests: secretKeyRef: name: 
my-custom-secret key: my-password-key + + # Regression: previously the exporter pointed REDIS_PASSWORD at a key + # (`default-password`) that is never created in aclConfig-only mode, so the + # container crash-looped on CreateContainerConfigError. + - it: should omit REDIS_PASSWORD when only auth.aclConfig is set + set: + auth.enabled: true + auth.aclConfig: "user default on >p ~* &* +@all" + metrics.enabled: true + template: templates/deploy_valkey.yaml + asserts: + - isKind: + of: Deployment + - notContains: + path: spec.template.spec.containers[1].env + content: + name: REDIS_PASSWORD + any: true + + # Regression: probes must accept NOAUTH as proof of liveness, otherwise every + # auth-enabled deployment's liveness probe silently passes on exit code 0 + # while not actually checking anything meaningful. + - it: should use a PONG|NOAUTH-tolerant probe on the valkey container + set: + auth.enabled: true + auth.aclUsers: + default: + permissions: "~* &* +@all" + password: "p" + template: templates/deploy_valkey.yaml + asserts: + - matchRegex: + path: spec.template.spec.containers[0].startupProbe.exec.command[2] + pattern: "PONG\\|NOAUTH" + - matchRegex: + path: spec.template.spec.containers[0].livenessProbe.exec.command[2] + pattern: "PONG\\|NOAUTH" + - matchRegex: + path: spec.template.spec.containers[0].readinessProbe.exec.command[2] + pattern: "PONG\\|NOAUTH" + + # Regression: there should be a readiness probe at all — previously missing. 
+ - it: should define a readiness probe + template: templates/deploy_valkey.yaml + asserts: + - exists: + path: spec.template.spec.containers[0].readinessProbe + + # --- Tri-state LOADING policy (see cluster_test.yaml for rationale) --- + - it: standalone startupProbe must reject LOADING + template: templates/deploy_valkey.yaml + asserts: + - notMatchRegex: + path: spec.template.spec.containers[0].startupProbe.exec.command[2] + pattern: "LOADING" + + - it: standalone livenessProbe must accept LOADING (full-resync tolerance) + template: templates/deploy_valkey.yaml + asserts: + - matchRegex: + path: spec.template.spec.containers[0].livenessProbe.exec.command[2] + pattern: "PONG\\|NOAUTH\\|LOADING" + + - it: standalone readinessProbe must reject LOADING + template: templates/deploy_valkey.yaml + asserts: + - notMatchRegex: + path: spec.template.spec.containers[0].readinessProbe.exec.command[2] + pattern: "LOADING" + + - it: top-level startupProbe overrides should land on the startupProbe only + set: + startupProbe.failureThreshold: 240 + template: templates/deploy_valkey.yaml + asserts: + - equal: + path: spec.template.spec.containers[0].startupProbe.failureThreshold + value: 240 + - equal: + path: spec.template.spec.containers[0].livenessProbe.failureThreshold + value: 6 + - equal: + path: spec.template.spec.containers[0].readinessProbe.failureThreshold + value: 3 + + # --- Istio ambient mode --- + - it: should add ambient dataplane-mode label when istio.mode=ambient + set: + istio.enabled: true + istio.mode: ambient + template: templates/deploy_valkey.yaml + asserts: + - equal: + path: spec.template.metadata.labels["istio.io/dataplane-mode"] + value: ambient + + # In sidecar mode istio.io/dataplane-mode is set to "none" (not absent) so + # a dual-mode cluster running ambient too doesn't accidentally capture + # this pod via ztunnel on top of its Envoy sidecar. 
+ - it: should set istio.io/dataplane-mode=none in sidecar mode + set: + istio.enabled: true + istio.mode: sidecar + template: templates/deploy_valkey.yaml + asserts: + - equal: + path: spec.template.metadata.labels["istio.io/dataplane-mode"] + value: none + + # Namespaces labelled `istio-injection=enabled` would otherwise inject an + # Envoy sidecar AND the ambient capture label would direct traffic to + # ztunnel — the pod then gets double-redirected and every connection + # fails with "Connection reset by peer". Opting the pod out of injection + # explicitly is the only reliable way to make ambient work in that setup. + - it: should set sidecar.istio.io/inject=false in ambient mode + set: + istio.enabled: true + istio.mode: ambient + template: templates/deploy_valkey.yaml + asserts: + - equal: + path: spec.template.metadata.labels["sidecar.istio.io/inject"] + value: "false" + + # Sidecar mode now forces injection on at the pod level so the chart + # doesn't silently depend on the namespace carrying + # istio-injection=enabled. Previously this was the user's problem to get + # right — half the "it's not working" issue reports in sidecar mode came + # down to "the namespace wasn't labelled". + - it: should set sidecar.istio.io/inject=true in sidecar mode + set: + istio.enabled: true + istio.mode: sidecar + template: templates/deploy_valkey.yaml + asserts: + - equal: + path: spec.template.metadata.labels["sidecar.istio.io/inject"] + value: "true" + + - it: should NOT add ambient dataplane-mode label when istio is disabled + set: + istio.enabled: false + template: templates/deploy_valkey.yaml + asserts: + - notExists: + path: spec.template.metadata.labels["istio.io/dataplane-mode"] + + # Regression: the istioPodLabels helper used to emit nothing in sidecar + # mode AND still have its caller run `nindent 8`, which injects a blank + # line into the labels map. Valid YAML, but crap to read in diffs. 
After + # the fix the labels map holds exactly the selector labels + the two + # mode-specific keys — no blank key, no stray whitespace. + - it: should produce exactly the sidecar label set in sidecar mode + set: + istio.enabled: true + istio.mode: sidecar + template: templates/deploy_valkey.yaml + asserts: + - equal: + path: spec.template.metadata.labels + value: + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: RELEASE-NAME + sidecar.istio.io/inject: "true" + istio.io/dataplane-mode: none + + # And with ambient, exactly the two ambient-specific keys on top of the + # selector labels — and nothing else. + - it: should produce exactly the ambient label set in ambient mode + set: + istio.enabled: true + istio.mode: ambient + template: templates/deploy_valkey.yaml + asserts: + - equal: + path: spec.template.metadata.labels + value: + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: RELEASE-NAME + istio.io/dataplane-mode: ambient + sidecar.istio.io/inject: "false" + + # And with istio disabled, no mesh labels at all — users can still opt + # into their own mesh labels via podLabels. + - it: should emit no mesh labels when istio.enabled is false + set: + istio.enabled: false + template: templates/deploy_valkey.yaml + asserts: + - equal: + path: spec.template.metadata.labels + value: + app.kubernetes.io/name: valkey + app.kubernetes.io/instance: RELEASE-NAME + + # User override: a podLabels entry that collides with a chart-computed + # mesh label (e.g. sidecar.istio.io/inject) must win cleanly, with NO + # duplicate YAML keys in the rendered output. This lets operators run + # istio.enabled=true but force a specific release out of the sidecar + # mesh (rare but legitimate — e.g. pinning a canary pod to plain TCP). 
+ - it: user podLabels must override chart mesh labels cleanly + set: + istio.enabled: true + istio.mode: sidecar + podLabels: + sidecar.istio.io/inject: "false" + custom-label: "custom-value" + template: templates/deploy_valkey.yaml + asserts: + - equal: + path: spec.template.metadata.labels["sidecar.istio.io/inject"] + value: "false" + - equal: + path: spec.template.metadata.labels["istio.io/dataplane-mode"] + value: none + - equal: + path: spec.template.metadata.labels["custom-label"] + value: "custom-value" + + # Regression: installing istio.enabled=false with a typo'd istio.mode + # (istio.mode=ambiet) used to sail through because the validator gated + # on istio.enabled. With schema validation active it now fails at lint, + # and the template helper would catch it too if someone force-fed an + # invalid value past the schema. This pins the schema-layer rejection. + - it: should reject typo'd istio.mode even when istio.enabled=false (at schema layer) + set: + istio.enabled: false + istio.mode: ambiet + template: templates/deploy_valkey.yaml + asserts: + - failedTemplate: + errorMessage: "values don't meet the specifications of the schema(s) in the following chart(s):\nvalkey:\n- at '/istio/mode': value must be one of 'sidecar', 'ambient'\n" diff --git a/valkey/tests/init_config_test.yaml b/valkey/tests/init_config_test.yaml index 0b1067de..8c6a8adc 100644 --- a/valkey/tests/init_config_test.yaml +++ b/valkey/tests/init_config_test.yaml @@ -145,3 +145,95 @@ tests: - equal: path: metadata.labels["app.kubernetes.io/name"] value: valkey + + # Regression: `echo "$password"` in dash interprets \b, \t, \n, \\, etc. + # before writing them out. A password containing a backslash then gets a + # DIFFERENT sha256 than the bytes that were stored in the Secret — client + # auth with the real password fails WRONGPASS, since the stored ACL hash + # is of the mangled bytes. Must be `printf '%s'` (byte-safe).
+ - it: get_user_password must be byte-safe (no echo) + set: + auth.enabled: true + auth.aclUsers: + admin: + permissions: "~* &* +@all" + password: "admin-password" + asserts: + - notMatchRegex: + path: data["init.sh"] + pattern: 'echo "\$password"' + - matchRegex: + path: data["init.sh"] + pattern: "printf '%s' \"\\$password\"" + + # Regression: masterauth line in valkey.conf used to be unquoted, so a + # password with whitespace or `#` (valkey.conf comment char) would break + # the config parser. Must be double-quoted with backslash-escapes. + - it: masterauth must be written as a quoted+escaped string + set: + auth.enabled: true + replica.enabled: true + replica.persistence.size: "5Gi" + auth.aclUsers: + default: + permissions: "~* &* +@all" + password: "whatever" + asserts: + - matchRegex: + path: data["init.sh"] + pattern: "printf 'masterauth \"%s\"\\\\n' \"\\$REPL_PASSWORD_ESC\"" + - matchRegex: + path: data["init.sh"] + # The escape pass: s/\\/\\\\/g; s/"/\\"/g + pattern: "REPL_PASSWORD_ESC=\\$\\(printf '%s' \"\\$REPL_PASSWORD\"" + + # --- nodes.conf IP refresh on cluster pod restart --- + # Cluster bus gossip dials by IP, even with cluster-preferred-endpoint-type= + # hostname. After a rolling restart pod IPs change, and a pod whose + # nodes.conf has only stale IPs becomes a stranded minority partition. + # The refresh block re-resolves each peer's announced FQDN and rewrites + # the IP before valkey-server starts. + - it: cluster mode should emit a nodes.conf IP refresh block + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - matchRegex: + path: data["init.sh"] + pattern: "NODES_CONF=/data/nodes\\.conf" + - matchRegex: + path: data["init.sh"] + pattern: "getent hosts \"\\$fqdn\"" + # Atomic swap so a kill mid-rewrite can't corrupt the file. + - matchRegex: + path: data["init.sh"] + pattern: 'mv "\$TMP" "\$NODES_CONF"' + # No-op when nodes.conf doesn't exist (first boot). 
+ - matchRegex: + path: data["init.sh"] + pattern: "first boot, nothing to refresh" + # 'vars' line at EOF must be passed through verbatim — it's + # currentEpoch / lastVoteEpoch state and corrupting it would force + # a fresh cluster join. The case-pattern matches an empty line + # OR a line starting with 'vars '. + - matchRegex: + path: data["init.sh"] + pattern: "''\\|vars" + + - it: non-cluster mode should NOT emit the nodes.conf refresh block + # Standalone and replicated modes have no nodes.conf — emitting the + # block in their init.sh would just be dead code and could log + # confusing "first boot" messages on every restart. + asserts: + - notMatchRegex: + path: data["init.sh"] + pattern: "NODES_CONF=/data/nodes\\.conf" + + - it: replicated (non-cluster) mode should NOT emit the nodes.conf refresh block + set: + replica.enabled: true + replica.persistence.size: "5Gi" + asserts: + - notMatchRegex: + path: data["init.sh"] + pattern: "NODES_CONF=/data/nodes\\.conf" diff --git a/valkey/tests/istio_authorization_policy_test.yaml b/valkey/tests/istio_authorization_policy_test.yaml new file mode 100644 index 00000000..b47406a9 --- /dev/null +++ b/valkey/tests/istio_authorization_policy_test.yaml @@ -0,0 +1,427 @@ +suite: istio authorization policy (cluster-bus isolation) +templates: + - templates/istio-authorization-policy.yaml +tests: + # --- Feature flag tests --- + - it: should not render when istio is disabled + set: + istio.enabled: false + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - hasDocuments: + count: 0 + + - it: should not render when cluster mode is disabled + # No bus port to protect in standalone/replica mode. 
+ set: + istio.enabled: true + asserts: + - hasDocuments: + count: 0 + + - it: should not render in replica (non-cluster) mode + set: + istio.enabled: true + replica.enabled: true + replica.persistence.size: "5Gi" + asserts: + - hasDocuments: + count: 0 + + - it: should not render when authorizationPolicy is explicitly disabled + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.authorizationPolicy.enabled: false + asserts: + - hasDocuments: + count: 0 + + - it: should render in cluster mode with istio enabled (sidecar default) + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - hasDocuments: + count: 1 + - isKind: + of: AuthorizationPolicy + - isAPIVersion: + of: security.istio.io/v1 + + - it: should render in cluster mode with istio enabled (ambient) + # Ambient ztunnel enforces the same L4 AuthorizationPolicy shape. + set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - hasDocuments: + count: 1 + - isKind: + of: AuthorizationPolicy + + # --- Resource identity --- + - it: should have correct name + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - equal: + path: metadata.name + value: RELEASE-NAME-valkey-cluster-bus + + - it: should use fullnameOverride in name + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + fullnameOverride: "my-valkey" + asserts: + - equal: + path: metadata.name + value: my-valkey-cluster-bus + + - it: should include chart labels + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - isNotNull: + path: metadata.labels["helm.sh/chart"] + - isNotNull: + path: metadata.labels["app.kubernetes.io/name"] + - isNotNull: + path: metadata.labels["app.kubernetes.io/managed-by"] + + - it: should include commonLabels + set: + istio.enabled: true + 
cluster.enabled: true + cluster.persistence.size: "5Gi" + commonLabels: + env: production + asserts: + - equal: + path: metadata.labels.env + value: production + + - it: should include custom labels on the policy + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.authorizationPolicy.labels: + security.example.com/reviewed: "true" + asserts: + - equal: + path: metadata.labels["security.example.com/reviewed"] + value: "true" + + - it: should include custom annotations on the policy + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.authorizationPolicy.annotations: + security.example.com/reviewed: "yes" + asserts: + - equal: + path: metadata.annotations["security.example.com/reviewed"] + value: "yes" + + - it: should not have annotations when none are set + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - notExists: + path: metadata.annotations + + # --- Selector and action --- + - it: should target Valkey pods via selector labels + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - equal: + path: spec.selector.matchLabels["app.kubernetes.io/name"] + value: valkey + - equal: + path: spec.selector.matchLabels["app.kubernetes.io/instance"] + value: RELEASE-NAME + + - it: should be an ALLOW policy + # An ALLOW AuthorizationPolicy attached to these pods triggers Istio's + # implicit default-deny: anything not matched by a rule is blocked. 
+ set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - equal: + path: spec.action + value: ALLOW + + # --- Bus-port principal rule --- + - it: bus-port rule must be scoped to the release's SPIFFE principal + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.busPort: 16379 + asserts: + - contains: + path: spec.rules + content: + from: + - source: + principals: + - "cluster.local/ns/NAMESPACE/sa/RELEASE-NAME-valkey" + to: + - operation: + ports: + - "16379" + + - it: bus-port rule should follow custom busPort + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + cluster.busPort: 26379 + asserts: + - contains: + path: spec.rules + content: + from: + - source: + principals: + - "cluster.local/ns/NAMESPACE/sa/RELEASE-NAME-valkey" + to: + - operation: + ports: + - "26379" + + - it: bus-port rule should follow custom serviceAccount name + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + serviceAccount.name: "my-sa" + asserts: + - contains: + path: spec.rules + content: + from: + - source: + principals: + - "cluster.local/ns/NAMESPACE/sa/my-sa" + to: + - operation: + ports: + - "16379" + + # --- Client / metrics open rules --- + - it: client port should be open (no principal restriction) + # ACL/TLS live above the mesh; locking the client port to the release's + # own principal would lock out every legitimate caller. 
+ set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - contains: + path: spec.rules + content: + to: + - operation: + ports: + - "6379" + + - it: metrics port should be in the open rule when metrics enabled + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + metrics.enabled: true + metrics.exporter.port: 9121 + asserts: + - contains: + path: spec.rules + content: + to: + - operation: + ports: + - "6379" + - "9121" + + - it: metrics port should NOT appear when metrics disabled + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + metrics.enabled: false + asserts: + - notContains: + path: spec.rules + content: + to: + - operation: + ports: + - "6379" + - "9121" + + # Regression: the "open" rule (second element in spec.rules) must have + # NO `from` clause. If somebody ever adds a principal filter there the + # metrics port becomes same-release only, which silently kills every + # Prometheus scrape from a different namespace (the shipped + # ServiceMonitor path). Keep this rule unconditional. + - it: the open port rule must have no principal restriction + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + metrics.enabled: true + asserts: + - notExists: + path: spec.rules[1].from + + # --- Invalid mode --- + - it: should reject invalid istio.mode at the schema layer + set: + istio.enabled: true + istio.mode: "not-a-real-mode" + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - failedTemplate: + errorMessage: "values don't meet the specifications of the schema(s) in the following chart(s):\nvalkey:\n- at '/istio/mode': value must be one of 'sidecar', 'ambient'\n" + + # --- Custom trust domain --- + # A cluster federated via multi-cluster mesh (or any install that + # overrides istiod's default) publishes identities under a non-default + # trust domain. 
The AP principal string must follow — otherwise same- + # release callers ALSO fail the ALLOW match and the bus rule is a + # self-denial. + - it: principal should honour istio.trustDomain override + set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.trustDomain: "my.mesh.example.com" + asserts: + - contains: + path: spec.rules + content: + from: + - source: + principals: + - "my.mesh.example.com/ns/NAMESPACE/sa/RELEASE-NAME-valkey" + to: + - operation: + ports: + - "16379" + + - it: principal should default to cluster.local when trustDomain unset + set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + asserts: + - contains: + path: spec.rules + content: + from: + - source: + principals: + - "cluster.local/ns/NAMESPACE/sa/RELEASE-NAME-valkey" + to: + - operation: + ports: + - "16379" + + # --- Cross-release isolation footguns --- + # These two validators exist to make the feature's SECURITY GUARANTEE + # hold: ambient cross-release isolation relies on the SPIFFE principal + # being UNIQUE per release AND on SOMETHING enforcing it. Without the + # guards you can silently ship a chart install with zero bus-port + # protection. + + - it: should refuse ambient+cluster when shared 'default' SA would be used + # serviceAccount.create=false AND no explicit name collapses every + # release's AP principal to sa/default; cross-release MEET passes the + # identity check and the clusters silently merge. Live-repro'd in + # review — this MUST fail template at install time. 
+ set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + serviceAccount.create: false + serviceAccount.name: "" + asserts: + - failedTemplate: + errorPattern: "serviceAccount.create=false AND serviceAccount.name empty.*" + + - it: should accept ambient+cluster with explicit (distinct) serviceAccount.name + # Opt-in for the advanced multi-release-shared-SA case — we can't tell + # whether the user picked a DIFFERENT name from a hypothetical other + # release, but at least the name is intentional. + set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + serviceAccount.create: false + serviceAccount.name: "my-valkey-sa" + asserts: + - hasDocuments: + count: 1 + - contains: + path: spec.rules + content: + from: + - source: + principals: + - "cluster.local/ns/NAMESPACE/sa/my-valkey-sa" + to: + - operation: + ports: + - "16379" + + # Note: the AP-disabled guard is tested in istio_test.yaml ("should + # refuse ambient+cluster when AuthorizationPolicy is off") — that + # suite renders the PeerAuthentication template, which is what carries + # the validator (the AP template correctly renders NOTHING when the AP + # is disabled, so it's the wrong place to prove the guard fires).
+ + - it: should allow authorizationPolicy.enabled=false in sidecar mode (NetworkPolicy still guards) + set: + istio.enabled: true + istio.mode: sidecar + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.authorizationPolicy.enabled: false + asserts: + - hasDocuments: + count: 0 + + - it: should allow authorizationPolicy.enabled=false when istio is off + set: + istio.enabled: false + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.authorizationPolicy.enabled: false + asserts: + - hasDocuments: + count: 0 diff --git a/valkey/tests/istio_test.yaml b/valkey/tests/istio_test.yaml new file mode 100644 index 00000000..8c821bd2 --- /dev/null +++ b/valkey/tests/istio_test.yaml @@ -0,0 +1,640 @@ +suite: istio service mesh integration +templates: + - templates/istio-peer-authentication.yaml + - templates/istio-destination-rule.yaml +tests: + # --- Feature flag tests --- + - it: should not create PeerAuthentication when istio is disabled + set: + istio.enabled: false + template: templates/istio-peer-authentication.yaml + asserts: + - hasDocuments: + count: 0 + + - it: should not create DestinationRule when istio is disabled + set: + istio.enabled: false + template: templates/istio-destination-rule.yaml + asserts: + - hasDocuments: + count: 0 + + - it: should create PeerAuthentication when istio is enabled + set: + istio.enabled: true + template: templates/istio-peer-authentication.yaml + asserts: + - hasDocuments: + count: 1 + - isKind: + of: PeerAuthentication + - isAPIVersion: + of: security.istio.io/v1 + + - it: should create DestinationRule when istio is enabled + set: + istio.enabled: true + template: templates/istio-destination-rule.yaml + asserts: + - hasDocuments: + count: 1 + - isKind: + of: DestinationRule + - isAPIVersion: + of: networking.istio.io/v1 + + # --- PeerAuthentication tests --- + - it: PeerAuthentication should target Valkey pods via selector labels + set: + istio.enabled: true + template: 
templates/istio-peer-authentication.yaml + asserts: + - equal: + path: spec.selector.matchLabels["app.kubernetes.io/name"] + value: valkey + - equal: + path: spec.selector.matchLabels["app.kubernetes.io/instance"] + value: RELEASE-NAME + + - it: PeerAuthentication should default to STRICT mTLS mode + set: + istio.enabled: true + template: templates/istio-peer-authentication.yaml + asserts: + - equal: + path: spec.mtls.mode + value: STRICT + + - it: PeerAuthentication should allow overriding mTLS mode to PERMISSIVE + set: + istio.enabled: true + istio.peerAuthentication.mode: PERMISSIVE + template: templates/istio-peer-authentication.yaml + asserts: + - equal: + path: spec.mtls.mode + value: PERMISSIVE + + - it: PeerAuthentication should allow overriding mTLS mode to DISABLE + set: + istio.enabled: true + istio.peerAuthentication.mode: DISABLE + template: templates/istio-peer-authentication.yaml + asserts: + - equal: + path: spec.mtls.mode + value: DISABLE + + - it: PeerAuthentication should allow overriding mTLS mode to UNSET + set: + istio.enabled: true + istio.peerAuthentication.mode: UNSET + template: templates/istio-peer-authentication.yaml + asserts: + - equal: + path: spec.mtls.mode + value: UNSET + + - it: PeerAuthentication should have correct name + set: + istio.enabled: true + template: templates/istio-peer-authentication.yaml + asserts: + - equal: + path: metadata.name + value: RELEASE-NAME-valkey + + - it: PeerAuthentication should include chart labels + set: + istio.enabled: true + template: templates/istio-peer-authentication.yaml + asserts: + - isNotNull: + path: metadata.labels["helm.sh/chart"] + - isNotNull: + path: metadata.labels["app.kubernetes.io/name"] + - isNotNull: + path: metadata.labels["app.kubernetes.io/managed-by"] + + - it: PeerAuthentication should include custom labels + set: + istio.enabled: true + istio.peerAuthentication.labels: + security.example.com/policy: strict + team: platform + template: 
templates/istio-peer-authentication.yaml + asserts: + - equal: + path: metadata.labels["security.example.com/policy"] + value: strict + - equal: + path: metadata.labels["team"] + value: platform + + - it: PeerAuthentication should include custom annotations + set: + istio.enabled: true + istio.peerAuthentication.annotations: + security.example.com/reviewed: "true" + template: templates/istio-peer-authentication.yaml + asserts: + - equal: + path: metadata.annotations["security.example.com/reviewed"] + value: "true" + + - it: PeerAuthentication should not have annotations when none are set + set: + istio.enabled: true + template: templates/istio-peer-authentication.yaml + asserts: + - notExists: + path: metadata.annotations + + - it: PeerAuthentication should include common labels when set + set: + istio.enabled: true + commonLabels: + env: production + template: templates/istio-peer-authentication.yaml + asserts: + - equal: + path: metadata.labels.env + value: production + + # --- DestinationRule tests (main service) --- + - it: DestinationRule should target the main service host in standalone mode + set: + istio.enabled: true + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: spec.host + value: RELEASE-NAME-valkey.NAMESPACE.svc.cluster.local + + - it: DestinationRule should default to ISTIO_MUTUAL TLS mode + set: + istio.enabled: true + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: spec.trafficPolicy.tls.mode + value: ISTIO_MUTUAL + + - it: DestinationRule should allow overriding TLS mode + set: + istio.enabled: true + istio.destinationRule.mode: MUTUAL + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: spec.trafficPolicy.tls.mode + value: MUTUAL + + - it: DestinationRule should allow SIMPLE TLS mode + set: + istio.enabled: true + istio.destinationRule.mode: SIMPLE + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: spec.trafficPolicy.tls.mode + 
value: SIMPLE + + - it: DestinationRule should allow DISABLE TLS mode + set: + istio.enabled: true + istio.destinationRule.mode: DISABLE + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: spec.trafficPolicy.tls.mode + value: DISABLE + + - it: DestinationRule should have correct name + set: + istio.enabled: true + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: metadata.name + value: RELEASE-NAME-valkey + + - it: DestinationRule should include chart labels + set: + istio.enabled: true + template: templates/istio-destination-rule.yaml + asserts: + - isNotNull: + path: metadata.labels["helm.sh/chart"] + - isNotNull: + path: metadata.labels["app.kubernetes.io/name"] + + - it: DestinationRule should include custom labels + set: + istio.enabled: true + istio.destinationRule.labels: + networking.example.com/managed: "true" + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: metadata.labels["networking.example.com/managed"] + value: "true" + + - it: DestinationRule should include custom annotations + set: + istio.enabled: true + istio.destinationRule.annotations: + networking.example.com/reviewed: "true" + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: metadata.annotations["networking.example.com/reviewed"] + value: "true" + + - it: DestinationRule should not have annotations when none are set + set: + istio.enabled: true + template: templates/istio-destination-rule.yaml + asserts: + - notExists: + path: metadata.annotations + + - it: DestinationRule should use custom cluster domain + set: + istio.enabled: true + clusterDomain: my.custom.domain + template: templates/istio-destination-rule.yaml + asserts: + - equal: + path: spec.host + value: RELEASE-NAME-valkey.NAMESPACE.svc.my.custom.domain + + # --- DestinationRule headless service tests (cluster mode) --- + - it: should create headless DestinationRule when cluster mode is enabled + set: + istio.enabled: 
true + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + asserts: + - hasDocuments: + count: 2 + + - it: headless DestinationRule should target the headless service host + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + documentIndex: 1 + asserts: + - isKind: + of: DestinationRule + - equal: + path: metadata.name + value: RELEASE-NAME-valkey-headless + - equal: + path: spec.host + value: RELEASE-NAME-valkey-headless.NAMESPACE.svc.cluster.local + + - it: headless DestinationRule should use same TLS mode as main + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + documentIndex: 1 + asserts: + - equal: + path: spec.trafficPolicy.tls.mode + value: ISTIO_MUTUAL + + - it: headless DestinationRule should respect overridden TLS mode + set: + istio.enabled: true + istio.destinationRule.mode: MUTUAL + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + documentIndex: 1 + asserts: + - equal: + path: spec.trafficPolicy.tls.mode + value: MUTUAL + + - it: headless DestinationRule should include custom labels + set: + istio.enabled: true + istio.destinationRule.labels: + networking.example.com/managed: "true" + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + documentIndex: 1 + asserts: + - equal: + path: metadata.labels["networking.example.com/managed"] + value: "true" + + - it: headless DestinationRule should include custom annotations + set: + istio.enabled: true + istio.destinationRule.annotations: + networking.example.com/reviewed: "true" + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + documentIndex: 1 + asserts: + - equal: + path: 
metadata.annotations["networking.example.com/reviewed"] + value: "true" + + - it: headless DestinationRule should include chart labels + set: + istio.enabled: true + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + documentIndex: 1 + asserts: + - isNotNull: + path: metadata.labels["helm.sh/chart"] + - isNotNull: + path: metadata.labels["app.kubernetes.io/name"] + + # --- DestinationRule headless service tests (replica mode) --- + - it: should create headless DestinationRule when replica mode is enabled + set: + istio.enabled: true + replica.enabled: true + replica.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + asserts: + - hasDocuments: + count: 2 + + - it: headless DestinationRule should target headless service in replica mode + set: + istio.enabled: true + replica.enabled: true + replica.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + documentIndex: 1 + asserts: + - isKind: + of: DestinationRule + - equal: + path: metadata.name + value: RELEASE-NAME-valkey-headless + - equal: + path: spec.host + value: RELEASE-NAME-valkey-headless.NAMESPACE.svc.cluster.local + + # --- Standalone mode tests --- + - it: should only create main DestinationRule in standalone mode (no headless) + set: + istio.enabled: true + cluster.enabled: false + replica.enabled: false + template: templates/istio-destination-rule.yaml + asserts: + - hasDocuments: + count: 1 + - equal: + path: metadata.name + value: RELEASE-NAME-valkey + + # --- Name override tests --- + - it: should use fullnameOverride in PeerAuthentication + set: + istio.enabled: true + fullnameOverride: "my-valkey" + template: templates/istio-peer-authentication.yaml + asserts: + - equal: + path: metadata.name + value: my-valkey + + - it: should use fullnameOverride in DestinationRule + set: + istio.enabled: true + fullnameOverride: "my-valkey" + template: templates/istio-destination-rule.yaml + asserts: + - equal: 
+ path: metadata.name + value: my-valkey + - equal: + path: spec.host + value: my-valkey.NAMESPACE.svc.cluster.local + + - it: should use fullnameOverride in headless DestinationRule + set: + istio.enabled: true + fullnameOverride: "my-valkey" + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + documentIndex: 1 + asserts: + - equal: + path: metadata.name + value: my-valkey-headless + - equal: + path: spec.host + value: my-valkey-headless.NAMESPACE.svc.cluster.local + + # --- Mode validation --- + # The schema catches typos in istio.mode at install time (before any + # template renders). This keeps errors fast and mode-neutral, unlike the + # old regime where only templates that happened to render in the chosen + # mode would fail — a bogus mode with istio.enabled=false would silently + # sail through. + - it: should reject invalid istio.mode at the schema layer + set: + istio.enabled: true + istio.mode: waypoint # invalid — a real Istio term, but not an accepted mode + template: templates/istio-peer-authentication.yaml + asserts: + - failedTemplate: + errorMessage: "values don't meet the specifications of the schema(s) in the following chart(s):\nvalkey:\n- at '/istio/mode': value must be one of 'sidecar', 'ambient'\n" + + - it: should accept istio.mode=sidecar + set: + istio.enabled: true + istio.mode: sidecar + template: templates/istio-peer-authentication.yaml + asserts: + - hasDocuments: + count: 1 + + - it: should accept istio.mode=ambient + set: + istio.enabled: true + istio.mode: ambient + template: templates/istio-peer-authentication.yaml + asserts: + - hasDocuments: + count: 1 + + # --- Ambient mode: DestinationRule must NOT render --- + # ztunnel already wraps pod-to-pod hops in HBONE mTLS; a DR on top would + # layer Envoy mTLS inside ztunnel mTLS (double crypto) and would require a + # waypoint proxy to even take effect. Keep it off.
+ - it: should not render DestinationRule in ambient mode (standalone) + set: + istio.enabled: true + istio.mode: ambient + template: templates/istio-destination-rule.yaml + asserts: + - hasDocuments: + count: 0 + + - it: should not render DestinationRule in ambient mode (cluster) + set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + asserts: + - hasDocuments: + count: 0 + + - it: should not render DestinationRule in ambient mode (replica) + set: + istio.enabled: true + istio.mode: ambient + replica.enabled: true + replica.persistence.size: "5Gi" + template: templates/istio-destination-rule.yaml + asserts: + - hasDocuments: + count: 0 + + - it: should render DestinationRule in sidecar mode (default) + set: + istio.enabled: true + template: templates/istio-destination-rule.yaml + asserts: + - hasDocuments: + count: 1 + + # --- Ambient mode: PeerAuthentication still applies --- + # Enforced by ztunnel instead of Envoy, but the CRD shape is identical. + - it: should render PeerAuthentication in ambient mode + set: + istio.enabled: true + istio.mode: ambient + template: templates/istio-peer-authentication.yaml + asserts: + - hasDocuments: + count: 1 + - equal: + path: spec.mtls.mode + value: STRICT + + - it: PeerAuthentication should be identical across modes + set: + istio.enabled: true + istio.mode: ambient + istio.peerAuthentication.mode: PERMISSIVE + template: templates/istio-peer-authentication.yaml + asserts: + - equal: + path: spec.mtls.mode + value: PERMISSIVE + + # --- Cross-release isolation guards --- + # These assert the chart refuses to silently ship an unprotected cluster + # bus. The validator is called from the PeerAuthentication template so it + # fires regardless of whether the AuthorizationPolicy itself renders. 
+ + - it: should refuse ambient+cluster when AuthorizationPolicy is off + # Ambient mode skips the bus-port NetworkPolicy (it would drop HBONE); + # disabling the AP on top leaves the port completely unprotected across + # releases. The chart must fail closed. + set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + istio.authorizationPolicy.enabled: false + template: templates/istio-peer-authentication.yaml + asserts: + - failedTemplate: + errorPattern: "istio.authorizationPolicy.enabled=false in ambient mode.*cluster-bus port unprotected.*" + + - it: should refuse ambient+cluster when serviceAccount collapses to default + # serviceAccount.create=false AND serviceAccount.name="" produces the + # shared `default` SA. Two such releases in the same namespace generate + # identical AP principals; cross-release MEET succeeds. Repro'd live. + set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + serviceAccount.create: false + serviceAccount.name: "" + template: templates/istio-peer-authentication.yaml + asserts: + - failedTemplate: + errorPattern: "serviceAccount.create=false AND serviceAccount.name empty.*" + + - it: should accept ambient+cluster with serviceAccount.create=true (default) + # Per-release SA (default): distinct SPIFFE principal per release, + # AP correctly isolates. + set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + serviceAccount.create: true + template: templates/istio-peer-authentication.yaml + asserts: + - hasDocuments: + count: 1 + + - it: should accept ambient+cluster with explicit serviceAccount.name override + # User takes responsibility for distinct naming. 
+ set: + istio.enabled: true + istio.mode: ambient + cluster.enabled: true + cluster.persistence.size: "5Gi" + serviceAccount.create: false + serviceAccount.name: "my-valkey-sa" + template: templates/istio-peer-authentication.yaml + asserts: + - hasDocuments: + count: 1 + + # --- Custom trust domain --- + - it: PeerAuthentication unaffected by custom trustDomain + # PA doesn't reference principals, so trustDomain is a no-op here. Just + # prove nothing breaks when the value is set. + set: + istio.enabled: true + istio.mode: ambient + istio.trustDomain: "my.mesh.example.com" + template: templates/istio-peer-authentication.yaml + asserts: + - hasDocuments: + count: 1 + - equal: + path: spec.mtls.mode + value: STRICT + + # --- Typo defence --- + # Schema check fires even when istio.enabled=false so typos surface at + # GitOps-commit time, not after someone flips the toggle in production. + - it: should reject typo'd istio.mode even with istio.enabled=false + set: + istio.enabled: false + istio.mode: ambiet + template: templates/istio-peer-authentication.yaml + asserts: + - failedTemplate: + errorMessage: "values don't meet the specifications of the schema(s) in the following chart(s):\nvalkey:\n- at '/istio/mode': value must be one of 'sidecar', 'ambient'\n" diff --git a/valkey/tests/netpolicy_test.yaml b/valkey/tests/netpolicy_test.yaml new file mode 100644 index 00000000..43081f23 --- /dev/null +++ b/valkey/tests/netpolicy_test.yaml @@ -0,0 +1,49 @@ +suite: network policy configuration +templates: + - templates/netpolicy.yaml +tests: + - it: should render nothing when networkPolicy is empty + asserts: + - hasDocuments: + count: 0 + + # Default-deny ingress is expressed as an empty list; the chart must keep + # that distinct from "never declared" — otherwise the opt-in is lost. 
+ - it: should render a default-deny ingress policy when ingress is an empty list + set: + networkPolicy: + ingress: [] + asserts: + - isKind: + of: NetworkPolicy + - equal: + path: spec.policyTypes + value: + - Ingress + - equal: + path: spec.ingress + value: [] + - notExists: + path: spec.egress + + - it: should render both ingress and egress when both declared + set: + networkPolicy: + ingress: + - from: + - podSelector: {} + egress: + - to: + - podSelector: {} + asserts: + - isKind: + of: NetworkPolicy + - equal: + path: spec.policyTypes + value: + - Ingress + - Egress + - isNotEmpty: + path: spec.ingress + - isNotEmpty: + path: spec.egress diff --git a/valkey/tests/poddisruptionbudget_test.yaml b/valkey/tests/poddisruptionbudget_test.yaml index dd7c6079..6999ea22 100644 --- a/valkey/tests/poddisruptionbudget_test.yaml +++ b/valkey/tests/poddisruptionbudget_test.yaml @@ -11,14 +11,31 @@ tests: - hasDocuments: count: 0 - - it: should not create PDB when replica is disabled + - it: should not create PDB when neither replica nor cluster is enabled set: replica.enabled: false + cluster.enabled: false podDisruptionBudget.enabled: true asserts: - hasDocuments: count: 0 + # Regression: PDB used to be gated on replica mode only; cluster mode was + # silently unprotected. 
+ - it: should create PDB when enabled with cluster mode + set: + cluster.enabled: true + cluster.persistence.size: "5Gi" + podDisruptionBudget.enabled: true + asserts: + - hasDocuments: + count: 1 + - isKind: + of: PodDisruptionBudget + - equal: + path: spec.maxUnavailable + value: 1 + - it: should create PDB when enabled with replica set: replica.enabled: true diff --git a/valkey/tests/pvc_test.yaml b/valkey/tests/pvc_test.yaml new file mode 100644 index 00000000..003939f8 --- /dev/null +++ b/valkey/tests/pvc_test.yaml @@ -0,0 +1,165 @@ +suite: pvc configuration +templates: + - templates/pvc.yaml +tests: + - it: should not create PVC when replica.enabled is true + set: + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + replica.enabled: true + asserts: + - hasDocuments: + count: 0 + + - it: should not create PVC when cluster.enabled is true + set: + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + cluster.enabled: true + asserts: + - hasDocuments: + count: 0 + + - it: should not create PVC when both replica.enabled and cluster.enabled are true + set: + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + replica.enabled: true + cluster.enabled: true + asserts: + - hasDocuments: + count: 0 + + - it: should create PVC when both replica.enabled and cluster.enabled are false and conditions are met + set: + replica.enabled: false + cluster.enabled: false + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + dataStorage.persistentVolumeClaimName: "" + asserts: + - hasDocuments: + count: 1 + - isKind: + of: PersistentVolumeClaim + + - it: should not create PVC when dataStorage.enabled is false + set: + dataStorage.enabled: false + replica.enabled: false + cluster.enabled: false + asserts: + - hasDocuments: + count: 0 + + - it: should not create PVC when dataStorage.requestedSize is empty + set: + dataStorage.enabled: true + dataStorage.requestedSize: "" + replica.enabled: false + cluster.enabled: false + asserts: + - 
hasDocuments: + count: 0 + + - it: should not create PVC when dataStorage.persistentVolumeClaimName is set + set: + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + dataStorage.persistentVolumeClaimName: "existing-pvc" + replica.enabled: false + cluster.enabled: false + asserts: + - hasDocuments: + count: 0 + + - it: should have correct storage size + set: + dataStorage.enabled: true + dataStorage.requestedSize: "16Gi" + replica.enabled: false + cluster.enabled: false + asserts: + - isKind: + of: PersistentVolumeClaim + - equal: + path: spec.resources.requests.storage + value: "16Gi" + + - it: should have keepPvc annotation when enabled + set: + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + dataStorage.keepPvc: true + replica.enabled: false + cluster.enabled: false + asserts: + - isKind: + of: PersistentVolumeClaim + - equal: + path: metadata.annotations["helm.sh/resource-policy"] + value: keep + + - it: should have custom storage class when specified + set: + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + dataStorage.className: "fast-ssd" + replica.enabled: false + cluster.enabled: false + asserts: + - isKind: + of: PersistentVolumeClaim + - equal: + path: spec.storageClassName + value: fast-ssd + + - it: should have custom labels when specified + set: + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + dataStorage.labels: + custom.label: "value" + another.label: "test" + replica.enabled: false + cluster.enabled: false + asserts: + - isKind: + of: PersistentVolumeClaim + - equal: + path: metadata.labels["custom.label"] + value: value + - equal: + path: metadata.labels["another.label"] + value: test + + - it: should have custom annotations when specified + set: + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + dataStorage.annotations: + custom.annotation: "value" + replica.enabled: false + cluster.enabled: false + asserts: + - isKind: + of: PersistentVolumeClaim + - equal: + path: 
metadata.annotations["custom.annotation"] + value: value + + - it: should have correct access modes + set: + dataStorage.enabled: true + dataStorage.requestedSize: "8Gi" + dataStorage.accessModes: + - ReadWriteOnce + replica.enabled: false + cluster.enabled: false + asserts: + - isKind: + of: PersistentVolumeClaim + - equal: + path: spec.accessModes + value: + - ReadWriteOnce diff --git a/valkey/tests/service_test.yaml b/valkey/tests/service_test.yaml index 115c137a..364fa083 100644 --- a/valkey/tests/service_test.yaml +++ b/valkey/tests/service_test.yaml @@ -86,3 +86,67 @@ tests: content: app.kubernetes.io/instance: RELEASE-NAME app.kubernetes.io/name: valkey + - it: should pin to pod-0 when replica.enabled is true + set: + replica.enabled: true + template: templates/service.yaml + asserts: + - isKind: + of: Service + - equal: + path: spec.selector["statefulset.kubernetes.io/pod-name"] + value: RELEASE-NAME-valkey-0 + - it: should not pin to pod-0 when cluster.enabled is true + set: + cluster.enabled: true + template: templates/service.yaml + asserts: + - isKind: + of: Service + - notExists: + path: spec.selector["statefulset.kubernetes.io/pod-name"] + - it: should not pin to pod-0 when both replica.enabled and cluster.enabled are false + set: + replica.enabled: false + cluster.enabled: false + template: templates/service.yaml + asserts: + - isKind: + of: Service + - notExists: + path: spec.selector["statefulset.kubernetes.io/pod-name"] + - it: should never expose the cluster bus port on the frontend service + # The bus port carries gossip + failover traffic between nodes; it's + # reached via the headless service, not the round-robin frontend. 
+ set: + cluster.enabled: true + cluster.busPort: 16379 + template: templates/service.yaml + asserts: + - isKind: + of: Service + - notContains: + path: spec.ports + content: + name: tcp-bus + - notContains: + path: spec.ports + content: + port: 16379 + + # Regression: loadBalancerSourceRanges used to render via Go's default + # slice-to-string pipeline ([a b]), which the API server rejects as invalid + # CIDR. The fix is to emit a proper YAML list. + - it: should render loadBalancerSourceRanges as a YAML list + set: + service.type: LoadBalancer + service.loadBalancerSourceRanges: + - "1.2.3.4/32" + - "5.6.7.8/32" + template: templates/service.yaml + asserts: + - equal: + path: spec.loadBalancerSourceRanges + value: + - "1.2.3.4/32" + - "5.6.7.8/32" diff --git a/valkey/tests/statefulset_test.yaml b/valkey/tests/statefulset_test.yaml index 6deb88ab..05efddde 100644 --- a/valkey/tests/statefulset_test.yaml +++ b/valkey/tests/statefulset_test.yaml @@ -371,3 +371,113 @@ tests: secretKeyRef: name: my-custom-secret key: my-password-key + + # Regression: probes must accept NOAUTH as proof of liveness. 
+ - it: should use a PONG|NOAUTH-tolerant probe on the valkey container + set: + replica.enabled: true + replica.persistence.size: "5Gi" + auth.enabled: true + auth.aclUsers: + default: + permissions: "~* &* +@all" + password: "p" + template: templates/statefulset.yaml + asserts: + - matchRegex: + path: spec.template.spec.containers[0].startupProbe.exec.command[2] + pattern: "PONG\\|NOAUTH" + - matchRegex: + path: spec.template.spec.containers[0].livenessProbe.exec.command[2] + pattern: "PONG\\|NOAUTH" + - matchRegex: + path: spec.template.spec.containers[0].readinessProbe.exec.command[2] + pattern: "PONG\\|NOAUTH" + + - it: should define a readiness probe + set: + replica.enabled: true + replica.persistence.size: "5Gi" + template: templates/statefulset.yaml + asserts: + - exists: + path: spec.template.spec.containers[0].readinessProbe + + # --- Tri-state LOADING policy (see cluster_test.yaml for rationale) --- + - it: replicated startupProbe must reject LOADING + set: + replica.enabled: true + replica.persistence.size: "5Gi" + template: templates/statefulset.yaml + asserts: + - notMatchRegex: + path: spec.template.spec.containers[0].startupProbe.exec.command[2] + pattern: "LOADING" + + - it: replicated livenessProbe must accept LOADING (full-resync tolerance) + set: + replica.enabled: true + replica.persistence.size: "5Gi" + template: templates/statefulset.yaml + asserts: + - matchRegex: + path: spec.template.spec.containers[0].livenessProbe.exec.command[2] + pattern: "PONG\\|NOAUTH\\|LOADING" + + - it: replicated readinessProbe must reject LOADING + set: + replica.enabled: true + replica.persistence.size: "5Gi" + template: templates/statefulset.yaml + asserts: + - notMatchRegex: + path: spec.template.spec.containers[0].readinessProbe.exec.command[2] + pattern: "LOADING" + + - it: replica.startupProbe overrides should land on the startupProbe only + set: + replica.enabled: true + replica.persistence.size: "5Gi" + replica.startupProbe.failureThreshold: 240 + 
template: templates/statefulset.yaml + asserts: + - equal: + path: spec.template.spec.containers[0].startupProbe.failureThreshold + value: 240 + - equal: + path: spec.template.spec.containers[0].livenessProbe.failureThreshold + value: 6 + - equal: + path: spec.template.spec.containers[0].readinessProbe.failureThreshold + value: 3 + + # --- Istio ambient mode --- + - it: should add ambient dataplane-mode label when istio.mode=ambient + set: + replica.enabled: true + replica.persistence.size: "5Gi" + istio.enabled: true + istio.mode: ambient + template: templates/statefulset.yaml + asserts: + - equal: + path: spec.template.metadata.labels["istio.io/dataplane-mode"] + value: ambient + + # Sidecar mode emits istio.io/dataplane-mode=none (veto ambient capture) + # and sidecar.istio.io/inject=true (force injection), so the chart is + # self-sufficient on clusters that run both data planes side-by-side. + - it: should set sidecar-mode mesh labels in sidecar mode + set: + replica.enabled: true + replica.persistence.size: "5Gi" + istio.enabled: true + istio.mode: sidecar + template: templates/statefulset.yaml + asserts: + - equal: + path: spec.template.metadata.labels["istio.io/dataplane-mode"] + value: none + - equal: + path: spec.template.metadata.labels["sidecar.istio.io/inject"] + value: "true" diff --git a/valkey/values.schema.json b/valkey/values.schema.json index 5db0f4ac..a02c2884 100644 --- a/valkey/values.schema.json +++ b/valkey/values.schema.json @@ -1,5 +1,24 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", + "$defs": { + "probeTuning": { + "type": "object", + "properties": { + "periodSeconds": { + "type": "integer", + "minimum": 1 + }, + "timeoutSeconds": { + "type": "integer", + "minimum": 1 + }, + "failureThreshold": { + "type": "integer", + "minimum": 1 + } + } + } + }, "type": "object", "properties": { "affinity": { @@ -22,6 +41,103 @@ } } }, + "cluster": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + 
"shards": { + "type": "integer" + }, + "replicasPerShard": { + "type": "integer" + }, + "replicationUser": { + "type": "string" + }, + "nodeTimeout": { + "type": "integer" + }, + "requireFullCoverage": { + "type": "boolean" + }, + "allowReadsWhenDown": { + "type": "boolean" + }, + "persistence": { + "type": "object", + "properties": { + "size": { + "type": "string" + }, + "storageClass": { + "type": "string" + }, + "accessModes": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "busPort": { + "type": "integer" + }, + "isolation": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + } + } + }, + "startupProbe": { + "$ref": "#/$defs/probeTuning" + }, + "livenessProbe": { + "$ref": "#/$defs/probeTuning" + }, + "readinessProbe": { + "$ref": "#/$defs/probeTuning" + }, + "preStopFailover": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "timeoutSeconds": { + "type": "integer", + "minimum": 1 + } + } + }, + "terminationGracePeriodSeconds": { + "type": "integer", + "minimum": 1 + }, + "initJob": { + "type": "object", + "properties": { + "podLabels": { + "type": "object" + }, + "podAnnotations": { + "type": "object" + }, + "ttlSecondsAfterFinished": { + "type": "integer", + "minimum": 0 + } + } + }, + "persistentVolumeClaimRetentionPolicy": { + "type": "object" + } + } + }, "clusterDomain": { "type": "string" }, @@ -82,9 +198,6 @@ "extraInitContainers": { "type": "array" }, - "extraSecretValkeyConfigs": { - "type": "boolean" - }, "extraValkeyConfigs": { "type": "array" }, @@ -134,6 +247,78 @@ "initResources": { "type": "object" }, + "istio": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "mode": { + "type": "string", + "enum": [ + "sidecar", + "ambient" + ] + }, + "trustDomain": { + "type": "string" + }, + "peerAuthentication": { + "type": "object", + "properties": { + "mode": { + "type": "string", + "enum": [ + "STRICT", + "PERMISSIVE", + "DISABLE", + "UNSET" 
+ ] + }, + "labels": { + "type": "object" + }, + "annotations": { + "type": "object" + } + } + }, + "destinationRule": { + "type": "object", + "properties": { + "mode": { + "type": "string", + "enum": [ + "DISABLE", + "SIMPLE", + "MUTUAL", + "ISTIO_MUTUAL" + ] + }, + "labels": { + "type": "object" + }, + "annotations": { + "type": "object" + } + } + }, + "authorizationPolicy": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "labels": { + "type": "object" + }, + "annotations": { + "type": "object" + } + } + } + } + }, "metrics": { "type": "object", "properties": { @@ -361,6 +546,15 @@ "priorityClassName": { "type": "string" }, + "startupProbe": { + "$ref": "#/$defs/probeTuning" + }, + "livenessProbe": { + "$ref": "#/$defs/probeTuning" + }, + "readinessProbe": { + "$ref": "#/$defs/probeTuning" + }, "replica": { "type": "object", "properties": { @@ -396,6 +590,15 @@ "persistentVolumeClaimRetentionPolicy": { "type": "object" }, + "startupProbe": { + "$ref": "#/$defs/probeTuning" + }, + "livenessProbe": { + "$ref": "#/$defs/probeTuning" + }, + "readinessProbe": { + "$ref": "#/$defs/probeTuning" + }, "replicas": { "type": "integer" }, diff --git a/valkey/values.yaml b/valkey/values.yaml index 20e00b62..7517705e 100644 --- a/valkey/values.yaml +++ b/valkey/values.yaml @@ -98,6 +98,41 @@ resources: {} # cpu: 100m # memory: 128Mi +# Probe tuning. The probe command is `valkey-cli ping` (with TLS args +# when tls.enabled). Only the timing knobs are configurable. +# +# The three probes have different LOADING policies: +# +# startupProbe — does NOT accept LOADING, so it actually gates +# liveness/readiness during initial RDB load. On a slow loader, bump +# failureThreshold (or periodSeconds) so failureThreshold * +# periodSeconds comfortably exceeds your worst-case RDB load time. +# Default 30 * 5s = 150s suits small dev datasets only; a 44 GB +# primary needs roughly 240 * 5s = 20 minutes. +# +# livenessProbe — DOES accept LOADING. 
After startup passes, LOADING +# almost always means a full-resync from primary is in progress +# (replica fell behind, replication backlog overflowed, etc.). +# Killing the pod here loses the in-flight download and forces yet +# another full resync, perpetuating the kill-loop. A pod stuck +# loading forever is rare and harmless compared to the kill-loop. +# +# readinessProbe — does NOT accept LOADING. A LOADING pod can't serve +# traffic, so it must leave the Service endpoint set until it can. +# The pod sits 'Running 0/1' during full-resync — exactly right. +startupProbe: + periodSeconds: 5 + timeoutSeconds: 5 + failureThreshold: 30 +livenessProbe: + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 +readinessProbe: + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 + # Resource limits/requests for init containers initResources: {} # Example: @@ -156,9 +191,6 @@ extraValkeySecrets: [] # Mount additional configMaps into the Valkey container extraValkeyConfigs: [] -# Mount extra secrets as volume to init container (deprecated, use extraValkeySecrets) -extraSecretValkeyConfigs: false - # Mount additional emptyDir or hostPath volumes (advanced use) extraVolumes: [] # - name: hostpath-volume @@ -264,6 +296,172 @@ replica: # More info: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention persistentVolumeClaimRetentionPolicy: {} + # Probe tuning for replicated (non-cluster) StatefulSet pods. Same + # rationale as the top-level startupProbe block — bump failureThreshold + # for large RDBs. 
+ startupProbe: + periodSeconds: 5 + timeoutSeconds: 5 + failureThreshold: 30 + livenessProbe: + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 + readinessProbe: + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 + +# Cluster mode configuration for Valkey Cluster (sharded deployment) +# Note: cluster.enabled and replica.enabled are mutually exclusive +cluster: + # Enable cluster mode (creates a sharded Valkey cluster) + enabled: false + + # Number of shards (primary nodes). Minimum recommended is 3 for cluster mode. + # Each shard handles a portion of the hash slot range (16384 slots total). + shards: 3 + + # Number of replicas per shard (for high availability within each shard) + # Total nodes = shards × (1 + replicasPerShard) + # For example: 3 shards with 1 replica each = 6 nodes total + replicasPerShard: 1 + + # Username for cluster replication authentication, ignored if auth.enabled is false. + # IMPORTANT: When auth.enabled is true, this user MUST be defined in auth.aclUsers. + # The user must have appropriate replication permissions: +psync +replconf +ping + replicationUser: "default" + + # Cluster node timeout in milliseconds (how long before a node is considered failed) + nodeTimeout: 15000 + + # Require all hash slots to be covered for the cluster to accept writes + # Set to false to allow partial cluster operation + requireFullCoverage: true + + # Allow cluster to serve read requests when in down state + allowReadsWhenDown: false + + # Persistence configuration (required for cluster mode) + persistence: + # Size of the PVC for each node (required when cluster.enabled is true) + size: "" + # Storage class name (empty = use default storage class) + storageClass: "" + # Access modes for the PVC + accessModes: + - ReadWriteOnce + + # Bus port for cluster communication (default: service.port + 10000) + # This port is used for node-to-node communication in the cluster + busPort: 16379 + + # Orderly handover on primary pod shutdown. 
A `kubectl rollout restart` + # (or any voluntary eviction) sends SIGTERM to Valkey directly; without a + # preStop hook, the primary dies with open client connections and the + # cluster takes up to nodeTimeout to promote a replica. During that + # window, connection pools fill with dead sockets and the app errors out + # on every pooled command. + # + # With this enabled, each pod's preStop checks whether it is a primary + # and (if so) asks one of its own replicas to run `CLUSTER FAILOVER` — + # Valkey's canonical graceful-handover command. The primary demotes to + # replica in the same pass, so by the time SIGTERM arrives the shard + # already has a new primary and existing connections close cleanly. + # + # No-op when: + # * the pod is already a replica (nothing to fail over); + # * the shard has no replicas (replicasPerShard=0 — nothing to fail + # over TO); + # * the pod has no healthy replica of its own (FAILOVER would target + # nothing). + # The hook is strictly best-effort — any error path falls through to the + # normal SIGTERM, which is the pre-existing (suboptimal) behaviour. + preStopFailover: + # Enable the preStop CLUSTER FAILOVER hook. + enabled: true + # How long to wait for the ex-primary to observe its own demotion + # before giving up and letting kubelet send SIGTERM. Keep comfortably + # below terminationGracePeriodSeconds — kubelet counts the preStop + # time against that total grace period. + timeoutSeconds: 40 + + # Grace period for pod shutdown. Needs to accommodate the preStop + # CLUSTER FAILOVER handshake (default timeoutSeconds: 40) with headroom + # for SIGTERM + flush. The K8s default (30s) is too short for an orderly + # cluster rollout — a clipped preStop forces the abrupt close this + # feature is trying to avoid. + terminationGracePeriodSeconds: 60 + + # Isolate this Valkey cluster's gossip bus from any other release in the + # Kubernetes cluster. 
Valkey's CLUSTER MEET has no authentication, so + # without this, any pod that can open a TCP connection to a node's bus + # port can merge its owner's cluster into this one — regardless of + # namespace, since pod-to-pod traffic crosses namespaces freely by + # default. + # + # The generated NetworkPolicy restricts the bus port to same-release pods + # only; client and metrics ports remain open. + # + # Requires a NetworkPolicy-enforcing CNI (Calico, Cilium, kindnet ≥ 0.20, + # Antrea, etc.). On a non-enforcing CNI (plain Flannel, in-tree kubenet) + # the rendered policy is a no-op and the chart cannot provide isolation + # on its own — an Istio AuthorizationPolicy or a separate Kubernetes + # cluster per Valkey cluster is the only remaining option. Namespaces + # alone do NOT provide isolation. + isolation: + enabled: true + + # Probe tuning for cluster-mode StatefulSet pods. Same rationale as the + # top-level startupProbe block — bump failureThreshold for large RDBs. + # In cluster mode, replicas catch up via PSYNC + RDB transfer on (re) + # connect, so the post-restart load window can be just as long as for + # standalone replicated mode. + startupProbe: + periodSeconds: 5 + timeoutSeconds: 5 + failureThreshold: 30 + livenessProbe: + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 + readinessProbe: + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 + + # PersistentVolumeClaim retention policy for StatefulSet + # Controls when PVCs are deleted (requires Kubernetes 1.23+) + # More info: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention + persistentVolumeClaimRetentionPolicy: {} + + # Override surface for the cluster-init Job pod only. 
Layered on top of + # the chart-wide podLabels / podAnnotations (these win on key collision) + # and merged with the chart-computed mesh labels, so the same opt-out + # mechanics that apply to the data pods are still in force unless you + # explicitly veto a key here. + # + # The intended use case is excluding the short-lived bootstrap Job from + # cluster-wide pod admission webhooks — observability sidecars, mesh + # proxies pulled in by namespace labels, policy agents, etc. — that are + # appropriate for long-running data pods but turn the Job into a + # never-completing batch task. Because the Job is a Helm post-install + # hook with backoffLimit=6, an injected sidecar that never exits means + # `helm install --wait` blocks until timeout and the chart fails to + # converge. + # + # Both maps default empty; with no overrides set the Job pod inherits + # the chart-wide values exactly as it did before this surface existed. + initJob: + podLabels: {} + podAnnotations: {} + # Auto-cleanup window for the completed Job. Kubernetes deletes the + # Job (and its pod) this many seconds after it transitions to + # Complete or Failed via the TTL-after-finished controller. Set + # short enough to keep `kubectl get pods` clean across upgrades, + # long enough to grab `kubectl logs` for post-mortem if needed. + ttlSecondsAfterFinished: 300 + tls: # Enable TLS enabled: false @@ -280,6 +478,101 @@ tls: # Require that clients authenticate with a certificate requireClientCertificate: false +istio: + # Enable Istio + enabled: false + + # Data-plane mode. Two very different shapes: + # + # sidecar — Classic Istio. An Envoy sidecar is injected into every pod. + # Requires the namespace/pod to be labelled for injection + # (`istio-injection=enabled` or `sidecar.istio.io/inject=true`). + # The chart adds `traffic.sidecar.istio.io/exclude*Ports` so + # the cluster-bus port bypasses Envoy (gossip needs raw TCP). + # + # ambient — Istio Ambient Mesh. 
No sidecar; a node-local ztunnel wraps + # pod traffic in HBONE mTLS transparently. The chart adds + # `istio.io/dataplane-mode: ambient` to every workload pod so + # ztunnel captures their traffic (also works when the whole + # namespace is labelled; the pod label is additive and lets + # operators opt in per-release). The DestinationRule that + # wraps outbound connections in ISTIO_MUTUAL is skipped — + # ztunnel already handles mTLS, and a DR would layer a second + # Envoy mTLS on top, doubling crypto overhead. + # + # Both modes still render the PeerAuthentication (enforced by ztunnel in + # ambient, by Envoy in sidecar) and the chart's AuthorizationPolicy that + # pins the cluster-bus port to same-release principals. + mode: sidecar # @schema enum:[sidecar,ambient] + + # SPIFFE trust domain this mesh issues identities under. Used by the + # AuthorizationPolicy to build the principal string + # `TRUST_DOMAIN/ns/NAMESPACE/sa/SERVICE_ACCOUNT`. Must match the `trustDomain` value in + # the mesh ConfigMap (`kubectl -n istio-system get cm istio -o jsonpath= + # '{.data.mesh}' | grep trustDomain`); a mismatch means the rule matches + # nothing and same-release pods get default-denied on the bus port, not + # just cross-release ones. + trustDomain: cluster.local + + # PeerAuthentication controls mTLS enforcement on inbound connections. + # Applies to both sidecar and ambient modes. + # + # A note on Prometheus scraping: STRICT requires every inbound connection + # to present a mesh-issued mTLS cert. An out-of-mesh Prometheus (e.g. a + # Prometheus running in a namespace without injection, or on a cluster + # without Istio) cannot present such a cert, so scraping the metrics + # service fails with "Connection reset by peer". Fix either by putting + # the scraper in the mesh (ambient capture or sidecar injection), or by + # setting this mode to PERMISSIVE so the mesh accepts plaintext too. 
+ # The chart's AuthorizationPolicy keeps 9121 wide open at the identity + # layer regardless — PeerAuthentication is the gate that flips. + peerAuthentication: + # mTLS mode for inbound traffic (STRICT, PERMISSIVE, DISABLE, UNSET) + # STRICT: Require mTLS on all ports + # PERMISSIVE: Accept both plaintext and mTLS + mode: STRICT + # Additional labels for the PeerAuthentication resource + labels: {} + # Additional annotations for the PeerAuthentication resource + annotations: {} + + # DestinationRule configures mTLS for outbound connections to Valkey services. + # Rendered only when istio.mode is "sidecar" — in ambient mode, ztunnel + # handles mTLS transparently and a DR would double-encrypt. + destinationRule: + # TLS mode for outbound traffic (DISABLE, SIMPLE, MUTUAL, ISTIO_MUTUAL) + # ISTIO_MUTUAL: Use Istio-managed certificates for mTLS. + # NOTE: When tls.enabled is true the Valkey pods already terminate TLS + # themselves. Keeping mode=ISTIO_MUTUAL wraps app-level TLS in Envoy mTLS + # (double encryption) and still works, but doubles crypto overhead. If you + # only want mesh-level mTLS, set tls.enabled=false here and rely on Istio. + # If you prefer app-level TLS only, set mode=DISABLE so Envoy passes the + # TLS bytes through untouched. + mode: ISTIO_MUTUAL + # Additional labels for the DestinationRule resource + labels: {} + # Additional annotations for the DestinationRule resource + annotations: {} + + # AuthorizationPolicy restricts the cluster-bus port to same-release + # principals. In sidecar mode, Envoy enforces; in ambient mode, ztunnel + # enforces. Unlike the NetworkPolicy (IP-based, requires a policy-enforcing + # CNI), this is cryptographic — it requires the caller to hold a SPIFFE + # identity matching the Valkey pods' ServiceAccount, so a pod in a different + # release cannot forge the check even if it can open a TCP connection. + # + # Only active in cluster mode (no bus port otherwise). 
+ authorizationPolicy: + # Enable the chart-owned AuthorizationPolicy. + # Defaults to true when istio.enabled is true — the whole point of wiring + # Istio in is to get mesh-level enforcement, and this is the piece that + # prevents cross-release CLUSTER MEET attacks at L4. + enabled: true + # Additional labels for the AuthorizationPolicy resource + labels: {} + # Additional annotations for the AuthorizationPolicy resource + annotations: {} + # Node selector for pod assignment nodeSelector: {}