From 2421963e0b43c63e6c9640206b5dbdf50eec8e92 Mon Sep 17 00:00:00 2001 From: jose Date: Thu, 2 Apr 2026 16:45:27 +0200 Subject: [PATCH 1/5] feat(sentinel): add native ACL support with secure password handling fix(chart): resolve schema validation and template errors --- CHANGELOG..md | 38 ++++++++++++++++++++++++ valkey/templates/sentinel-configmap.yaml | 29 ++++++++++++++---- valkey/values.schema.json | 4 +++ valkey/values.yaml | 11 ++++++- 4 files changed, 75 insertions(+), 7 deletions(-) create mode 100644 CHANGELOG..md diff --git a/CHANGELOG..md b/CHANGELOG..md new file mode 100644 index 00000000..da9a0501 --- /dev/null +++ b/CHANGELOG..md @@ -0,0 +1,38 @@ +# Helm Chart Fixes and Improvements + +## Fixed Schema and Configuration Issues +- 2026-04-02 + - Fixed missing comma in `values.schema.json` that caused validation errors + - Added `sentinelAclUsers` definition to the JSON schema for proper validation + - Fixed HAProxy image tag value: converted `tag: 2.9` to `tag: "2.9"` (string instead of number) to match schema definition + - All changes ensure `helm lint` passes without errors + +## Added Native ACL Support for Sentinel + +- 2026-04-02 + - Implemented Access Control List (ACL) management for Valkey Sentinel in `sentinel-configmap.yaml` (lines 92-120) + + **Features:** + - Generate NATIVE ACLs for INBOUND connections to Sentinel from `values.auth.sentinelAclUsers` + - Generate OUTBOUND authentication configuration for Sentinel to connect to Valkey master/replicas using `values.auth.aclUsers` + - Secure password handling with SHA256 hashing to prevent plaintext credentials in config files + - Automatic replication user authentication for master/replica monitoring + + **Security Improvements:** + - Modified logging function to output to stderr (`>&2`) instead of stdout to prevent passwords and sensitive data from leaking into command substitutions or configuration files + - Ensures log messages do not interfere with command output that may contain 
credentials + + **Implementation Details:** + + 1. **INBOUND ACL Generation (lines 93-105):** + - Generates native ACL entries for clients connecting to Sentinel + - Iterates through `values.auth.sentinelAclUsers` configuration + - Retrieves password for each Sentinel ACL user + - Hashes password with SHA256: `PASSHASH=$(echo -n "$PASSWORD" | sha256sum | cut -f 1 -d " ")` + - Writes ACL entry: `echo "user {{ $username }} on #$PASSHASH {{ $user.permissions }}" >> "$SENTINEL_CONF"` + + 2. **OUTBOUND Authentication Configuration (lines 107-120):** + - Configures Sentinel authentication credentials to connect to Valkey master and replicas + - Fetches replication user from `values.auth.aclUsers` (Valkey ACL block) + - Retrieves password using `get_user_password()` helper function + - Sets: `sentinel auth-user` and `sentinel auth-pass` for master set monitoring diff --git a/valkey/templates/sentinel-configmap.yaml b/valkey/templates/sentinel-configmap.yaml index 33e63ce7..774903bd 100644 --- a/valkey/templates/sentinel-configmap.yaml +++ b/valkey/templates/sentinel-configmap.yaml @@ -25,8 +25,9 @@ data: SENTINEL_CONF="/data/sentinel.conf" + # Logging function (outputs to stderr to avoid polluting stdout command substitutions) log() { - echo "$(date) $1" + echo "$(date) $1" >&2 } log "Initializing Sentinel configuration..." @@ -90,7 +91,22 @@ data: EOF {{- if .Values.auth.enabled }} - # Configure authentication for sentinel to connect to master/replicas + # 1. 
Generate NATIVE ACLs for INBOUND connections to Sentinel + log "Generating ACL entries for Sentinel incoming connections" + + {{- if .Values.auth.sentinelAclUsers }} + {{- range $username, $user := .Values.auth.sentinelAclUsers }} + {{- $passwordKey := $user.passwordKey | default $username }} + + PASSWORD=$(get_user_password "{{ $username }}" "{{ $passwordKey }}") || exit 1 + # Hash password to prevent plaintext leak in the config file + PASSHASH=$(echo -n "$PASSWORD" | sha256sum | cut -f 1 -d " ") + echo "user {{ $username }} on #$PASSHASH {{ $user.permissions }}" >> "$SENTINEL_CONF" + {{- end }} + {{- end }} + + # 2. Configure OUTBOUND connections from Sentinel to Valkey + # Fetch the replication user configured in the Valkey block (aclUsers) {{- $replUsername := .Values.replica.replicationUser }} {{- $replUser := index .Values.auth.aclUsers $replUsername }} {{- $replPasswordKey := $replUser.passwordKey | default $replUsername }} @@ -98,11 +114,12 @@ data: cat >> "$SENTINEL_CONF" << EOF - # Authentication configuration + # Master/Replica authentication configuration + sentinel auth-user ${MASTER_SET} {{ $replUsername }} sentinel auth-pass ${MASTER_SET} ${AUTH_PASSWORD} EOF - log "Configured authentication for sentinel" + log "Configured native ACLs and authentication for sentinel" {{- end }} {{- if .Values.tls.enabled }} @@ -128,5 +145,5 @@ data: {{- end }} log "Sentinel configuration complete" - cat "$SENTINEL_CONF" -{{- end }} + # IMPORTANT: Do not cat $SENTINEL_CONF here to avoid leaking secrets to stdout +{{- end }} \ No newline at end of file diff --git a/valkey/values.schema.json b/valkey/values.schema.json index 622de8e8..f7e49bbd 100644 --- a/valkey/values.schema.json +++ b/valkey/values.schema.json @@ -19,6 +19,9 @@ }, "usersExistingSecret": { "type": "string" + }, + "sentinelAclUsers": { + "type": "object" } } }, @@ -110,6 +113,7 @@ "type": "string" } } + }, "haproxy": { "type": "object", "properties": { diff --git a/valkey/values.yaml 
b/valkey/values.yaml index 5ac8dbe3..f794a003 100644 --- a/valkey/values.yaml +++ b/valkey/values.yaml @@ -188,6 +188,15 @@ auth: # read-user: # permissions: "~* -@all +@read +ping +info" + sentinelAclUsers: + default: + passwordKey: "" + permissions: "" + + # haproxy_watcher: + # passwordKey: "" + # permissions: "" + # Inline ACL configuration that will be appended after generated users. # NOTE: If using aclConfig, ensure the 'default' user is defined here. aclConfig: "" @@ -530,7 +539,7 @@ haproxy: image: registry: docker.io repository: haproxy - tag: 2.9 + tag: "2.9" pullPolicy: IfNotPresent replicas: 3 service: From fccfbbc8987cfef7cf4c00f288e466affbf3c20f Mon Sep 17 00:00:00 2001 From: jose Date: Fri, 3 Apr 2026 09:50:22 +0200 Subject: [PATCH 2/5] add Inter-Sentinel and Sentinel-Monitor authentication configuration --- valkey/templates/sentinel-configmap.yaml | 47 ++++++++++++++++++------ valkey/values.yaml | 21 ++++++++++- 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/valkey/templates/sentinel-configmap.yaml b/valkey/templates/sentinel-configmap.yaml index 774903bd..a1e1474a 100644 --- a/valkey/templates/sentinel-configmap.yaml +++ b/valkey/templates/sentinel-configmap.yaml @@ -93,32 +93,58 @@ data: {{- if .Values.auth.enabled }} # 1. Generate NATIVE ACLs for INBOUND connections to Sentinel log "Generating ACL entries for Sentinel incoming connections" - + {{- if .Values.auth.sentinelAclUsers }} {{- range $username, $user := .Values.auth.sentinelAclUsers }} {{- $passwordKey := $user.passwordKey | default $username }} - + PASSWORD=$(get_user_password "{{ $username }}" "{{ $passwordKey }}") || exit 1 - # Hash password to prevent plaintext leak in the config file PASSHASH=$(echo -n "$PASSWORD" | sha256sum | cut -f 1 -d " ") echo "user {{ $username }} on #$PASSHASH {{ $user.permissions }}" >> "$SENTINEL_CONF" {{- end }} {{- end }} - # 2. 
Configure OUTBOUND connections from Sentinel to Valkey - # Fetch the replication user configured in the Valkey block (aclUsers) - {{- $replUsername := .Values.replica.replicationUser }} - {{- $replUser := index .Values.auth.aclUsers $replUsername }} - {{- $replPasswordKey := $replUser.passwordKey | default $replUsername }} - AUTH_PASSWORD=$(get_user_password "{{ $replUsername }}" "{{ $replPasswordKey }}") || exit 1 + # 2. Configure OUTBOUND connections from Sentinel to Valkey Master/Replicas + # Fetch the dedicated monitor user, falling back to the replication user for backward compatibility + {{- $monitorUsername := .Values.replica.sentinel.monitorUser | default .Values.replica.replicationUser }} + {{- $monitorUser := index .Values.auth.aclUsers $monitorUsername }} + {{- $monitorPasswordKey := $monitorUser.passwordKey | default $monitorUsername }} + AUTH_PASSWORD=$(get_user_password "{{ $monitorUsername }}" "{{ $monitorPasswordKey }}") || exit 1 cat >> "$SENTINEL_CONF" << EOF # Master/Replica authentication configuration - sentinel auth-user ${MASTER_SET} {{ $replUsername }} sentinel auth-pass ${MASTER_SET} ${AUTH_PASSWORD} EOF + {{- if ne $monitorUsername "default" }} + # Only append auth-user if the username is not 'default' (Valkey compatibility) + echo "sentinel auth-user ${MASTER_SET} {{ $monitorUsername }}" >> "$SENTINEL_CONF" + {{- end }} + + # 3. 
Configure INTER-SENTINEL authentication + # This is critical for Sentinels to discover each other via gossip when ACLs are enabled + {{- if .Values.replica.sentinel.interSentinelUser }} + {{- $interSentinelUser := .Values.replica.sentinel.interSentinelUser }} + {{- $interSentinelUserObj := index .Values.auth.sentinelAclUsers $interSentinelUser }} + {{- $interSentinelPasswordKey := $interSentinelUserObj.passwordKey | default $interSentinelUser }} + + SENTINEL_PASS=$(get_user_password "{{ $interSentinelUser }}" "{{ $interSentinelPasswordKey }}") || exit 1 + + cat >> "$SENTINEL_CONF" << EOF + + # Sentinel-to-Sentinel authentication configuration + sentinel sentinel-pass ${SENTINEL_PASS} + EOF + + {{- if ne $interSentinelUser "default" }} + # Only append sentinel-user if the username is not 'default' (Valkey compatibility) + echo "sentinel sentinel-user {{ $interSentinelUser }}" >> "$SENTINEL_CONF" + {{- end }} + + log "Configured Inter-Sentinel authentication using user {{ $interSentinelUser }}" + {{- end }} + log "Configured native ACLs and authentication for sentinel" {{- end }} @@ -145,5 +171,4 @@ data: {{- end }} log "Sentinel configuration complete" - # IMPORTANT: Do not cat $SENTINEL_CONF here to avoid leaking secrets to stdout {{- end }} \ No newline at end of file diff --git a/valkey/values.yaml b/valkey/values.yaml index f794a003..498d85ea 100644 --- a/valkey/values.yaml +++ b/valkey/values.yaml @@ -192,7 +192,13 @@ auth: default: passwordKey: "" permissions: "" - + # Example for isolated inter-sentinel gossip: + # sentinel.inter.node: + # passwordKey: "inter-node-pwd" + # permissions: "on resetchannels -@all +auth +hello +ping +sentinel +subscribe +publish &*" + # haproxy_watcher: + # passwordKey: "watcher-pwd" + # permissions: "on -@all +auth +hello +ping +sentinel|master +sentinel|masters +sentinel|sentinels" # haproxy_watcher: # passwordKey: "" # permissions: "" @@ -272,6 +278,17 @@ replica: # Enable Sentinel for high availability and automatic failover 
enabled: false + # Username for Sentinel to authenticate to the Valkey master/replicas. + # If not defined or left empty, the script will fall back to using replicationUser. + # WARNING: If the 'default' user is disabled in auth.aclUsers, this MUST be set to a + # valid user with Pub/Sub and monitoring permissions. + monitorUser: "default" + + # Declare the specific user Sentinels will use to authenticate with EACH OTHER. + # WARNING: If the 'default' user is disabled in auth.sentinelAclUsers, this MUST be + # set to a valid user to prevent the 'empty array' discovery error. + interSentinelUser: "default" + # Port on which sentinel will listen port: 26379 @@ -581,4 +598,4 @@ haproxy: # requests: # cpu: 10m # memory: 16Mi - securityContext: {} + securityContext: {} \ No newline at end of file From 0e9d6f398179d4ad270ca148db72c3670039ab05 Mon Sep 17 00:00:00 2001 From: jose Date: Fri, 3 Apr 2026 10:00:40 +0200 Subject: [PATCH 3/5] Remove CHANGELOG..md --- CHANGELOG..md | 38 -------------------------------------- 1 file changed, 38 deletions(-) delete mode 100644 CHANGELOG..md diff --git a/CHANGELOG..md b/CHANGELOG..md deleted file mode 100644 index da9a0501..00000000 --- a/CHANGELOG..md +++ /dev/null @@ -1,38 +0,0 @@ -# Helm Chart Fixes and Improvements - -## Fixed Schema and Configuration Issues -- 2026-04-02 - - Fixed missing comma in `values.schema.json` that caused validation errors - - Added `sentinelAclUsers` definition to the JSON schema for proper validation - - Fixed HAProxy image tag value: converted `tag: 2.9` to `tag: "2.9"` (string instead of number) to match schema definition - - All changes ensure `helm lint` passes without errors - -## Added Native ACL Support for Sentinel - -- 2026-04-02 - - Implemented Access Control List (ACL) management for Valkey Sentinel in `sentinel-configmap.yaml` (lines 92-120) - - **Features:** - - Generate NATIVE ACLs for INBOUND connections to Sentinel from `values.auth.sentinelAclUsers` - - Generate OUTBOUND authentication 
configuration for Sentinel to connect to Valkey master/replicas using `values.auth.aclUsers` - - Secure password handling with SHA256 hashing to prevent plaintext credentials in config files - - Automatic replication user authentication for master/replica monitoring - - **Security Improvements:** - - Modified logging function to output to stderr (`>&2`) instead of stdout to prevent passwords and sensitive data from leaking into command substitutions or configuration files - - Ensures log messages do not interfere with command output that may contain credentials - - **Implementation Details:** - - 1. **INBOUND ACL Generation (lines 93-105):** - - Generates native ACL entries for clients connecting to Sentinel - - Iterates through `values.auth.sentinelAclUsers` configuration - - Retrieves password for each Sentinel ACL user - - Hashes password with SHA256: `PASSHASH=$(echo -n "$PASSWORD" | sha256sum | cut -f 1 -d " ")` - - Writes ACL entry: `echo "user {{ $username }} on #$PASSHASH {{ $user.permissions }}" >> "$SENTINEL_CONF"` - - 2. **OUTBOUND Authentication Configuration (lines 107-120):** - - Configures Sentinel authentication credentials to connect to Valkey master and replicas - - Fetches replication user from `values.auth.aclUsers` (Valkey ACL block) - - Retrieves password using `get_user_password()` helper function - - Sets: `sentinel auth-user` and `sentinel auth-pass` for master set monitoring From 97b283854f3aa222571f3c1d824e7120dc4d2bc0 Mon Sep 17 00:00:00 2001 From: jose Date: Tue, 7 Apr 2026 14:29:13 +0200 Subject: [PATCH 4/5] feat(sentinel): Updated HAProxy Sentinel Watcher - Updating HAProxy watcher for near-instant IP-based failover. - Refactoring init scripts to support dynamic topology and universal auth/TLS injection. - Adding smart L7 health checks in HAProxy to handle ACL-protected nodes. - Fully parameterizing Service and ConfigMap ports for end-to-end flexibility. 
--- valkey/templates/haproxy-configmap.yaml | 201 +++++++++++++----------- valkey/templates/haproxy-service.yaml | 29 ++++ valkey/templates/init_config.yaml | 149 ++++++++---------- valkey/values.yaml | 9 +- 4 files changed, 212 insertions(+), 176 deletions(-) diff --git a/valkey/templates/haproxy-configmap.yaml b/valkey/templates/haproxy-configmap.yaml index 35dc500c..af67c765 100644 --- a/valkey/templates/haproxy-configmap.yaml +++ b/valkey/templates/haproxy-configmap.yaml @@ -20,13 +20,13 @@ data: retries 3 frontend valkey_frontend_write - bind *:6379 + bind *:{{ .Values.haproxy.service.port | default 6379 }} mode tcp option tcplog default_backend valkey_backend_master frontend valkey_frontend_read - bind *:6380 + bind *:{{ .Values.haproxy.service.readPort | default 6380 }} mode tcp option tcplog default_backend valkey_backend_read @@ -37,26 +37,37 @@ data: # the runtime socket (set server addr + enable/disable server). # This prevents HAProxy's own DNS resolver from putting servers into # DNS NX maintenance, which cannot be cleared by enable server. 
- {{ range $i := until (add (int .Values.replica.replicas) 1 | int) }} - server valkey-{{ $i }} 127.0.0.1:{{ $.Values.service.port }} no-check disabled - {{ end }} + {{- range $i := until (add (int .Values.replica.replicas) 1 | int) }} + server valkey-{{ $i }} 127.0.0.1:6379 check-send-proxy disabled + {{- end }} backend valkey_backend_read mode tcp option tcp-check - tcp-check connect - tcp-check comment PING + + # Step 1-2: Connect (must use SSL if TLS is enabled) + {{- if .Values.tls.enabled }} + tcp-check connect port {{ .Values.service.port }} ssl + {{- else }} + tcp-check connect port {{ .Values.service.port }} + {{- end }} + + # Step 3: Send PING tcp-check send "PING\r\n" - tcp-check expect rstring "\\+PONG|-NOAUTH" + + # Step 4: Expect PONG or any Auth error (which proves the service is up) + # We use a broader regex to catch "-NOAUTH" and "-ERR AUTH" + tcp-check expect rstring ^(\+PONG|-[Nn][Oo][Aa][Uu][Tt][Hh]|-[Ee][Rr][Rr]) + timeout connect 2s timeout check 5s # Read backend uses health checks since any replica is acceptable. # Servers start enabled; all will pass a simple PING check. 
- {{ range $i := until (add (int .Values.replica.replicas) 1 | int) }} - server valkey-{{ $i }} {{ include "valkey.fullname" $ }}-{{ $i }}.{{ include "valkey.headlessServiceName" $ }}.{{ $.Release.Namespace }}.svc.{{ $.Values.clusterDomain }}:{{ $.Values.service.port }} check inter 5s fall 3 rise 1 init-addr last,libc,none - {{ end }} + {{- range $i := until (add (int .Values.replica.replicas) 1 | int) }} + server valkey-{{ $i }} {{ include "valkey.fullname" $ }}-{{ $i }}.{{ include "valkey.headlessServiceName" $ }}.{{ $.Release.Namespace }}.svc.{{ $.Values.clusterDomain }}:{{ $.Values.service.port }} check inter 5s fall 3 rise 1 init-addr last,libc,none {{ if $.Values.tls.enabled }}ssl verify none{{ end }} + {{- end }} - sentinel-watcher.sh: | + sentinel-watcher.sh: |- #!/bin/sh # Sentinel watcher: polls Sentinel for master changes and updates HAProxy # via the runtime socket. @@ -69,134 +80,140 @@ data: # DNS NX maintenance that cannot be overridden by `enable server`). set -eu - SENTINEL_SVC="{{ include "valkey.fullname" . 
}}-sentinel.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}" SENTINEL_PORT="{{ .Values.replica.sentinel.port }}" MASTER_SET="{{ .Values.replica.sentinel.masterSet }}" + POLL_INTERVAL="{{ .Values.haproxy.config.checkInterval | default 2 }}" VALKEY_PORT="{{ .Values.service.port }}" HAPROXY_SOCKET="/var/run/haproxy/admin.sock" BACKEND="valkey_backend_master" - POLL_INTERVAL=2 TOTAL_SERVERS="{{ add (int .Values.replica.replicas) 1 }}" + # Logging to stderr prevents polluting stdout + log() { echo "$(date) $1" >&2; } + + {{- if .Values.auth.enabled }} + # Authentication helpers + get_user_password() { + username="$1" + password_key="${2:-$username}" + if [ -f "/valkey-users-secret/$password_key" ]; then + cat "/valkey-users-secret/$password_key" + elif [ -f "/valkey-auth-secret/${username}-password" ]; then + cat "/valkey-auth-secret/${username}-password" + fi + } + + {{- $watcherUser := .Values.haproxy.sentinelWatcher.user | default "default" }} + {{- $userObj := index .Values.auth.sentinelAclUsers $watcherUser | default (dict "passwordKey" "") }} + {{- $passKey := $userObj.passwordKey | default $watcherUser }} + + WATCHER_USER="{{ $watcherUser }}" + WATCHER_PASS=$(get_user_password "$WATCHER_USER" "{{ $passKey }}") + {{- end }} + + # Base command assembly for Auth and TLS support + CLI_BASE="valkey-cli -p ${SENTINEL_PORT}" + {{- if .Values.auth.enabled }} - {{- $replUsername := .Values.replica.replicationUser }} - {{- $replUser := index .Values.auth.aclUsers $replUsername }} - {{- $replPasswordKey := $replUser.passwordKey | default $replUsername }} - AUTH_ARG="-a $(cat /valkey-auth-secret/{{ $replPasswordKey }}-password 2>/dev/null || cat /valkey-users-secret/{{ $replPasswordKey }} 2>/dev/null)" - {{- else }} - AUTH_ARG="" + if [ "$WATCHER_USER" != "default" ]; then + CLI_BASE="${CLI_BASE} --user ${WATCHER_USER} -a ${WATCHER_PASS}" + else + CLI_BASE="${CLI_BASE} -a ${WATCHER_PASS}" + fi {{- end }} - log() { - echo "[sentinel-watcher] $(date 
'+%Y-%m-%dT%H:%M:%S') $*" + {{- if .Values.tls.enabled }} + CLI_BASE="${CLI_BASE} --tls --cacert /tls/{{ .Values.tls.caPublicKey }} --cert /tls/{{ .Values.tls.serverPublicKey }} --key /tls/{{ .Values.tls.serverKey }}" + {{- end }} + + # Wrapper to query Sentinel + sentinel_cmd() { + local host=$1 + shift + ${CLI_BASE} -h "${host}" "$@" } + # Wrapper to update HAProxy via socket haproxy_cmd() { echo "$1" | socat stdio "${HAPROXY_SOCKET}" } - # Resolve a hostname to an IPv4 address using getent (musl/Alpine libc resolver). - # Falls back to nslookup if getent fails. resolve_ip() { - hostname="$1" - ip=$(getent hosts "${hostname}" 2>/dev/null | awk '{print $1}' | head -1) - if [ -z "${ip}" ]; then - ip=$(nslookup "${hostname}" 2>/dev/null \ - | awk '/^Address:/{print $2}' \ - | grep -v '#' | head -1) - fi - echo "${ip}" - } - - # Query Sentinel: returns first line of get-master-addr-by-name (hostname or IP) - get_master_from_sentinel() { - valkey-cli -h "${SENTINEL_SVC}" \ - {{- if .Values.tls.enabled }} - --tls --cacert /tls/{{ .Values.tls.caPublicKey }} \ - {{- end }} - -p "${SENTINEL_PORT}" ${AUTH_ARG} \ - SENTINEL get-master-addr-by-name "${MASTER_SET}" 2>/dev/null | head -1 || true + getent hosts "$1" | awk '{print $1}' } + LAST_MASTER_HOST="" - # Extract server index from a hostname like "valkey-1.valkey-headless..." - index_from_host() { - local host="${1%%.*}" - echo "${host##*-}" - } - - wait_for_socket() { - log "Waiting for HAProxy socket..." - until [ -S "${HAPROXY_SOCKET}" ]; do sleep 1; done - log "HAProxy socket ready." - } + while true; do + MASTER_HOST="" - wait_for_sentinel() { - log "Waiting for Sentinel at ${SENTINEL_SVC}:${SENTINEL_PORT}..." - until valkey-cli -h "${SENTINEL_SVC}" \ - {{- if .Values.tls.enabled }} - --tls --cacert /tls/{{ .Values.tls.caPublicKey }} \ - {{- end }} - -p "${SENTINEL_PORT}" ${AUTH_ARG} PING 2>/dev/null | grep -q PONG; do - sleep 2 + # 1. 
Ask Sentinels who the master is + i=0 + while [ "${i}" -lt "${TOTAL_SERVERS}" ]; do + S_HOST="{{ include "valkey.fullname" . }}-${i}.{{ include "valkey.headlessServiceName" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}" + + RESP=$(sentinel_cmd "${S_HOST}" sentinel get-master-addr-by-name "${MASTER_SET}" 2>/dev/null) || { + i=$((i + 1)) + continue + } + + if [ -n "${RESP}" ]; then + MASTER_HOST=$(echo "${RESP}" | head -n 1) + break + fi + i=$((i + 1)) done - log "Sentinel is ready." - } - - wait_for_socket - wait_for_sentinel - - LAST_MASTER_IDX="" - - while true; do - MASTER_HOST=$(get_master_from_sentinel) if [ -z "${MASTER_HOST}" ]; then - log "WARN: Could not get master from Sentinel, retrying..." sleep "${POLL_INTERVAL}" continue fi - MASTER_IDX=$(index_from_host "${MASTER_HOST}") - - if [ -z "${MASTER_IDX}" ]; then - log "WARN: Could not parse master index from '${MASTER_HOST}', retrying..." + # 2. Check if master actually changed + if [ "${MASTER_HOST}" = "${LAST_MASTER_HOST}" ]; then sleep "${POLL_INTERVAL}" continue fi - if [ "${MASTER_IDX}" = "${LAST_MASTER_IDX}" ]; then + # 3. Find the actual HAProxy backend index for this master + MASTER_IDX="" + j=0 + while [ "${j}" -lt "${TOTAL_SERVERS}" ]; do + EXPECTED_HOST="{{ include "valkey.fullname" . }}-${j}.{{ include "valkey.headlessServiceName" . 
}}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}" + if [ "${MASTER_HOST}" = "${EXPECTED_HOST}" ]; then + MASTER_IDX="${j}" + break + fi + j=$((j + 1)) + done + + if [ -z "${MASTER_IDX}" ]; then + log "WARN: Unknown master host '${MASTER_HOST}'" sleep "${POLL_INTERVAL}" continue fi - log "Master changed: index ${LAST_MASTER_IDX:-none} -> ${MASTER_IDX} (host: ${MASTER_HOST})" + log "Master changed: ${LAST_MASTER_HOST:-none} -> ${MASTER_HOST} (HAProxy Backend Index: valkey-${MASTER_IDX})" - # Resolve hostname to IP so HAProxy connects directly (no DNS resolver involved) MASTER_IP=$(resolve_ip "${MASTER_HOST}") if [ -z "${MASTER_IP}" ]; then log "WARN: Could not resolve IP for '${MASTER_HOST}', retrying..." sleep "${POLL_INTERVAL}" continue fi - log "Resolved master IP: ${MASTER_IP}" - # Disable old master servers first (no traffic interruption gap) - i=0 - while [ "${i}" -lt "${TOTAL_SERVERS}" ]; do - if [ "${i}" != "${MASTER_IDX}" ]; then - log "Disabling server valkey-${i} in ${BACKEND}" - haproxy_cmd "disable server ${BACKEND}/valkey-${i}" > /dev/null + # 4. 
Update HAProxy routing safely + j=0 + while [ "${j}" -lt "${TOTAL_SERVERS}" ]; do + if [ "${j}" != "${MASTER_IDX}" ]; then + haproxy_cmd "disable server ${BACKEND}/valkey-${j}" > /dev/null || true fi - i=$((i + 1)) + j=$((j + 1)) done - # Point the new master server to its actual Pod IP and enable it - log "Setting server valkey-${MASTER_IDX} addr ${MASTER_IP}:${VALKEY_PORT} and enabling" haproxy_cmd "set server ${BACKEND}/valkey-${MASTER_IDX} addr ${MASTER_IP} port ${VALKEY_PORT}" > /dev/null haproxy_cmd "enable server ${BACKEND}/valkey-${MASTER_IDX}" > /dev/null - LAST_MASTER_IDX="${MASTER_IDX}" - log "HAProxy updated: valkey-${MASTER_IDX} (${MASTER_IP}:${VALKEY_PORT}) is now active in ${BACKEND}" + LAST_MASTER_HOST="${MASTER_HOST}" sleep "${POLL_INTERVAL}" done -{{- end }} +{{- end }} \ No newline at end of file diff --git a/valkey/templates/haproxy-service.yaml b/valkey/templates/haproxy-service.yaml index 774b4d3f..259e1cb8 100644 --- a/valkey/templates/haproxy-service.yaml +++ b/valkey/templates/haproxy-service.yaml @@ -25,3 +25,32 @@ spec: {{- include "valkey.selectorLabels" . | nindent 4 }} app.kubernetes.io/component: haproxy {{- end }} + + +{{- if .Values.haproxy.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "valkey.fullname" . }}-haproxy + labels: + {{- include "valkey.labels" . | nindent 4 }} + app.kubernetes.io/component: haproxy + annotations: + {{- with .Values.haproxy.service.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.haproxy.service.type }} + ports: + - port: {{ .Values.haproxy.service.port }} + targetPort: 6379 + protocol: TCP + name: valkey-write + - port: {{ .Values.haproxy.service.readPort | default 6380 }} + targetPort: {{ .Values.haproxy.service.readPort | default 6380 }} + protocol: TCP + name: valkey-read + selector: + {{- include "valkey.selectorLabels" . 
| nindent 4 }} + app.kubernetes.io/component: haproxy +{{- end }} \ No newline at end of file diff --git a/valkey/templates/init_config.yaml b/valkey/templates/init_config.yaml index a5607e13..cb712c5b 100644 --- a/valkey/templates/init_config.yaml +++ b/valkey/templates/init_config.yaml @@ -170,93 +170,83 @@ data: # Replica mode configuration log "Configuring replication mode" - # Use POD_INDEX from Kubernetes metadata POD_INDEX=${POD_INDEX:-0} - IS_MASTER=false - + + # 1. Determine Initial Topology or Restored State + REPLICA_TARGET="" if [ "${SENTINEL_MODE:-false}" = "true" ]; then if [ -n "$SAVED_REPLICA_LINE" ]; then - { - echo "" - echo "# Restored Sentinel Configuration" - echo "$SAVED_REPLICA_LINE" - } >>"$VALKEY_CONFIG" - log "Restored replication state from previous run" + # CRITICAL: Restore topology written by Sentinel (CONFIG REWRITE) + log "Sentinel Mode: Restoring replication state from previous run: $SAVED_REPLICA_LINE" + REPLICA_TARGET="$SAVED_REPLICA_LINE" + elif [ "$POD_INDEX" != "0" ]; then + # Fresh boot: Initial cluster assembly. Nodes > 0 follow node 0. + MASTER_HOST="{{ include "valkey.fullname" . }}-0.{{ include "valkey.headlessServiceName" . 
}}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}" + MASTER_PORT="{{ .Values.service.port }}" + REPLICA_TARGET="replicaof $MASTER_HOST $MASTER_PORT" + log "Sentinel Mode (Fresh): Node $POD_INDEX starting as replica of $MASTER_HOST" else - # First run or no previous state - if [ "$POD_INDEX" = "0" ]; then - IS_MASTER=true - log "Sentinel Mode (Bootstrap): Pod-0 defaults to MASTER" - else - # For bootstrap, others should replicate 0 - IS_MASTER=false - log "Sentinel Mode (Bootstrap): Pod-$POD_INDEX defaults to REPLICA" - fi + log "Sentinel Mode (Fresh): Node 0 starting as initial MASTER" fi else - # Standard Replication Logic - # Check if this is pod-0 (master) - if [ "$POD_INDEX" = "0" ]; then - IS_MASTER=true - log "This pod (index $POD_INDEX) is configured as MASTER" - else - log "This pod (index $POD_INDEX) is configured as REPLICA" - fi + # Standard Mode (Non-Sentinel) + if [ "$POD_INDEX" != "0" ]; then + MASTER_HOST="{{ include "valkey.fullname" . }}-0.{{ include "valkey.headlessServiceName" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}" + MASTER_PORT="{{ .Values.service.port }}" + REPLICA_TARGET="replicaof $MASTER_HOST $MASTER_PORT" + fi fi - # Configure replica settings - if [ "$IS_MASTER" = "false" ]; then - - if [ "${SENTINEL_MODE:-false}" = "true" ] && [ -n "$SAVED_REPLICA_LINE" ]; then - log "Skipping default replica configuration as we restored state." - else - MASTER_HOST="{{ include "valkey.fullname" . }}-0.{{ include "valkey.headlessServiceName" . 
}}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomain }}" - MASTER_PORT="{{ .Values.service.port }}" - - log "Configuring replica to follow master at $MASTER_HOST:$MASTER_PORT" - - { - echo "" - echo "# Replica Configuration" - echo "replicaof $MASTER_HOST $MASTER_PORT" - {{- if .Values.replica.disklessSync }} - echo "" - echo "# Diskless replication" - echo "repl-diskless-sync yes" - echo "repl-diskless-sync-delay 5" - {{- end }} - echo "" - echo "# Replica priority for Sentinel failover (lower = higher priority, 0 = never promote)" - echo "replica-priority 100" - {{- if .Values.auth.enabled }} - echo "" - echo "# Master authentication" - {{- end }} - } >>"$VALKEY_CONFIG" - - {{- if .Values.auth.enabled }} - # Get the password for the replication user - {{- $replUsername := .Values.replica.replicationUser }} - {{- $replUser := index .Values.auth.aclUsers $replUsername }} - {{- $replPasswordKey := $replUser.passwordKey | default $replUsername }} - REPL_PASSWORD=$(get_user_password "{{ $replUsername }}" "{{ $replPasswordKey }}") || exit 1 - - # Write masterauth configuration - echo "masterauth $REPL_PASSWORD" >>"$VALKEY_CONFIG" - echo "masteruser {{ $replUsername }}" >>"$VALKEY_CONFIG" - log "Configured masterauth with user {{ $replUsername }}" - {{- end }} - - {{- if .Values.tls.enabled }} - # TLS for replication - { - echo "" - echo "# TLS for replication" - echo "tls-replication yes" - } >>"$VALKEY_CONFIG" - log "Enabled TLS for replication" - {{- end }} - fi + # 2. Inject Replication Target + if [ -n "$REPLICA_TARGET" ]; then + echo "" >> "$VALKEY_CONFIG" + echo "# Replication Target" >> "$VALKEY_CONFIG" + echo "$REPLICA_TARGET" >> "$VALKEY_CONFIG" + fi + + # 3. CRITICAL: Inject Replica Auth and TLS + # In Sentinel mode, ALL nodes (even Node 0) must have these configured, + # as any node can be demoted to a replica dynamically. 
+ if [ -n "$REPLICA_TARGET" ] || [ "${SENTINEL_MODE:-false}" = "true" ]; then + + # General replica settings + { + {{- if .Values.replica.disklessSync }} + echo "" + echo "# Diskless replication" + echo "repl-diskless-sync yes" + echo "repl-diskless-sync-delay 5" + {{- end }} + echo "" + echo "# Replica priority for Sentinel failover (lower = higher priority, 0 = never promote)" + echo "replica-priority 100" + } >>"$VALKEY_CONFIG" + + {{- if .Values.auth.enabled }} + # Master authentication + {{- $replUsername := .Values.replica.replicationUser }} + {{- $replUser := index .Values.auth.aclUsers $replUsername }} + {{- $replPasswordKey := $replUser.passwordKey | default $replUsername }} + REPL_PASSWORD=$(get_user_password "{{ $replUsername }}" "{{ $replPasswordKey }}") || exit 1 + + { + echo "" + echo "# Master authentication" + echo "masterauth $REPL_PASSWORD" + echo "masteruser {{ $replUsername }}" + } >>"$VALKEY_CONFIG" + log "Configured masterauth with user {{ $replUsername }}" + {{- end }} + + {{- if .Values.tls.enabled }} + # TLS for replication + { + echo "" + echo "# TLS for replication" + echo "tls-replication yes" + } >>"$VALKEY_CONFIG" + log "Enabled TLS for replication" + {{- end }} fi {{- if gt (int .Values.replica.minReplicasToWrite) 0 }} @@ -270,7 +260,6 @@ data: log "Configured write safety: require {{ .Values.replica.minReplicasToWrite }} replicas with max {{ .Values.replica.minReplicasMaxLag }}s lag" {{- end }} {{- end }} - # Append extra configs if present if [ -f /usr/local/etc/valkey/valkey.conf ]; then log "Appending /usr/local/etc/valkey/valkey.conf" diff --git a/valkey/values.yaml b/valkey/values.yaml index 498d85ea..d9cec4b0 100644 --- a/valkey/values.yaml +++ b/valkey/values.yaml @@ -199,9 +199,6 @@ auth: # haproxy_watcher: # passwordKey: "watcher-pwd" # permissions: "on -@all +auth +hello +ping +sentinel|master +sentinel|masters +sentinel|sentinels" - # haproxy_watcher: - # passwordKey: "" - # permissions: "" # Inline ACL configuration that 
will be appended after generated users. # NOTE: If using aclConfig, ensure the 'default' user is defined here. @@ -561,7 +558,8 @@ haproxy: replicas: 3 service: type: ClusterIP - port: 6379 + port: 6379 # Port for write operations + readPort: 6380 # Port for read operations annotations: {} resources: {} # limits: @@ -584,6 +582,9 @@ haproxy: # backend state via HAProxy's runtime socket, enabling near-instant failover. # Only deployed when replica.sentinel.enabled is true. sentinelWatcher: + # Dedicated sentinel ACL user for the watcher. + # Must be defined in auth.sentinelAclUsers + user: "default" # Uses valkey/valkey alpine image which ships valkey-cli and busybox nc. # Busybox nc supports -U (unix domain sockets) so no extra tools are needed. image: From d6b55d5256d276cc7aa7fa48ea94d0a3c9ee60f3 Mon Sep 17 00:00:00 2001 From: Jose Dominguez Date: Wed, 8 Apr 2026 14:50:25 +0200 Subject: [PATCH 5/5] Clean up haproxy-service and add support for commonLabels and podAnnotations in haproxy-deployment --- valkey/templates/haproxy-deployment.yaml | 25 ++++++++++++----- valkey/templates/haproxy-service.yaml | 35 ++---------------------- 2 files changed, 21 insertions(+), 39 deletions(-) diff --git a/valkey/templates/haproxy-deployment.yaml b/valkey/templates/haproxy-deployment.yaml index 0910647c..51fa5177 100644 --- a/valkey/templates/haproxy-deployment.yaml +++ b/valkey/templates/haproxy-deployment.yaml @@ -14,19 +14,30 @@ spec: app.kubernetes.io/component: haproxy template: metadata: - annotations: - checksum/config: {{ include (print $.Template.BasePath "/haproxy-configmap.yaml") . | sha256sum }} labels: {{- include "valkey.selectorLabels" . | nindent 8 }} + {{- if .Values.haproxy.enabled }} app.kubernetes.io/component: haproxy + {{- end }} + {{- with .Values.commonLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + {{- with .Values.podAnnotations }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + checksum/config: {{ include (print $.Template.BasePath "/haproxy-configmap.yaml") . | sha256sum }} spec: automountServiceAccountToken: {{ .Values.serviceAccount.automount }} serviceAccountName: {{ include "valkey.serviceAccountName" . }} - {{- (include "valkey.imagePullSecrets" .) | nindent 6 }} - {{- with .Values.haproxy.extraInitContainers }} - initContainers: - {{- toYaml . | nindent 8 }} - {{- end }} + {{- (include "valkey.imagePullSecrets" .) | nindent 6 }} + {{- with .Values.haproxy.extraInitContainers }} + initContainers: + {{- toYaml . | nindent 8 }} + {{- end }} containers: - name: haproxy {{- if .Values.haproxy.image.registry }} diff --git a/valkey/templates/haproxy-service.yaml b/valkey/templates/haproxy-service.yaml index 259e1cb8..b89d83b7 100644 --- a/valkey/templates/haproxy-service.yaml +++ b/valkey/templates/haproxy-service.yaml @@ -13,37 +13,8 @@ metadata: spec: type: {{ .Values.haproxy.service.type }} ports: - - port: {{ .Values.haproxy.service.port }} - targetPort: 6379 - protocol: TCP - name: valkey-write - - port: 6380 - targetPort: 6380 - protocol: TCP - name: valkey-read - selector: - {{- include "valkey.selectorLabels" . | nindent 4 }} - app.kubernetes.io/component: haproxy -{{- end }} - - -{{- if .Values.haproxy.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "valkey.fullname" . }}-haproxy - labels: - {{- include "valkey.labels" . | nindent 4 }} - app.kubernetes.io/component: haproxy - annotations: - {{- with .Values.haproxy.service.annotations }} - {{- toYaml . 
| nindent 4 }} - {{- end }} -spec: - type: {{ .Values.haproxy.service.type }} - ports: - - port: {{ .Values.haproxy.service.port }} - targetPort: 6379 + - port: {{ .Values.haproxy.service.port | default 6379 }} + targetPort: {{ .Values.haproxy.service.port | default 6379 }} protocol: TCP name: valkey-write - port: {{ .Values.haproxy.service.readPort | default 6380 }} @@ -53,4 +24,4 @@ spec: selector: {{- include "valkey.selectorLabels" . | nindent 4 }} app.kubernetes.io/component: haproxy -{{- end }} \ No newline at end of file +{{- end }}