From d65a82565580be243c669e9609ae0e46283b98b5 Mon Sep 17 00:00:00 2001 From: YaacovHazan <31382944+YaacovHazan@users.noreply.github.com> Date: Thu, 14 May 2026 17:14:15 +0300 Subject: [PATCH 01/13] Set default for INLINE_LSE_ATOMICS to 0 for compatibility across architectures (#15212) Ensure backward compatibility and consistent behavior across different architectures by explicitly setting the default value. Fixes #15175 Co-authored-by: ofiryanai (cherry picked from commit 6c3a8ecceff085835a5388e97af0238646755bfe) --- modules/redisearch/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/redisearch/Makefile b/modules/redisearch/Makefile index 97ddd495754..21bd43922e0 100644 --- a/modules/redisearch/Makefile +++ b/modules/redisearch/Makefile @@ -3,5 +3,10 @@ MODULE_VERSION = v8.4.10 MODULE_REPO = https://github.com/redisearch/redisearch TARGET_MODULE = $(SRC_DIR)/bin/$(FULL_VARIANT)/search-community/redisearch.so + # Set INLINE_LSE_ATOMICS=1 for perf improvement on common ARM CPUs (i.e. Graviton2/3/4); no effect on x86 or macOS. + # Default 0 keeps the binary runnable on pre-Armv8.1-a cores (Cortex-A72, Graviton1, RPi4) that would otherwise SIGILL at module load. +INLINE_LSE_ATOMICS ?= 0 +export INLINE_LSE_ATOMICS + include ../common.mk From 53abd4389e84c6cfdea1ae4cfce09384250ac885 Mon Sep 17 00:00:00 2001 From: dannysheyn <35047315+dannysheyn@users.noreply.github.com> Date: Thu, 14 May 2026 08:05:17 +0300 Subject: [PATCH 02/13] Fix cluster-announce-ip rejecting hostnames (#15188) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes [#15183](https://github.com/redis/redis/issues/15183). ## Motivation Commit [cf668f2c2](https://github.com/redis/redis/commit/cf668f2c2c782ea12dc88458bfd329cf6eb5d658) tightened cluster-announce-ip validation to require a valid IPv4 or IPv6 address, which is a regression for users that legitimately announce a hostname. ## Changes * isValidClusterAnnounceIp() now accepts either: * A valid IPv4/IPv6 address * A valid hostname — same character rules as cluster-announce-hostname, length-bounded by NET_IP_STR_LEN to match the storage buffer. (cherry picked from commit 21f2569f9b577e2acb560f83652e2679c5bd6c92) (cherry picked from commit 54ea50c02926a1462823349b6d5262e635619075) --- src/config.c | 32 ++++++++++++++-------- tests/unit/cluster/announced-endpoints.tcl | 23 +++++++++++----- 2 files changed, 37 insertions(+), 18 deletions(-) diff --git a/src/config.c b/src/config.c index 83dd274b778..e3080910365 100644 --- a/src/config.c +++ b/src/config.c @@ -2401,13 +2401,7 @@ static int isValidAnnouncedNodename(char *val,const char **err) { return 1; } -static int isValidAnnouncedHostname(char *val, const char **err) { - if (strlen(val) >= NET_HOST_STR_LEN) { - *err = "Hostnames must be less than " - STRINGIFY(NET_HOST_STR_LEN) " characters"; - return 0; - } - +static int isValidHostnameChars(char *val, const char **err) { int i = 0; char c; while ((c = val[i])) { @@ -2425,6 +2419,15 @@ static int isValidAnnouncedHostname(char *val, const char **err) { return 1; } +static int isValidAnnouncedHostname(char *val, const char **err) { + if (strlen(val) >= NET_HOST_STR_LEN) { + *err = "Hostnames must be less than " + STRINGIFY(NET_HOST_STR_LEN) " characters"; + return 0; + } + return isValidHostnameChars(val, err); +} + /* Validation function for cluster-announce-ip. * Ensures the IP address is valid and rejects control characters. */ static int isValidClusterAnnounceIp(char *val, const char **err) { @@ -2434,12 +2437,19 @@ static int isValidClusterAnnounceIp(char *val, const char **err) { return 1; } - if (inet_pton(AF_INET, val, buf) != 1 && - inet_pton(AF_INET6, val, buf) != 1) { - *err = "Cluster announce IP must be a valid IPv4 or IPv6 address"; + /* Accept valid IPv4 or IPv6 */ + if (inet_pton(AF_INET, val, buf) == 1 || inet_pton(AF_INET6, val, buf) == 1) { + return 1; + } + /* Also accept valid hostnames, but limited to NET_IP_STR_LEN since + * cluster_announce_ip is stored in a NET_IP_STR_LEN buffer */ + if (strlen(val) >= NET_IP_STR_LEN) { + *err = "Hostnames for cluster-announce-ip must be less than " + STRINGIFY(NET_IP_STR_LEN) " characters"; return 0; } - return 1; + /* Also accept valid hostnames */ + return isValidHostnameChars(val, err); } /* Validate specified string is a valid proc-title-template */ diff --git a/tests/unit/cluster/announced-endpoints.tcl b/tests/unit/cluster/announced-endpoints.tcl index 5784ea6f512..58643a2a78a 100644 --- a/tests/unit/cluster/announced-endpoints.tcl +++ b/tests/unit/cluster/announced-endpoints.tcl @@ -75,21 +75,26 @@ start_cluster 2 2 {tags {external:skip cluster}} { # Tests for cluster-announce-ip validation test "cluster-announce-ip validation" { + # Reject control characters in IP-like values catch {R 0 config set cluster-announce-ip "192.168.1.100\nnext"} err - assert_match "*valid IPv4 or IPv6*" $err + assert_match "*alphanumeric*" $err catch {R 0 config set cluster-announce-ip "10.0.0.1\ttab"} err - assert_match "*valid IPv4 or IPv6*" $err + assert_match "*alphanumeric*" $err catch {R 0 config set cluster-announce-ip "1.2.3.4\r\n"} err - assert_match "*valid IPv4 or IPv6*" $err + assert_match "*alphanumeric*" $err - catch {R 0 config set cluster-announce-ip "redis-node-1.example.com"} err - assert_match "*valid IPv4 or IPv6*" $err + # Reject control characters in hostname-like values + catch {R 0 config set cluster-announce-ip "redis-node\nnext"} err + assert_match "*alphanumeric*" $err - catch {R 0 config set cluster-announce-ip "192.168.1"} err - assert_match "*valid IPv4 or IPv6*" $err + catch {R 0 config set cluster-announce-ip "redis-node\ttab"} err + assert_match "*alphanumeric*" $err + catch {R 0 config set cluster-announce-ip "redis-node\r\n"} err + assert_match "*alphanumeric*" $err + # Accept valid IPv4 R 0 config set cluster-announce-ip "192.168.1.100" assert_equal "192.168.1.100" [lindex [R 0 config get cluster-announce-ip] 1] @@ -98,6 +103,10 @@ start_cluster 2 2 {tags {external:skip cluster}} { R 0 config set cluster-announce-ip "2001:db8::1" assert_equal "2001:db8::1" [lindex [R 0 config get cluster-announce-ip] 1] + # Accept valid hostname + R 0 config set cluster-announce-ip "redis-node-1.example.com" + assert_equal "redis-node-1.example.com" [lindex [R 0 config get cluster-announce-ip] 1] + # Can be cleared R 0 config set cluster-announce-ip "" assert_equal "" [lindex [R 0 config get cluster-announce-ip] 1] From 3a2672baadcd33d05b5e745e17e890d8f49d7d58 Mon Sep 17 00:00:00 2001 From: Alessio Attilio Date: Tue, 24 Mar 2026 03:22:58 +0100 Subject: [PATCH 03/13] Skip memory prefetch during loading to avoid crash in dictEmpty callback (#14848) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #14838 ## Summary Fix a crash in `prefetchCommands()` that occurs during replica full sync when the replica has existing data that needs to be emptied. ## Problem Description During `emptyData()` → `kvstoreEmpty()` → `dictEmpty()` → `_dictClear()`, the first hash table is cleared and `d->ht_table[0]` is set to NULL via `_dictReset`. Then while clearing the second hash table, every 65536 buckets it invokes `replicationEmptyDbCallback()` → `processEventsWhileBlocked()` → `readQueryFromClient()` → `prefetchCommands()`. At this point, `dictSize() > 0` still holds (because the second hash table isn't fully cleared yet), but `ht_table[0]` is already NULL. The prefetch code assumed `ht_table[0]` is always valid when `dictSize() > 0`, leading to a crash. ## Solution 1. **Skip prefetch during loading**: Added a `server.loading` check at the top of `prefetchCommands()` to return early. During RDB loading, the main dictionary is being rebuilt, so prefetching keys from it is useless anyway. 2. **Add defensive assertion**: Added `serverAssert(batch->current_dicts[i]->ht_table[0])` in `initBatchInfo()` to catch any future cases where `ht_table[0]` is NULL while `dictSize() > 0` (which should only happen mid-`dictEmpty` via `_dictReset`). --------- Co-authored-by: kairosci Co-authored-by: debing.sun Co-authored-by: Yuan Wang (cherry picked from commit 1abd489d0716cbfd68fb1c39e0d1a0bba2dc6fc5) --- src/memory_prefetch.c | 7 +++++- tests/integration/replication.tcl | 40 +++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/src/memory_prefetch.c b/src/memory_prefetch.c index 651312d5bc9..540c2b7bd63 100644 --- a/src/memory_prefetch.c +++ b/src/memory_prefetch.c @@ -161,6 +161,11 @@ static void initBatchInfo(dict **dicts, GetValueDataFunc func) { info->state = PREFETCH_DONE; continue; } + + /* We skip prefetch during loading, so ht_table[0] should never be NULL + * when dictSize() > 0 (which only happens mid-dictEmpty via _dictReset). */ + serverAssert(batch->current_dicts[i]->ht_table[0]); + info->ht_idx = HT_IDX_INVALID; info->current_entry = NULL; info->current_kv = NULL; @@ -327,7 +332,7 @@ int determinePrefetchCount(int len) { * 2. Prefetch the keys and values for all commands in the current batch from * the main dictionaries. */ void prefetchCommands(void) { - if (!batch) return; + if (!batch || server.loading) return; /* Prefetch argv's for all clients */ for (size_t i = 0; i < batch->client_count; i++) { diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl index 038c73a1cbe..0c17b26baae 100644 --- a/tests/integration/replication.tcl +++ b/tests/integration/replication.tcl @@ -1904,3 +1904,43 @@ foreach type {script function} { } } } + +start_server {tags {"repl external:skip"}} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + start_server {overrides {io-threads 2}} { + set slave [srv 0 client] + + test {prefetchCommands handles NULL argv and keys during RDB replication with IO threads} { + # Enable diskless sync to trigger RDB streaming during replication + $master config set repl-diskless-sync yes + $master config set repl-diskless-sync-delay 0 + + # Populate keys in the format key:$i with 128-byte values. + $slave debug populate 700000 key 128 + + # Force a full resync by resetting the slave. + set rd [redis_deferring_client 0] + $rd slaveof $master_host $master_port + + # Create a large pipeline command. + set batch_size 1000 + set buf "" + for {set i 0} {$i < $batch_size} {incr i} { + append buf [format_command get key:1] + } + + # Continuously send pipelined commands so that the replica processes + # and prefetches them while it is emptying old data during full sync. + set start_time [clock milliseconds] + while {[clock milliseconds] - $start_time < 5000} { + $rd write $buf + $rd flush + if {[s 0 master_link_status] eq "up"} break + } + $rd close + } + } +} From b73afafb5d5d9a9a5b50d1a6a69d0174e6a5cb8f Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Fri, 21 Nov 2025 22:37:17 +0800 Subject: [PATCH 04/13] Fix min_cgroup_last_id cache not updated when destroying consumer group (#14552) ## Problem When destroying a consumer group with `XGROUP DESTROY`, the cached `min_cgroup_last_id` was not being invalidated. This caused incorrect behavior when using `XDELEX` with the `ACKED` option, as the cache still referenced the destroyed group's `last_id`. ## Solution Invalidate the `min_cgroup_last_id` cache when the destroyed group's `last_id` equals the cached minimum. The cache will be recalculated on the next call to `streamEntryIsReferenced()`. --------- Co-authored-by: guybe7 (cherry picked from commit bb6389e8237723b2c88325a4c4b7853a48db2da2) --- src/t_stream.c | 7 +++++++ tests/unit/type/stream-cgroups.tcl | 27 +++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/src/t_stream.c b/src/t_stream.c index 5d9ad86339b..c403e6c96ed 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -3130,6 +3130,7 @@ static void streamFreeCG(stream *s, streamCG *cg) { /* Destroy a consumer group and clean up all associated references. */ void streamDestroyCG(stream *s, streamCG *cg) { + /* Remove all references from the cgroups_ref. */ raxIterator it; raxStart(&it, cg->pel); raxSeek(&it, "^", NULL, 0); @@ -3139,6 +3140,12 @@ void streamDestroyCG(stream *s, streamCG *cg) { } raxStop(&it); + /* If we're destroying the group with the minimum last_id, the cached + * minimum is no longer valid and needs to be recalculated from the + * remaining groups. */ + if (s->min_cgroup_last_id_valid && streamCompareID(&s->min_cgroup_last_id, &cg->last_id) == 0) + s->min_cgroup_last_id_valid = 0; + streamFreeCG(s, cg); } diff --git a/tests/unit/type/stream-cgroups.tcl b/tests/unit/type/stream-cgroups.tcl index 132b69299e1..caf3d6fc2ed 100644 --- a/tests/unit/type/stream-cgroups.tcl +++ b/tests/unit/type/stream-cgroups.tcl @@ -713,6 +713,33 @@ start_server { assert_equal 0 [r XLEN mystream] } + test {XGROUP DESTROY correctly manage min_cgroup_last_id cache} { + r DEL mystream + # Add some entries + r XADD mystream 1-0 f1 v1 + r XADD mystream 2-0 f2 v2 + r XADD mystream 3-0 f3 v3 + r XADD mystream 4-0 f4 v4 + r XADD mystream 5-0 f5 v5 + + # Create two consumer groups + r XGROUP CREATE mystream group1 1-0 ;# min_cgroup_last_id is 1-0 now + r XGROUP CREATE mystream group2 3-0 + + # Entry 1-0 should be deletable (1-0 <= min_cgroup_last_id and not in any PEL) + assert_equal {1} [r XDELEX mystream ACKED IDS 1 1-0] + + # Entry 2-0 should be referenced (2-0 > 1-0, not yet consumed by all consume groups) + assert_equal {2} [r XDELEX mystream ACKED IDS 1 2-0] + + # Destroy group1 + # min_cgroup_last_id is 3-0 now + r XGROUP DESTROY mystream group1 + + # Entry 2-0 should now be deletable (2-0 < 3-0 and not in any PEL) + assert_equal {1} [r XDELEX mystream ACKED IDS 1 2-0] + } + test {RENAME can unblock XREADGROUP with data} { r del mystream{t} r XGROUP CREATE mystream{t} mygroup $ MKSTREAM From 788e3e219f2906855a12a8fd4cddd0a6d6dfe406 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Mon, 5 Jan 2026 21:17:36 +0800 Subject: [PATCH 05/13] Fix XTRIM/XADD with approx not deletes entries for DELREF/ACKED strategies (#14623) This bug was introduced by #14130 and found by guybe7 When using XTRIM/XADD with approx mode (~) and DELREF/ACKED delete strategies, if a node was eligible for removal but couldn't be removed directly (because consumer group references need to be checked), the code would incorrectly break out of the loop instead of continuing to process entries within the node. This fix allows the per-entry deletion logic to execute for eligible nodes when using non-KEEPREF strategies. (cherry picked from commit 9ca860be9ed6151044e046fd045bca4b5812dd68) --- src/t_stream.c | 11 +++++--- tests/unit/type/stream.tcl | 55 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 3 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index c403e6c96ed..4bd8e5bd018 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -702,7 +702,9 @@ typedef struct { int trim_strategy_arg_idx; /* Index of the count in MAXLEN/MINID, for rewriting. */ int delete_strategy; /* DELETE_STRATEGY_* */ int approx_trim; /* If 1 only delete whole radix tree nodes, so - * the trim argument is not applied verbatim. */ + * the trim argument is not applied verbatim. + * Note: This flag is ignored when delete_strategy is non-KEEPREF. + * Individual entries may still be processed for consumer groups. */ long long limit; /* Maximum amount of entries to trim. If 0, no limitation * on the amount of trimming work is enforced. */ /* TRIM_STRATEGY_MAXLEN options */ @@ -810,8 +812,11 @@ int64_t streamTrim(stream *s, streamAddTrimArgs *args) { } /* If we cannot remove a whole element, and approx is true, - * stop here. */ - if (approx) break; + * stop here. However, for non-KEEPREF strategies, if the node was + * eligible for removal but we couldn't remove it (because we need + * to check consumer group references), we should continue to process + * entries within this node. */ + if (approx && delete_strategy == DELETE_STRATEGY_KEEPREF) break; /* Now we have to trim entries from within 'lp' */ size_t oldsize = lpBytes(lp); diff --git a/tests/unit/type/stream.tcl b/tests/unit/type/stream.tcl index 9d888589d6b..1aeeca81d63 100644 --- a/tests/unit/type/stream.tcl +++ b/tests/unit/type/stream.tcl @@ -884,6 +884,61 @@ start_server { assert {[r XTRIM mystream MAXLEN ~ 0 LIMIT 1] == 0} assert {[r XTRIM mystream MAXLEN ~ 0 LIMIT 2] == 2} } + + test {XTRIM with approx and ACKED deletes entries correctly} { + # This test verifies that when using approx trim (~) with ACKED strategy, + # if the first node cannot be removed (has unacked messages), we should + # continue to check subsequent nodes that might be eligible for removal. + r DEL mystream + set origin_max_entries [config_get_set stream-node-max-entries 2] + + # Create 5 entries in 3 nodes (2 entries per node) + r XADD mystream 1-0 f v + r XADD mystream 2-0 f v + r XADD mystream 3-0 f v + r XADD mystream 4-0 f v + r XADD mystream 5-0 f v + + # Create a consumer group and read all messages + r XGROUP CREATE mystream mygroup 0 + r XREADGROUP GROUP mygroup consumer1 STREAMS mystream > + + # Acknowledge messages: 1-0, 2-0 (first node), and 4-0 (second node) + r XACK mystream mygroup 1-0 2-0 4-0 + + # XTRIM MINID ~ 6-0 ACKED should remove: + # Total 3 entries removed (1-0, 2-0, 4-0), 2 unacked entries remain (3-0, 5-0) + assert_equal 3 [r XTRIM mystream MINID ~ 6-0 ACKED] + assert_equal 2 [r XLEN mystream] + assert_equal {{3-0 {f v}} {5-0 {f v}}} [r XRANGE mystream - +] + + r config set stream-node-max-entries $origin_max_entries + } + + test {XTRIM with approx and DELREF deletes entries correctly} { + # Similar test but with DELREF strategy + r DEL mystream + set origin_max_entries [config_get_set stream-node-max-entries 2] + + # Create 4 entries in 2 nodes + r XADD mystream 1-0 f v + r XADD mystream 2-0 f v + r XADD mystream 3-0 f v + r XADD mystream 4-0 f v + + # Create a consumer group and read all messages + r XGROUP CREATE mystream mygroup 0 + r XREADGROUP GROUP mygroup consumer1 STREAMS mystream > + + # With XTRIM MINID ~ 5-0 DELREF, all eligible nodes should be trimmed + # and PEL entries should be cleaned up + assert_equal 4 [r XTRIM mystream MINID ~ 5-0 DELREF] + assert_equal 0 [r XLEN mystream] + # PEL should be empty after DELREF + assert_equal {0 {} {} {}} [r XPENDING mystream mygroup] + + r config set stream-node-max-entries $origin_max_entries + } } start_server {tags {"stream needs:debug"} overrides {appendonly yes}} { From 6848b649cc63395f78d91905dbbffea4ce29f126 Mon Sep 17 00:00:00 2001 From: Stav-Levi <45394834+StavRLevi@users.noreply.github.com> Date: Thu, 8 Jan 2026 08:41:55 +0200 Subject: [PATCH 06/13] Fix ACL key-pattern bypass in MSETEX command (#14659) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MSETEX doesn't properly check ACL key permissions for all keys - only the first key is validated. MSETEX arguments look like: MSETEX key1 val1 key2 val2 ... EX seconds Keys are at every 2nd position (step=2). When Redis extracts keys for ACL checking, it calculates where the last key is: last = first + numkeys - 1; => calculation ignores step last = first + (numkeys-1) * step; With 2 keys starting at position 2: Bug: last = 2 + 2 - 1 = 3 → only checks position 2 Fix: last = 2 + (2-1)*2 = 4 → checks positions 2 and 4 Fixes #14657 (cherry picked from commit 73249497d4d5f9133f519797127e34fee73e4b32) --- src/db.c | 2 +- tests/unit/introspection-2.tcl | 8 ++++++++ tests/unit/type/string.tcl | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/db.c b/src/db.c index 0a772dbf427..db2597d850d 100644 --- a/src/db.c +++ b/src/db.c @@ -3021,7 +3021,7 @@ int getKeysUsingKeySpecs(struct redisCommand *cmd, robj **argv, int argc, int se } first += spec->fk.keynum.firstkey; - last = first + (long)numkeys-1; + last = first + ((long)numkeys - 1) * step; } else { /* unknown spec */ goto invalid_spec; diff --git a/tests/unit/introspection-2.tcl b/tests/unit/introspection-2.tcl index 2d730477806..1e98fdb6a02 100644 --- a/tests/unit/introspection-2.tcl +++ b/tests/unit/introspection-2.tcl @@ -161,6 +161,14 @@ start_server {tags {"introspection"}} { assert_error "ERR Invalid arguments*" {r command getkeysandflags ZINTERSTORE zz 1443677133621497600 asdf} } + test {COMMAND GETKEYSANDFLAGS MSETEX} { + assert_equal {{k1 {OW update}}} [r command getkeysandflags msetex 1 k1 v1 ex 10] + assert_equal {{k1 {OW update}} {k2 {OW update}}} [r command getkeysandflags msetex 2 k1 v1 k2 v2 ex 10] + assert_equal {{k1 {OW update}} {k2 {OW update}} {k3 {OW update}}} [r command getkeysandflags msetex 3 k1 v1 k2 v2 k3 v3 ex 10] + assert_equal {{k1 {OW update}} {k2 {OW update}}} [r command getkeysandflags msetex 2 k1 v1 k2 v2 keepttl] + assert_equal {{k1 {OW update}} {k2 {OW update}}} [r command getkeysandflags msetex 2 k1 v1 k2 v2 ex 10 nx] + } + test {COMMAND GETKEYS MEMORY USAGE} { assert_equal {key} [r command getkeys memory usage key] } diff --git a/tests/unit/type/string.tcl b/tests/unit/type/string.tcl index e49bcca07ed..d0a91351769 100644 --- a/tests/unit/type/string.tcl +++ b/tests/unit/type/string.tcl @@ -311,7 +311,7 @@ start_server {tags {"string"}} { test {MSETEX - error cases} { assert_error {*wrong number of arguments*} {r msetex} assert_error {*invalid numkeys value*} {r msetex key1 val1 ex 10} - assert_error {*wrong number of key-value pairs*} {r msetex 2 key1 val1 key2} + assert_error {*wrong number of key-value pairs*} {r msetex 2 key1{t} val1 key2{t}} assert_error {*syntax error*} {r msetex 1 key1 val1 invalid_flag} } From 7b1d0a41f20e17c15fb324af00f83e6a4a85cc40 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 7 Jan 2026 20:51:41 +0800 Subject: [PATCH 07/13] Fix UBSan error in stream trim when processing last entry (#14669) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary This bus was introduced by https://github.com/redis/redis/pull/14623 Before PR #14623, when a stream node was going to be fully removed, we would just delete the whole node directly instead of iterating through and deleting each entry. Now, with the XTRIM/XADD flags, we have to iterate and delete entries one by one. However, the implementation in issue #8169 didn’t consider the case where all entries are removed, so `p` can end up being NULL. Fixes an UndefinedBehaviorSanitizer error in `streamTrim()` when marking the last entry in a listpack as deleted. The issue occurs when performing pointer arithmetic on a NULL pointer after `lpNext()` reaches the end of the listpack. ## Solution If p is NULL, we skip the delta calculation and the calculation of new `p`. (cherry picked from commit 85ab4cab589daa9c2695ba3d89401d815e069c13) --- src/t_stream.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index 4bd8e5bd018..2766761965b 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -888,12 +888,12 @@ int64_t streamTrim(stream *s, streamAddTrimArgs *args) { if (can_delete) { /* Mark the entry as deleted. */ - intptr_t delta = p - lp; + intptr_t delta = p ? (p - lp) : 0; /* p may be NULL if this was the last entry */ flags |= STREAM_ITEM_FLAG_DELETED; lp = lpReplaceInteger(lp, &pcopy, flags); deleted_from_lp++; s->length--; - p = lp + delta; + if (p) p = lp + delta; } } } From 93dafa67cbf8140b94425f1c91432356f0deed7a Mon Sep 17 00:00:00 2001 From: Moti Cohen Date: Tue, 3 Mar 2026 13:56:58 +0200 Subject: [PATCH 08/13] Fix setModuleEnumConfig() to pass unprefixed name to callbacks (#14816) `setModuleEnumConfig()` was passing the prefixed config name to module callbacks instead of the unprefixed name, inconsistent with other config types. Fixed by using getRegisteredConfigName() like bool, numeric, and string configs do. Added assertions to all test module config callbacks to validate correct unprefixed names are received. Issue was introduced by #13656 (cherry picked from commit 8a65b65d63cbf73d0d0713ba34467190f7198f7f) --- src/module.c | 3 +- tests/modules/moduleconfigs.c | 79 +++++++++++++++++++++++------------ 2 files changed, 55 insertions(+), 27 deletions(-) diff --git a/src/module.c b/src/module.c index b3514ea6a69..260983ae182 100644 --- a/src/module.c +++ b/src/module.c @@ -13311,7 +13311,8 @@ int setModuleStringConfig(ModuleConfig *config, sds strval, const char **err) { int setModuleEnumConfig(ModuleConfig *config, int val, const char **err) { RedisModuleString *error = NULL; - int return_code = config->set_fn.set_enum(config->name, val, config->privdata, &error); + char *rname = getRegisteredConfigName(config); + int return_code = config->set_fn.set_enum(rname, val, config->privdata, &error); propagateErrorString(error, err); return return_code == REDISMODULE_OK ? 1 : 0; } diff --git a/tests/modules/moduleconfigs.c b/tests/modules/moduleconfigs.c index 04974f7527e..2c3701537eb 100644 --- a/tests/modules/moduleconfigs.c +++ b/tests/modules/moduleconfigs.c @@ -1,5 +1,7 @@ #include "redismodule.h" +#include #include +#include int mutable_bool_val, no_prefix_bool, no_prefix_bool2; int immutable_bool_val; long long longval, no_prefix_longval; @@ -13,38 +15,37 @@ int flagsval; * to point to the config, and they register the configs as such. Note that one could also just * use names if they wanted, and store anything in privdata. */ int getBoolConfigCommand(const char *name, void *privdata) { - REDISMODULE_NOT_USED(name); + assert(strcmp(name, "mutable_bool") == 0 || strcmp(name, "immutable_bool") == 0); return (*(int *)privdata); } int setBoolConfigCommand(const char *name, int new, void *privdata, RedisModuleString **err) { - REDISMODULE_NOT_USED(name); REDISMODULE_NOT_USED(err); + assert(strcmp(name, "mutable_bool") == 0 || strcmp(name, "immutable_bool") == 0); *(int *)privdata = new; return REDISMODULE_OK; } long long getNumericConfigCommand(const char *name, void *privdata) { - REDISMODULE_NOT_USED(name); + assert(strcmp(name, "numeric") == 0 || strcmp(name, "memory_numeric") == 0); return (*(long long *) privdata); } int setNumericConfigCommand(const char *name, long long new, void *privdata, RedisModuleString **err) { - REDISMODULE_NOT_USED(name); REDISMODULE_NOT_USED(err); + assert(strcmp(name, "numeric") == 0 || strcmp(name, "memory_numeric") == 0); *(long long *)privdata = new; return REDISMODULE_OK; } RedisModuleString *getStringConfigCommand(const char *name, void *privdata) { - REDISMODULE_NOT_USED(name); REDISMODULE_NOT_USED(privdata); + assert(strcmp(name, "string") == 0); return strval; } int setStringConfigCommand(const char *name, RedisModuleString *new, void *privdata, RedisModuleString **err) { - REDISMODULE_NOT_USED(name); - REDISMODULE_NOT_USED(err); REDISMODULE_NOT_USED(privdata); + assert(strcmp(name, "string") == 0); size_t len; if (!strcasecmp(RedisModule_StringPtrLen(new, &len), "rejectisfreed")) { *err = RedisModule_CreateString(NULL, "Cannot set string to 'rejectisfreed'", 36); @@ -57,29 +58,29 @@ int setStringConfigCommand(const char *name, RedisModuleString *new, void *privd } int getEnumConfigCommand(const char *name, void *privdata) { - REDISMODULE_NOT_USED(name); REDISMODULE_NOT_USED(privdata); + assert(strcmp(name, "enum") == 0); return enumval; } int setEnumConfigCommand(const char *name, int val, void *privdata, RedisModuleString **err) { - REDISMODULE_NOT_USED(name); - REDISMODULE_NOT_USED(err); REDISMODULE_NOT_USED(privdata); + REDISMODULE_NOT_USED(err); + assert(strcmp(name, "enum") == 0); enumval = val; return REDISMODULE_OK; } int getFlagsConfigCommand(const char *name, void *privdata) { - REDISMODULE_NOT_USED(name); REDISMODULE_NOT_USED(privdata); + assert(strcmp(name, "flags") == 0); return flagsval; } int setFlagsConfigCommand(const char *name, int val, void *privdata, RedisModuleString **err) { - REDISMODULE_NOT_USED(name); REDISMODULE_NOT_USED(err); REDISMODULE_NOT_USED(privdata); + assert(strcmp(name, "flags") == 0); flagsval = val; return REDISMODULE_OK; } @@ -102,34 +103,60 @@ int longlongApplyFunc(RedisModuleCtx *ctx, void *privdata, RedisModuleString **e return REDISMODULE_ERR; } return REDISMODULE_OK; -} +} RedisModuleString *getStringConfigUnprefix(const char *name, void *privdata) { - REDISMODULE_NOT_USED(name); REDISMODULE_NOT_USED(privdata); + assert(strcmp(name, "unprefix-string") == 0 || strcmp(name, "unprefix.string-alias") == 0); return strval2; } int setStringConfigUnprefix(const char *name, RedisModuleString *new, void *privdata, RedisModuleString **err) { - REDISMODULE_NOT_USED(name); - REDISMODULE_NOT_USED(err); REDISMODULE_NOT_USED(privdata); + REDISMODULE_NOT_USED(err); + assert(strcmp(name, "unprefix-string") == 0 || strcmp(name, "unprefix.string-alias") == 0); if (strval2) RedisModule_FreeString(NULL, strval2); RedisModule_RetainString(NULL, new); strval2 = new; return REDISMODULE_OK; } +int getBoolConfigUnprefix(const char *name, void *privdata) { + assert(strcmp(name, "unprefix-bool") == 0 || strcmp(name, "unprefix-bool-alias") == 0 || + strcmp(name, "unprefix-noalias-bool") == 0); + return (*(int *)privdata); +} + +int setBoolConfigUnprefix(const char *name, int new, void *privdata, RedisModuleString **err) { + REDISMODULE_NOT_USED(err); + assert(strcmp(name, "unprefix-bool") == 0 || strcmp(name, "unprefix-bool-alias") == 0 || + strcmp(name, "unprefix-noalias-bool") == 0); + *(int *)privdata = new; + return REDISMODULE_OK; +} + +long long getNumericConfigUnprefix(const char *name, void *privdata) { + assert(strcmp(name, "unprefix.numeric") == 0 || strcmp(name, "unprefix.numeric-alias") == 0); + return (*(long long *) privdata); +} + +int setNumericConfigUnprefix(const char *name, long long new, void *privdata, RedisModuleString **err) { + REDISMODULE_NOT_USED(err); + assert(strcmp(name, "unprefix.numeric") == 0 || strcmp(name, "unprefix.numeric-alias") == 0); + *(long long *)privdata = new; + return REDISMODULE_OK; +} + int getEnumConfigUnprefix(const char *name, void *privdata) { - REDISMODULE_NOT_USED(name); REDISMODULE_NOT_USED(privdata); + assert(strcmp(name, "unprefix-enum") == 0 || strcmp(name, "unprefix-enum-alias") == 0); return no_prefix_enumval; } int setEnumConfigUnprefix(const char *name, int val, void *privdata, RedisModuleString **err) { - REDISMODULE_NOT_USED(name); - REDISMODULE_NOT_USED(err); REDISMODULE_NOT_USED(privdata); + REDISMODULE_NOT_USED(err); + assert(strcmp(name, "unprefix-enum") == 0 || strcmp(name, "unprefix-enum-alias") == 0); no_prefix_enumval = val; return REDISMODULE_OK; } @@ -222,21 +249,21 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) } /*** unprefixed and aliased configuration ***/ - if (RedisModule_RegisterBoolConfig(ctx, "unprefix-bool|unprefix-bool-alias", 1, REDISMODULE_CONFIG_DEFAULT|REDISMODULE_CONFIG_UNPREFIXED, - getBoolConfigCommand, setBoolConfigCommand, NULL, &no_prefix_bool) == REDISMODULE_ERR) { + if (RedisModule_RegisterBoolConfig(ctx, "unprefix-bool|unprefix-bool-alias", 1, REDISMODULE_CONFIG_DEFAULT|REDISMODULE_CONFIG_UNPREFIXED, + getBoolConfigUnprefix, setBoolConfigUnprefix, NULL, &no_prefix_bool) == REDISMODULE_ERR) { RedisModule_Log(ctx, "warning", "Failed to register unprefix-bool"); return REDISMODULE_ERR; } if (RedisModule_RegisterBoolConfig(ctx, "unprefix-noalias-bool", 1, REDISMODULE_CONFIG_DEFAULT|REDISMODULE_CONFIG_UNPREFIXED, - getBoolConfigCommand, setBoolConfigCommand, NULL, &no_prefix_bool2) == REDISMODULE_ERR) { + getBoolConfigUnprefix, setBoolConfigUnprefix, NULL, &no_prefix_bool2) == REDISMODULE_ERR) { RedisModule_Log(ctx, "warning", "Failed to register unprefix-noalias-bool"); return REDISMODULE_ERR; - } - if (RedisModule_RegisterNumericConfig(ctx, "unprefix.numeric|unprefix.numeric-alias", -1, REDISMODULE_CONFIG_DEFAULT|REDISMODULE_CONFIG_UNPREFIXED, - -5, 2000, getNumericConfigCommand, setNumericConfigCommand, NULL, &no_prefix_longval) == REDISMODULE_ERR) { + } + if (RedisModule_RegisterNumericConfig(ctx, "unprefix.numeric|unprefix.numeric-alias", -1, REDISMODULE_CONFIG_DEFAULT|REDISMODULE_CONFIG_UNPREFIXED, + -5, 2000, getNumericConfigUnprefix, setNumericConfigUnprefix, NULL, &no_prefix_longval) == REDISMODULE_ERR) { RedisModule_Log(ctx, "warning", "Failed to register unprefix.numeric"); return REDISMODULE_ERR; - } + } if (RedisModule_RegisterStringConfig(ctx, "unprefix-string|unprefix.string-alias", "secret unprefix", REDISMODULE_CONFIG_DEFAULT|REDISMODULE_CONFIG_UNPREFIXED, getStringConfigUnprefix, setStringConfigUnprefix, NULL, NULL) == REDISMODULE_ERR) { RedisModule_Log(ctx, "warning", "Failed to register unprefix-string"); From 41c0ca8b3d0a4224eb2a4fcd6ba683cbae02080f Mon Sep 17 00:00:00 2001 From: Vitah Lin Date: Tue, 19 May 2026 18:27:33 +0800 Subject: [PATCH 09/13] Fix diskless replicas drop during rdb pipe test (#15131) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR is based on: valkey-io/valkey#3511 Close https://github.com/redis/redis/issues/14983 During diskless replication, if **any single replica** cannot accept a write (TCP send buffer full / `EAGAIN`), the master stops reading the RDB pipe entirely, stalling data delivery to **all** replicas — including fast ones that are ready to receive data. The failure reason is similar to https://github.com/redis/redis/pull/14946, the socket buffer is more easy to fill. In `rdbPipeReadHandler`, the master reads from the child's RDB pipe and writes to all replica sockets in a loop. When `connWrite` to any replica returns a partial write (socket send buffer full), the handler: 1. Installs a per-replica `rdbPipeWriteHandler` and increments `rdb_pipe_numconns_writing` 2. **Removes the pipe read event** via `aeDeleteFileEvent(server.el, server.rdb_pipe_read, AE_READABLE)`, stopping all pipe reads The pipe read event is only re-enabled when **all** pending write handlers complete (`rdb_pipe_numconns_writing == 0`), meaning the **slowest replica dictates the throughput for all replicas**. With one slow replica (consuming at ~290 KB/s due to `key-load-delay`): - Master bursts ~1.3 MB of RDB data until the slow replica's socket send buffer fills - `rdbPipeReadHandler` disables the pipe read event - **All replicas starve for 4–5 seconds** while the slow replica drains its buffer - Cycle repeats: burst → stall → burst → stall Ultimately, it leads to a very slow synchronization process of the entire master and replica. 1. Skip the entire `diskless replicas drop during rdb pipe` test under Valgrind to avoid timing flakiness on slow env. 2. Move `start_server` inside the `foreach all_drop` loop so each subcase gets a fresh master instead of sharing state across subcases. 3. For `no / slow / fast / all` subcases, replica 0 runs with `key-load-delay 500`, which combined with the blocked-writer TCP back-pressure can stall the RDB-saving child indefinitely; shrink the dataset to ~40 MB so the transfer still exercises the blocked-writer path but completes in reasonable time instead of hanging on the TCP deadlock. For the timeout subcase, replica 0 does not run with `key-load-delay 500`, so to avoid the TCP deadlock we still reduce the dataset somewhat, but keep it larger than the other subcases. Otherwise the kernel TCP send buffer can absorb the whole RDB, and we'd miss the repl_last_partial_write != 0 "(full sync)" timeout path and only hit the "(streaming sync)" path instead. 5. For the `all` subcase, set `rdb-key-save-delay 1000` on the master so the RDB child keeps generating data while both replicas are killed, ensuring the last-replica-drop path is exercised rather than racing with normal completion. 6. Move the slow-replica `pause_process()` so it happens only in the timeout subcase, not after killing replicas, so Redis observes the disconnect promptly in non-timeout flows. 7. In the timeout subcase, set `repl-timeout` 2, wait inline for `*Disconnecting timedout replica (full sync)*`, then restore `repl-timeout` 60 so the remaining replica can finish the streamed RDB. --------- Co-authored-by: Sarthak Aggarwal Co-authored-by: debing.sun (cherry picked from commit 31896140d1e940cad43d725e830cff0e49d060e5) --- tests/integration/replication.tcl | 112 +++++++++++++++++++++--------- 1 file changed, 81 insertions(+), 31 deletions(-) diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl index 0c17b26baae..8d0ad6439b1 100644 --- a/tests/integration/replication.tcl +++ b/tests/integration/replication.tcl @@ -880,27 +880,44 @@ proc compute_cpu_usage {start end} { return [ list $pucpu $pscpu ] } - +if {!$::valgrind} { # test diskless rdb pipe with multiple replicas, which may drop half way -start_server {tags {"repl external:skip tsan:skip"} overrides {save ""}} { - set master [srv 0 client] - $master config set repl-diskless-sync yes - $master config set repl-diskless-sync-delay 5 - $master config set repl-diskless-sync-max-replicas 2 - set master_host [srv 0 host] - set master_port [srv 0 port] - set master_pid [srv 0 pid] - # put enough data in the db that the rdb file will be bigger than the socket buffers - # and since we'll have key-load-delay of 100, 20000 keys will take at least 2 seconds - # we also need the replica to process requests during transfer (which it does only once in 2mb) - $master debug populate 20000 test 10000 - $master config set rdbcompression no - $master config set repl-rdb-channel no - # If running on Linux, we also measure utime/stime to detect possible I/O handling issues - set os [catch {exec uname}] - set measure_time [expr {$os == "Linux"} ? 1 : 0] - foreach all_drop {no slow fast all timeout} { +foreach all_drop {no slow fast all timeout} { + start_server {tags {"repl external:skip tsan:skip"} overrides {save ""}} { + set master [srv 0 client] + $master config set repl-diskless-sync yes + $master config set repl-diskless-sync-delay 5 + $master config set repl-diskless-sync-max-replicas 2 + set master_host [srv 0 host] + set master_port [srv 0 port] + set master_pid [srv 0 pid] + if {$all_drop == "timeout"} { + # Use a larger RDB (~100 MB) so it cannot fit into the kernel TCP + # send buffer (autotuning can absorb tens of MB on some hosts). We + # need the primary to hit the blocked writer path + # (repl_last_partial_write != 0) while the slow replica is paused, + # so the cron triggers the "(full sync)" timeout path instead of + # the replica being moved to ONLINE prematurely and timing out via + # the "(streaming sync)" path. + $master debug populate 10000 test 10000 + } else { + # Put enough data in the db that the RDB is comfortably larger than the + # pipe and socket buffers so the primary can hit the blocked writer path, + # but keep it small enough that slow TLS CI runners don't spend minutes + # draining an oversized transfer (~40 MB uncompressed). + $master debug populate 4000 test 10000 + } + $master config set rdbcompression no + $master config set repl-rdb-channel no + # If running on Linux, we also measure utime/stime to detect possible I/O handling issues + set os [catch {exec uname}] + set measure_time [expr {$os == "Linux"} ? 1 : 0] + test "diskless $all_drop replicas drop during rdb pipe" { + # Reset config that the timeout subcase may change, so a failing + # subcase does not leave the next one with an aggressive timeout. + $master config set repl-timeout 60 + $master config set rdb-key-save-delay 0 set replicas {} set replicas_alive {} # start one replica that will read the rdb fast, and one that will be slow @@ -916,7 +933,25 @@ start_server {tags {"repl external:skip tsan:skip"} overrides {save ""}} { # so that the whole rdb generation process is bound to that set loglines [count_log_lines -2] [lindex $replicas 0] config set repl-diskless-load swapdb - [lindex $replicas 0] config set key-load-delay 100 ;# 20k keys and 100 microseconds sleep means at least 2 seconds + [lindex $replicas 1] config set repl-diskless-load swapdb + if {$all_drop == "all"} { + # Keep the RDB child generating data long enough for + # both replicas to be killed before the pipe reaches + # EOF, so this subcase still covers the last-replica + # drop path instead of racing with normal completion. + $master config set rdb-key-save-delay 1000 + } + # For non-timeout subcases, use key-load-delay to keep + # replica 0 as a steady slow reader for the entire RDB + # transfer. This keeps the expected diskless pipe code + # paths covered without accepting alternate log outcomes. + if {$all_drop != "timeout"} { + # 4k keys with 500 microseconds each keeps replica 0 + # slow for about 2 seconds, which is long enough to + # fill the pipe without turning the transfer into a + # multi-minute TLS run. + [lindex $replicas 0] config set key-load-delay 500 + } [lindex $replicas 0] replicaof $master_host $master_port [lindex $replicas 1] replicaof $master_host $master_port @@ -930,9 +965,16 @@ start_server {tags {"repl external:skip tsan:skip"} overrides {save ""}} { set start_time [clock seconds] } - # wait a while so that the pipe socket writer will be - # blocked on write (since replica 0 is slow to read from the socket) - after 500 + if {$all_drop != "timeout"} { + # key-load-delay is already throttling the slow + # replica; just wait for the pipe to fill. + after 500 + } else { + # For the timeout subcase, stop the slow reader so it + # reaches repl-timeout during full sync. + pause_process [srv -1 pid] + after 500 + } # add some command to be present in the command stream after the rdb. $master incr $all_drop @@ -947,14 +989,17 @@ start_server {tags {"repl external:skip tsan:skip"} overrides {save ""}} { set replicas_alive [lreplace $replicas_alive 0 0] } if {$all_drop == "timeout"} { + # Let one replica hit repl-timeout while the slow reader + # is paused, then restore a generous timeout so the + # remaining replica can finish the streamed RDB. $master config set repl-timeout 2 - # we want the slow replica to hang on a key for very long so it'll reach repl-timeout - pause_process [srv -1 pid] - after 2000 + wait_for_log_messages -2 {"*Disconnecting timedout replica (full sync)*"} $loglines 200 100 + $master config set repl-timeout 60 } - # wait for rdb child to exit - wait_for_condition 500 100 { + # Use a single generous budget for all subcases; successful + # runs still exit early once the child is done. + wait_for_condition 5000 100 { [s -2 rdb_bgsave_in_progress] == 0 } else { fail "rdb child didn't terminate" @@ -971,7 +1016,6 @@ start_server {tags {"repl external:skip tsan:skip"} overrides {save ""}} { wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 1 replicas still up*"} $loglines 1 1 } if {$all_drop == "timeout"} { - wait_for_log_messages -2 {"*Disconnecting timedout replica (full sync)*"} $loglines 1 1 wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 1 replicas still up*"} $loglines 1 1 # master disconnected the slow replica, remove from array set replicas_alive [lreplace $replicas_alive 0 0] @@ -995,18 +1039,23 @@ start_server {tags {"repl external:skip tsan:skip"} overrides {save ""}} { assert {$master_utime < 70} assert {$master_stime < 70} } - if {!$::no_latency && ($all_drop == "none" || $all_drop == "fast")} { + if {!$::no_latency && ($all_drop == "no" || $all_drop == "fast")} { assert {$master_utime < 15} assert {$master_stime < 15} } } + # In the "no" case both replicas stay alive through the + # full streamed RDB, so on slow TLS runners the final + # ONLINE transition can lag behind child exit. + set replica_online_wait_tries [expr {$all_drop == "no" ? 600 : 150}] + # verify the data integrity foreach replica $replicas_alive { # Wait that replicas acknowledge they are online so # we are sure that DBSIZE and DEBUG DIGEST will not # fail because of timing issues. - wait_for_condition 150 100 { + wait_for_condition $replica_online_wait_tries 100 { [lindex [$replica role] 3] eq {connected} } else { fail "replicas still not connected after some time" @@ -1031,6 +1080,7 @@ start_server {tags {"repl external:skip tsan:skip"} overrides {save ""}} { } } } +} ;# end of valgrind test "diskless replication child being killed is collected" { # when diskless master is waiting for the replica to become writable From ef72e671c83fca1f57883ec034dc5c8427204090 Mon Sep 17 00:00:00 2001 From: RoyBenMoshe <112958533+RoyBenMoshe@users.noreply.github.com> Date: Tue, 25 Nov 2025 09:30:43 +0200 Subject: [PATCH 10/13] SCAN: restore original filter order (#14537) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In #14121, the SCAN filters order was changed, before #14121the order was - pattern, expiration and type, after #14121pattern became last, this break change broke the original behavior, which will cause scan with pattern also to remove the expired keys. This PR reorders the filters to be consistent with the original behavior and extends a test to cover this scenario. (cherry picked from commit 39200596f48e44067021474adeefeeccf3dc3a13) --- src/db.c | 28 +++++++++++++++------------- tests/unit/scan.tcl | 27 +++++++++++++++------------ 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/src/db.c b/src/db.c index db2597d850d..6d4ad439190 100644 --- a/src/db.c +++ b/src/db.c @@ -1503,9 +1503,22 @@ void scanCallback(void *privdata, const dictEntry *de, dictEntryLink plink) { /* o and typename can not have values at the same time. */ serverAssert(!((data->type != LLONG_MAX) && o)); + kvobj *kv = NULL; if (!o) { /* If scanning keyspace */ - kvobj *kv = dictGetKV(de); - + kv = dictGetKV(de); + keyStr = kvobjGetKey(kv); + } else { + keyStr = dictGetKey(de); + } + + /* Filter element if it does not match the pattern. */ + if (data->pattern) { + if (!stringmatchlen(data->pattern, sdslen(data->pattern), keyStr, data->strlen(keyStr), 0)) { + return; + } + } + + if (!o) { /* Expiration check first - only for database keyspace scanning. * Use kv obj to avoid robj creation. */ if (expireIfNeeded(data->db, NULL, kv, 0) != KEY_VALID) @@ -1520,17 +1533,6 @@ void scanCallback(void *privdata, const dictEntry *de, dictEntryLink plink) { if (!objectTypeCompare(kv, data->type)) return; } - - keyStr = kvobjGetKey(kv); - } else { - keyStr = dictGetKey(de); - } - - /* Filter element if it does not match the pattern. */ - if (data->pattern) { - if (!stringmatchlen(data->pattern, sdslen(data->pattern), keyStr, data->strlen(keyStr), 0)) { - return; - } } if (o == NULL) { diff --git a/tests/unit/scan.tcl b/tests/unit/scan.tcl index 8ab14d3b161..6a092cb4e95 100644 --- a/tests/unit/scan.tcl +++ b/tests/unit/scan.tcl @@ -164,25 +164,29 @@ proc test_scan {type} { r debug set-active-expire 1 } {OK} {needs:debug} - test "{$type} SCAN with expired keys with TYPE filter" { + test "{$type} SCAN with expired keys with TYPE filter and PATTERN filter" { r flushdb # make sure that passive expiration is triggered by the scan r debug set-active-expire 0 populate 1000 - r set foo bar - r pexpire foo 1 + r set key:foo bar + r pexpire key:foo 1 # add a hash type key - r hset hash f v - r pexpire hash 1 + r hset key:hash f v + r pexpire key:hash 1 + + # add a pattern key + r set boo far + r pexpire boo 1 after 2 set cur 0 set keys {} while 1 { - set res [r scan $cur type "string" count 10] + set res [r scan $cur type "string" match key* count 10] set cur [lindex $res 0] set k [lindex $res 1] lappend keys {*}$k @@ -191,12 +195,11 @@ proc test_scan {type} { assert_equal 1000 [llength $keys] - # make sure that expired key have been removed by scan command - assert_equal 1000 [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] - # TODO: uncomment in redis 8.0 - # make sure that only the expired key in the type match will been removed by scan command - #assert_equal 1001 [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] - + # make sure that expired key have been removed by scan command, + # pattern check before expired so key filtered by pattern will not be removed + # but expiration check is before type check so key:foo and key:hash will be removed + assert_equal 1001 [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + r debug set-active-expire 1 } {OK} {needs:debug} From 2a9542ccc8449ae95d13d162be5cadc93f799c4c Mon Sep 17 00:00:00 2001 From: lihp <113838121+show1999@users.noreply.github.com> Date: Sat, 22 Nov 2025 11:52:31 +0800 Subject: [PATCH 11/13] Fixes an issue where EXEC checks ACL during AOF loading (#14545) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR fixes an issue(#14541) where EXEC’s ACL recheck was still being performed during AOF loading, that may cause AOF loading failed, if ACL rules are changed and don't allow some commands in MULTI-EXEC. (cherry picked from commit 0288d70820095491c0cf7d9d89a7e6062a6418f2) --- src/multi.c | 8 +++++++- tests/integration/aof.tcl | 29 +++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/multi.c b/src/multi.c index dd372031b62..cd8783d20e7 100644 --- a/src/multi.c +++ b/src/multi.c @@ -179,6 +179,9 @@ void execCommand(client *c) { c->all_argv_len_sum = c->mstate.argv_len_sums; + /* Skip ACL check for the AOF client while server loading. */ + int skip_acl_check = server.loading && c->id == CLIENT_ID_AOF; + addReplyArrayLen(c,c->mstate.count); for (j = 0; j < c->mstate.count; j++) { c->argc = c->mstate.commands[j]->argc; @@ -189,7 +192,10 @@ void execCommand(client *c) { /* ACL permissions are also checked at the time of execution in case * they were changed after the commands were queued. */ int acl_errpos; - int acl_retval = ACLCheckAllPerm(c,&acl_errpos); + int acl_retval = ACL_OK; + if (!skip_acl_check) { + acl_retval = ACLCheckAllPerm(c,&acl_errpos); + } if (acl_retval != ACL_OK) { char *reason; switch (acl_retval) { diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index d9a9e866070..48127268f28 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -374,6 +374,35 @@ tags {"aof external:skip"} { } } + test {skip EXEC ACL check during AOF load} { + set user_acl "default on nopass ~* &* +@read -@write +multi +exec +select +ping" + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + } + + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand set beforetx beforetx] + append_to_aof [formatCommand multi] + append_to_aof [formatCommand set tx1 tx1] + append_to_aof [formatCommand set tx2 tx2] + append_to_aof [formatCommand exec] + append_to_aof [formatCommand set aftertx aftertx] + } + + start_server_aof [list dir $server_path user $user_acl] { + set c [redis [srv host] [srv port] 0 $::tls] + wait_done_loading $c + assert_equal {beforetx} [$c get beforetx] + assert_equal {aftertx} [$c get aftertx] + assert_equal {tx1} [$c get tx1] + assert_equal {tx2} [$c get tx2] + + catch {$c set newkey value} e + assert_match {*NOPERM*set*} $e + } + } + # redis could load AOF which has timestamp annotations inside create_aof $aof_dirpath $aof_file { append_to_aof "#TS:1628217470\r\n" From 0ffb114beac6dd18efdca842bfd4e59c09450ee0 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Thu, 27 Nov 2025 07:59:11 +0200 Subject: [PATCH 12/13] Fix rare server hang at shutdown (#14581) In case we have to kill an rdb child at shutdown, we wait for the child process to exit, and then resume with the shutodwn, and we did not clear the child_pid variable, since we're going to terminate anyway. but if the shutdown is then aborted due to another issue further down that function, we will try to kill that child again, and the waitpid will never get released. Reproduced in the test "SHUTDOWN can proceed if shutdown command was with nosave" (cherry picked from commit f56e0115fbce73b012fe4ccce7368a67017b911c) --- src/server.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/server.c b/src/server.c index 1018ff31c5a..bb9793d973b 100644 --- a/src/server.c +++ b/src/server.c @@ -4826,6 +4826,7 @@ int finishShutdown(void) { * The temp rdb file fd may won't be closed when redis exits quickly, * but OS will close this fd when process exits. */ rdbRemoveTempFile(server.child_pid, 0); + resetChildState(); } /* Kill module child if there is one. */ From f68bcd2a84b7d8b7f44d7b33b934b9973cdf288a Mon Sep 17 00:00:00 2001 From: "h.o.t. neglected" Date: Sun, 10 May 2026 22:53:18 -0400 Subject: [PATCH 13/13] Fix MULTI queue memory accounting in multiStateMemOverhead (#15163) PR #14440 changed `mstate.commands` from an array of `multiCmd` structs to an array of `pendingCommand` pointers. This PR fixes the overhead calculation in multiStateMemOverhead to account for both the pointer array and the actual structs: - The pointer array: `alloc_count * sizeof(pendingCommand*)` - The individually allocated structs: `count * sizeof(pendingCommand)` (cherry picked from commit 9302d2788936eea4710792a499d7ce43cfec8806) --- src/multi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/multi.c b/src/multi.c index cd8783d20e7..2b900bcbd14 100644 --- a/src/multi.c +++ b/src/multi.c @@ -504,6 +504,7 @@ size_t multiStateMemOverhead(client *c) { /* Add watched keys overhead, Note: this doesn't take into account the watched keys themselves, because they aren't managed per-client. */ mem += listLength(c->watched_keys) * (sizeof(listNode) + sizeof(watchedKey)); /* Reserved memory for queued multi commands. */ - mem += c->mstate.alloc_count * sizeof(pendingCommand); + mem += c->mstate.alloc_count * sizeof(pendingCommand*); + mem += c->mstate.count * sizeof(pendingCommand); return mem; }