From d677fc2193c37821d6a3d0066485217255720547 Mon Sep 17 00:00:00 2001 From: Vitah Lin Date: Fri, 12 Dec 2025 06:50:26 +0800 Subject: [PATCH 01/12] Upgrade macos version in actions (#2920) GitHub has deprecated older macOS runners, and macos-13 is no longer supported. 1. The latest version of cross-platform-actions/action does allow running on ubuntu-latest (Linux runner) and does not strictly require macOS. 2. Previously, cross-platform-actions/action@v0.22.0 used runs-on: macos-13. I checked the latest version of cross-platform-actions, and the official examples now use runs-on: ubuntu. I think we can switch from macOS to Ubuntu. --------- Signed-off-by: Vitah Lin --- .github/workflows/daily.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index 573287ab649..a7e85f67aec 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -1201,7 +1201,7 @@ jobs: strategy: fail-fast: false matrix: - os: [macos-13, macos-14] + os: [macos-14] runs-on: ${{ matrix.os }} if: | (github.event_name == 'workflow_dispatch' || @@ -1230,7 +1230,7 @@ jobs: run: make SERVER_CFLAGS='-Werror' test-freebsd: - runs-on: macos-13 + runs-on: ubuntu-latest if: | (github.event_name == 'workflow_dispatch' || (github.event_name == 'schedule' && github.repository == 'valkey-io/valkey') || @@ -1248,7 +1248,7 @@ jobs: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} - name: test - uses: cross-platform-actions/action@5800fa0060a22edf69992a779adac3d2bb3a6f8a # v0.22.0 + uses: cross-platform-actions/action@46e8d7fb25520a8d6c64fd2b7a1192611da98eda # v0.30.0 with: operating_system: freebsd environment_variables: MAKE From 1a1acf523c82a9fd2dd565c54e86abdb2750c3b9 Mon Sep 17 00:00:00 2001 From: Patrik Hermansson Date: Thu, 8 Jan 2026 19:27:00 +0100 Subject: [PATCH 02/12] Trigger prepareCommand on argc change in module command filters (#2945) Previously, only changes to argv[0] were considered when deciding whether to re-prepare the command and recompute the cluster slot. This caused issues where changes to the argument count (argc) would not trigger the necessary prepare in case additional keys were injected to the command This fix adds a check to ensure that changes to argc are properly considered. Signed-off-by: Patrik Hermansson --- src/module.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/module.c b/src/module.c index 4daa3a5864d..d4c6ae6a377 100644 --- a/src/module.c +++ b/src/module.c @@ -11022,8 +11022,10 @@ void moduleCallCommandFilters(client *c) { ValkeyModuleCommandFilterCtx filter = {.argv = c->argv, .argv_len = c->argv_len, .argc = c->argc, .c = c}; - robj *tmp = c->argv[0]; - incrRefCount(tmp); + robj *pre_filter_command = c->argv[0]; + incrRefCount(pre_filter_command); + const int pre_filter_argc = c->argc; + while ((ln = listNext(&li))) { ValkeyModuleCommandFilter *f = ln->value; @@ -11036,15 +11038,21 @@ void moduleCallCommandFilters(client *c) { f->callback(&filter); } + /* Apply filter output */ c->argv = filter.argv; c->argv_len = filter.argv_len; c->argc = filter.argc; - if (tmp != c->argv[0]) { + + /* If filter changed the command or number of arguments, redo prepareCommand */ + const bool command_changed = (c->argv[0] != pre_filter_command); + const bool argc_changed = (c->argc != pre_filter_argc); + + if (command_changed || argc_changed) { /* Reset and lookup the command and cluster slot again. */ unprepareCommand(c); prepareCommand(c); } - decrRefCount(tmp); + decrRefCount(pre_filter_command); } /* Return the number of arguments a filtered command has. The number of From 2ff3a073450332026b346b2c14c6f19aceb10409 Mon Sep 17 00:00:00 2001 From: cjx-zar <56825069+cjx-zar@users.noreply.github.com> Date: Fri, 26 Dec 2025 10:20:39 +0800 Subject: [PATCH 03/12] Restrict ttl from being negative and avoid crash in import-mode (#2944) When import-mode is yes, we might be able to set an expired TTL. At the same time, commands like EXPIREAT/EXPIRE do not restrict TTL from being negative. After we set import-mode to no, server will crash at: ``` int activeExpireCycleTryExpire(serverDb *db, robj *val, long long now, int didx) { long long t = objectGetExpire(val); serverAssert(t >= 0); ``` In this case, we restrict ttl from being negative in expireGenericCommand, we simply change the expiration time to 0 to mark the key as expired since in import-mode, the import-source client can always read the expired keys anyway. import-mode was introduced in #1185 --------- Signed-off-by: cjx-zar --- src/expire.c | 7 +++++++ tests/unit/expire.tcl | 16 ++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/src/expire.c b/src/expire.c index cc452185300..e1dc40a04b4 100644 --- a/src/expire.c +++ b/src/expire.c @@ -781,6 +781,13 @@ void expireGenericCommand(client *c, long long basetime, int unit) { return; } when += basetime; + /* A negative expiration time should cause a key to expire and be deleted immediately. + * However, in some cases (such as import-mode), we might need to pause expiration, + * and we don't want keys with negative expiration times (could cause a crash during active expiration). + * Therefore, we simply change the expiration time to 0 to mark the key as expired. */ + if (when < 0) { + when = 0; + } robj *obj = lookupKeyWrite(c->db, key); diff --git a/tests/unit/expire.tcl b/tests/unit/expire.tcl index 58a906425e1..db11a3bf2e7 100644 --- a/tests/unit/expire.tcl +++ b/tests/unit/expire.tcl @@ -949,6 +949,22 @@ start_server {tags {"expire"}} { fail "Keys did not actively expire." } } + + test {Negative ttl will not cause server to crash when import mode is on} { + r flushall + r config set import-mode yes + r set foo bar + r expireat foo -1 + r set foo1 bar + r expireat foo1 -10000 + assert_equal [r dbsize] 2 + r config set import-mode no + wait_for_condition 30 100 { + [r dbsize] == 0 + } else { + fail "key wasn't expired" + } + } } start_server {tags {expire} overrides {hz 100}} { From c881df5cc19e99b14b4df393b727e0314052d7b4 Mon Sep 17 00:00:00 2001 From: Binbin Date: Mon, 29 Dec 2025 10:20:24 +0800 Subject: [PATCH 04/12] Fix chained replica crash when doing dual channel replication (#2983) There is a crash in freeReplicationBacklog: ``` Discarding previously cached primary state. ASSERTION FAILED 'listLength(server.replicas) == 0' is not true freeReplicationBacklog ``` The reason is that during dual channel operation, the RDB channel is protected. In the chained replica case, `disconnectReplicas` is called to disconnect all replica clients, but since the RDB channel is protected, `freeClient` does not actually free the replica client. Later, we encounter an assertion failure in `freeReplicationBacklog`. ``` void replicationAttachToNewPrimary(void) { /* Replica starts to apply data from new primary, we must discard the cached * primary structure. */ serverAssert(server.primary == NULL); replicationDiscardCachedPrimary(); /* Cancel any in progress imports (we will now use the primary's) */ clusterCleanSlotImportsOnFullSync(); disconnectReplicas(); /* Force our replicas to resync with us as well. */ freeReplicationBacklog(); /* Don't allow our chained replicas to PSYNC. */ } ``` Dual channel replication was introduced in #60. Signed-off-by: Binbin --- src/networking.c | 5 +- .../integration/dual-channel-replication.tcl | 67 +++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index dfa4fa902d1..de957811e72 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1856,7 +1856,10 @@ void disconnectReplicas(void) { listNode *ln; listRewind(server.replicas, &li); while ((ln = listNext(&li))) { - freeClient((client *)ln->value); + client *replica = (client *)ln->value; + /* If we are going to disconnect all replicas, there is no need to protect the rdb channel. */ + replica->flag.protected_rdb_channel = 0; + freeClient(replica); } } diff --git a/tests/integration/dual-channel-replication.tcl b/tests/integration/dual-channel-replication.tcl index a271289367b..d0188db5d8e 100644 --- a/tests/integration/dual-channel-replication.tcl +++ b/tests/integration/dual-channel-replication.tcl @@ -1369,3 +1369,70 @@ start_server {tags {"dual-channel-replication external:skip"}} { } } } + +# Test Sequence: +# 1. replica -> primary. +# 2. Replica initiates synchronization via RDB channel. +# 3. Primary's main process is suspended. +# 4. Replica completes RDB loading and pauses before establishing PSYNC connection. +# 5. Primary resumes operation and detects closed RDB channel. +# 6. Primary protects the RDB channel, maintains the RDB channel. +# 7. replica -> primary -> new_primary +# 8. Starting a new server, set up a chained replica. +# 9. The primary completes sync from the new_primary server and disconnects all replica +# clients (including the RDB channel). +# 10. Make sure that the primary does not assert. +# 11. Replica resumes operation, check the replication is working correctly. +test "Chained replicas can disconnect protected RDB channel client when using dual channel replication" { + start_server {tags {"dual-channel-replication external:skip"}} { + set primary [srv 0 client] + set primary_host [srv 0 host] + set primary_port [srv 0 port] + + $primary config set repl-diskless-sync yes + $primary config set dual-channel-replication-enabled yes + $primary debug pause-after-fork 1 + $primary debug delay-rdb-client-free-seconds 60 + + start_server {} { + set replica [srv 0 client] + set replica_pid [srv 0 pid] + + $replica config set dual-channel-replication-enabled yes + + # The primary will protect the replica's RDB channel. + set loglines [count_log_lines 0] + $replica replicaof $primary_host $primary_port + wait_for_log_messages 0 {"*Done loading RDB*"} $loglines 1000 50 + pause_process $replica_pid + wait_and_resume_process -1 + $primary debug pause-after-fork 0 + wait_for_condition 1000 50 { + [string match {*replicas_waiting_psync:1*} [$primary info replication]] + } else { + fail "Primary freed RDB client before psync was established" + } + + start_server {} { + set new_primary [srv 0 client] + set new_primary_host [srv 0 host] + set new_primary_port [srv 0 port] + + # Doing the chained replica, make sure it won't assert. + set loglines [count_log_lines -2] + $primary replicaof $new_primary_host $new_primary_port + wait_for_log_messages -2 {"*Done loading RDB*"} $loglines 1000 50 + + # Check the replication is working correctly. + resume_process $replica_pid + $new_primary set foo bar + wait_for_condition 1000 50 { + [$primary get foo] eq "bar" && + [$replica get foo] eq "bar" + } else { + fail "Chained replicas did not sync" + } + } + } + } +} From 2317dd6f2edf95abe18ff746b99c50945fb10156 Mon Sep 17 00:00:00 2001 From: Aditya Teltia <67232537+AdityaTeltia@users.noreply.github.com> Date: Sat, 10 Jan 2026 06:24:51 +0530 Subject: [PATCH 05/12] Skip slot cache optimization for AOF client to prevent key duplication and data corruption (#3004) When loading AOF in cluster mode, keys inside a MULTI/EXEC block could be inserted into wrong hash slots, causing key duplication and data corruption. The root cause was the slot caching optimization in getKeySlot(). This optimization reuses a cached slot value to avoid recalculating the hash for every key operation. However, when replaying AOF, a transaction may contain commands affecting keys in different slots. The cached slot from a previous command (e.g., SET k1) would incorrectly be used for subsequent commands in the transaction (e.g., SET k0), causing k0 to be stored in k1's slot. The existing code already skipped this optimization for replicated clients (commands from primary) using isReplicatedClient(). This change extends that to also skip for AOF clients by using mustObeyClient() instead, which covers both replicated clients and the AOF client. Fixes #2995, introduced in #1949. Signed-off-by: aditya.teltia --- src/db.c | 4 ++-- tests/unit/multi.tcl | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/db.c b/src/db.c index e412b3bd270..49dbbfea351 100644 --- a/src/db.c +++ b/src/db.c @@ -258,10 +258,10 @@ int getKeySlot(sds key) { * the key slot would fallback to keyHashSlot. * * Modules and scripts executed on the primary may get replicated as multi-execs that operate on multiple slots, - * so we must always recompute the slot for commands coming from the primary. + * so we must always recompute the slot for commands coming from the primary or AOF. */ if (server.current_client && server.current_client->slot >= 0 && server.current_client->flag.executing_command && - !isReplicatedClient(server.current_client)) { + !mustObeyClient(server.current_client)) { debugServerAssertWithInfo(server.current_client, NULL, (int)keyHashSlot(key, (int)sdslen(key)) == server.current_client->slot); return server.current_client->slot; diff --git a/tests/unit/multi.tcl b/tests/unit/multi.tcl index 0c7b86deb96..818414378ef 100644 --- a/tests/unit/multi.tcl +++ b/tests/unit/multi.tcl @@ -992,3 +992,23 @@ start_cluster 1 0 {tags {"external:skip cluster"}} { assert_equal [R 0 EXEC] {1 FIZZ} } } + +start_cluster 1 0 {tags {"external:skip cluster needs:debug"} overrides {appendonly yes}} { + test "Regression test for #2995: MULTI/EXEC with different slot keys should not duplicate on AOF reload" { + R 0 SET k0 a + R 0 BGREWRITEAOF + waitForBgrewriteaof [srv 0 client] + + R 0 SET k1 b + R 0 MULTI + R 0 SET k0 c + R 0 APPEND k0 d + assert_equal [R 0 EXEC] {OK 2} + + R 0 DEBUG LOADAOF + + assert_equal [llength [R 0 KEYS *]] 2 + assert_equal [R 0 GET k0] "cd" + assert_equal [R 0 GET k1] "b" + } +} From c70ff1c96ec383e5b93872eee85ff705373697ce Mon Sep 17 00:00:00 2001 From: bpint Date: Thu, 15 Jan 2026 15:46:07 +0800 Subject: [PATCH 06/12] Fix used_memory_dataset underflow due to miscalculated used_memory_overhead (#3005) The metric `used_memory_dataset` turned into an insanely large number close to 2^64 (actually overflowed negative value), as reported in #2994. ## Double-Counted database memory When server starts, the global variable `server.initial_memory_usage` is used to record a memory baseline in InitServerLast. This `server.initial_memory_usage` has clearly included initial database memory, since databases are created in initServer. In function getMemoryOverheadData, the `mem_total` is firstly assigned the baseline, which includes initial database memory. And then all extra memory usage of databases are added to mem_total. The initial database memory are therefore counted TWICE. This eventually caused wrongly larger `used_memory_overhead`. For a database with only a couple of keys, the `used_memory_overhead` is easily larger than `used_memory` and causes an overflowed `used_memory_dataset`. ## Missed Empty Databases In function getMemoryOverheadData(), kvstores without any allocated hashtable are ignored from calculation: ```c if (db == NULL || !kvstoreNumAllocatedHashtables(db->keys)) continue; ``` However, even the kvstore has no allocated hashtable, there are still some memory allocated by kvstoreCreate(), including `hashtable_size_index`, which can be larger than 128 KiB. On the contrary, this caused wrongly smaller `used_memory_overhead` for an empty database. When we insert only ONE key to the database, the database is suddenly taken into account, and `used_memory_overhead` will increase (for `used_memory_dataset` decrease) by more than 128 KiB due to the single key insertion. Signed-off-by: Ace Breakpoint Signed-off-by: bpint Signed-off-by: Madelyn Olson Co-authored-by: Binbin Co-authored-by: Madelyn Olson --- src/object.c | 2 +- src/server.c | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/object.c b/src/object.c index 41a7bd50cd0..56a03405675 100644 --- a/src/object.c +++ b/src/object.c @@ -1380,7 +1380,7 @@ struct serverMemOverhead *getMemoryOverheadData(void) { for (j = 0; j < server.dbnum; j++) { serverDb *db = server.db[j]; - if (db == NULL || !kvstoreNumAllocatedHashtables(db->keys)) continue; + if (db == NULL) continue; unsigned long long keyscount = kvstoreSize(db->keys); diff --git a/src/server.c b/src/server.c index bec96243375..3ab02405b6e 100644 --- a/src/server.c +++ b/src/server.c @@ -3135,7 +3135,13 @@ void InitServerLast(void) { bioInit(); initIOThreads(); set_jemalloc_bg_thread(server.jemalloc_bg_thread); - server.initial_memory_usage = zmalloc_used_memory(); + + /* First set initial_memory_usage to zero as baseline for getMemoryOverheadData(). */ + server.initial_memory_usage = 0; + struct serverMemOverhead *mh = getMemoryOverheadData(); + /* Exclude current overhead memory to avoid double counting in the future. */ + server.initial_memory_usage = zmalloc_used_memory() - mh->overhead_total; + freeMemoryOverheadData(mh); } /* The purpose of this function is to try to "glue" consecutive range From 6faa95779132a116df3deaca691f6f8f81a04dd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= Date: Wed, 8 Oct 2025 11:37:15 +0200 Subject: [PATCH 07/12] Allow TCL 9.0 for tests (#1673) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Makes our tests possible to run with TCL 9. The latest Fedora now has TCL 9.0 and it's working now, including the TCL TLS package. (This wasn't working earlier due to some packaging errors for TCL packages in Fedora, which have been fixed now.) This PR also removes the custom compilation of TCL 8 used in our Daily jobs and uses the system default TCL version instead. The TCL version depends on the OS. For the latest Fedora, you get 9.0, for macOS you get 8.5 and for most other OSes you get 8.6. The checks for TCL 8.7 are removed, because 8.7 doesn't exist. It was never released. --------- Signed-off-by: Viktor Söderqvist --- .github/actions/rpm-distros-tcl8/action.yml | 31 --------------------- .github/workflows/daily.yml | 18 ++++++------ runtest | 2 +- runtest-cluster | 2 +- runtest-moduleapi | 2 +- runtest-sentinel | 2 +- tests/instances.tcl | 4 +-- tests/support/cluster.tcl | 1 - tests/support/response_transformers.tcl | 2 -- tests/support/valkey.tcl | 1 - tests/test_helper.tcl | 8 ++---- tests/unit/other.tcl | 2 +- 12 files changed, 17 insertions(+), 58 deletions(-) delete mode 100644 .github/actions/rpm-distros-tcl8/action.yml diff --git a/.github/actions/rpm-distros-tcl8/action.yml b/.github/actions/rpm-distros-tcl8/action.yml deleted file mode 100644 index b43b3cc3278..00000000000 --- a/.github/actions/rpm-distros-tcl8/action.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: 'Install TCL8' -description: 'Installs tcl8 and tcltls from source on Fedora-based or uses the system package on others.' - -inputs: - matrix_name: - description: 'The name of the matrix to check for fedora' - required: true - -runs: - using: "composite" - steps: - # Fedora 42 comes with Tcl 9 by default, but tcltls is currently incompatible with Tcl 9. - # As a workaround, we install Tcl 8 and manually build tcltls 1.7.22 from source. - # Once tcltls adds support for Tcl 9, this logic can be removed and system packages used instead. - - run: | - if [[ "${{ inputs.matrix_name }}" =~ "fedora" ]]; then - dnf -y install tcl8 tcl8-devel gcc make awk openssl openssl-devel - ln -s /usr/bin/tclsh8.6 /usr/bin/tclsh - curl -LO https://core.tcl-lang.org/tcltls/uv/tcltls-1.7.22.tar.gz - tar -xzf tcltls-1.7.22.tar.gz - pushd tcltls-1.7.22 - ./configure --with-tcl=/usr/lib64/tcl8.6 - make - mkdir -p /usr/lib64/tcl8.6/tls1.7.22 - cp tcltls.so pkgIndex.tcl /usr/lib64/tcl8.6/tls1.7.22/ - popd - else - dnf -y install tcl tcltls - fi - ./utils/gen-test-certs.sh - shell: bash diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index a7e85f67aec..a2d4196c16f 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -947,12 +947,10 @@ jobs: run: dnf -y install epel-release - name: make run: | - dnf -y install gcc make procps-ng openssl-devel openssl which /usr/bin/kill /usr/bin/awk + dnf -y install gcc make procps-ng which /usr/bin/kill /usr/bin/awk make -j SERVER_CFLAGS='-Werror' - name: testprep - uses: ./.github/actions/rpm-distros-tcl8 - with: - matrix_name: ${{ matrix.name }} + run: dnf -y install tcl tcltls - name: test if: true && !contains(github.event.inputs.skiptests, 'valkey') run: ./runtest ${{ github.event_name != 'pull_request' && '--accurate' || '' }} --verbose --dump-logs ${{github.event.inputs.test_args}} @@ -1018,9 +1016,9 @@ jobs: dnf -y install make gcc openssl-devel openssl procps-ng which /usr/bin/kill /usr/bin/awk make -j BUILD_TLS=module SERVER_CFLAGS='-Werror' - name: testprep - uses: ./.github/actions/rpm-distros-tcl8 - with: - matrix_name: ${{ matrix.name }} + run: | + dnf -y install tcl tcltls + ./utils/gen-test-certs.sh - name: test if: true && !contains(github.event.inputs.skiptests, 'valkey') run: | @@ -1090,9 +1088,9 @@ jobs: dnf -y install make gcc openssl-devel openssl procps-ng which /usr/bin/kill /usr/bin/awk make -j BUILD_TLS=module SERVER_CFLAGS='-Werror' - name: testprep - uses: ./.github/actions/rpm-distros-tcl8 - with: - matrix_name: ${{ matrix.name }} + run: | + dnf -y install tcl tcltls + ./utils/gen-test-certs.sh - name: test if: true && !contains(github.event.inputs.skiptests, 'valkey') run: | diff --git a/runtest b/runtest index 00949bc304d..f0c4c422dcf 100755 --- a/runtest +++ b/runtest @@ -1,5 +1,5 @@ #!/bin/sh -TCL_VERSIONS="8.5 8.6 8.7" +TCL_VERSIONS="8.5 8.6 9.0" TCLSH="" for VERSION in $TCL_VERSIONS; do diff --git a/runtest-cluster b/runtest-cluster index 85e8690b1cc..de31e728732 100755 --- a/runtest-cluster +++ b/runtest-cluster @@ -1,5 +1,5 @@ #!/bin/sh -TCL_VERSIONS="8.5 8.6 8.7" +TCL_VERSIONS="8.5 8.6 9.0" TCLSH="" for VERSION in $TCL_VERSIONS; do diff --git a/runtest-moduleapi b/runtest-moduleapi index 460ee59460d..990e3a5fdad 100755 --- a/runtest-moduleapi +++ b/runtest-moduleapi @@ -1,5 +1,5 @@ #!/bin/sh -TCL_VERSIONS="8.5 8.6 8.7" +TCL_VERSIONS="8.5 8.6 9.0" TCLSH="" [ -z "$MAKE" ] && MAKE=make diff --git a/runtest-sentinel b/runtest-sentinel index 3bbe9af3c83..c2889e9d6ad 100755 --- a/runtest-sentinel +++ b/runtest-sentinel @@ -1,5 +1,5 @@ #!/bin/sh -TCL_VERSIONS="8.5 8.6 8.7" +TCL_VERSIONS="8.5 8.6 9.0" TCLSH="" for VERSION in $TCL_VERSIONS; do diff --git a/tests/instances.tcl b/tests/instances.tcl index 76d9e14bcb9..265fa054a3e 100644 --- a/tests/instances.tcl +++ b/tests/instances.tcl @@ -7,8 +7,6 @@ # This software is released under the BSD License. See the COPYING file for # more information. -package require Tcl 8.5 - set tcl_precision 17 source ../support/valkey.tcl source ../support/util.tcl @@ -293,7 +291,7 @@ proc parse_options {} { incr j set ::host ${val} } elseif {$opt eq {--tls} || $opt eq {--tls-module}} { - package require tls 1.6 + package require tls ::tls::init \ -cafile "$::tlsdir/ca.crt" \ -certfile "$::tlsdir/client.crt" \ diff --git a/tests/support/cluster.tcl b/tests/support/cluster.tcl index f9d4792d7bf..0c26310a5ca 100644 --- a/tests/support/cluster.tcl +++ b/tests/support/cluster.tcl @@ -9,7 +9,6 @@ # $c get foo # $c close -package require Tcl 8.5 package provide valkey_cluster 0.1 namespace eval valkey_cluster {} diff --git a/tests/support/response_transformers.tcl b/tests/support/response_transformers.tcl index 26a4e0f1774..da3d31912ce 100644 --- a/tests/support/response_transformers.tcl +++ b/tests/support/response_transformers.tcl @@ -14,8 +14,6 @@ # changes in many files) we decided to transform the response to RESP2 # when running with --force-resp3 -package require Tcl 8.5 - namespace eval response_transformers {} # Transform a map response into an array of tuples (tuple = array with 2 elements) diff --git a/tests/support/valkey.tcl b/tests/support/valkey.tcl index a5d1e2ca0a4..65e65fe057a 100644 --- a/tests/support/valkey.tcl +++ b/tests/support/valkey.tcl @@ -25,7 +25,6 @@ # # vwait forever -package require Tcl 8.5 package provide valkey 0.1 source [file join [file dirname [info script]] "response_transformers.tcl"] diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 5ca9c6ba4c8..e0b98737bbf 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -2,8 +2,6 @@ # This software is released under the BSD License. See the COPYING file for # more information. -package require Tcl 8.5 - set tcl_precision 17 source tests/support/valkey.tcl source tests/support/aofmanifest.tcl @@ -419,7 +417,7 @@ proc test_server_cron {} { } proc accept_test_clients {fd addr port} { - fconfigure $fd -encoding binary + fconfigure $fd -translation binary fileevent $fd readable [list read_from_test_client $fd] } @@ -620,7 +618,7 @@ proc the_end {} { # to read the command, execute, reply... all this in a loop. proc test_client_main server_port { set ::test_server_fd [socket localhost $server_port] - fconfigure $::test_server_fd -encoding binary + fconfigure $::test_server_fd -translation binary send_data_packet $::test_server_fd ready [pid] while 1 { set bytes [gets $::test_server_fd] @@ -755,7 +753,7 @@ for {set j 0} {$j < [llength $argv]} {incr j} { } elseif {$opt eq {--io-threads}} { set ::io_threads 1 } elseif {$opt eq {--tls} || $opt eq {--tls-module}} { - package require tls 1.6 + package require tls set ::tls 1 ::tls::init \ -cafile "$::tlsdir/ca.crt" \ diff --git a/tests/unit/other.tcl b/tests/unit/other.tcl index 4a802eed357..46f4b1b7bbe 100644 --- a/tests/unit/other.tcl +++ b/tests/unit/other.tcl @@ -216,7 +216,7 @@ start_server {tags {"other"}} { } else { set fd2 [socket [srv host] [srv port]] } - fconfigure $fd2 -encoding binary -translation binary + fconfigure $fd2 -translation binary if {!$::singledb} { puts -nonewline $fd2 "SELECT 9\r\n" flush $fd2 From bd15cc178653434929bf832b75d28b9edcb4249c Mon Sep 17 00:00:00 2001 From: Binbin Date: Wed, 14 Jan 2026 11:25:52 +0800 Subject: [PATCH 08/12] Avoid duplicate calculations of network-bytes-out in slot stats with copy-avoidance (#3046) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In releaseBufReferences(), clusterSlotStatsAddNetworkBytesOutForSlot() is called in the IO thread after writing the reply to the client, and (since #2652) it's also done in the normal code path in the main thread (afterCommand() -> clusterSlotStatsAddNetworkBytesOutForUserClient()). This means the output bytes are recorded twice in the cluster slot stats. Fixes #3043. Bug was introduced in #2652 (9.0.1). Signed-off-by: Binbin Co-authored-by: Viktor Söderqvist --- src/cluster_slot_stats.h | 2 -- src/networking.c | 8 +++++++- tests/unit/cluster/slot-stats.tcl | 23 ++++++++++++++++++++++- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/cluster_slot_stats.h b/src/cluster_slot_stats.h index f5c103e9ed5..29f15cc85be 100644 --- a/src/cluster_slot_stats.h +++ b/src/cluster_slot_stats.h @@ -14,8 +14,6 @@ void clusterSlotStatsInvalidateSlotIfApplicable(scriptRunCtx *ctx); /* network-bytes-in metric. */ void clusterSlotStatsAddNetworkBytesInForUserClient(client *c); -void clusterSlotStatsSetClusterMsgLength(uint32_t len); -void clusterSlotStatsResetClusterMsgLength(void); /* network-bytes-out metric. */ void clusterSlotStatsAddNetworkBytesOutForSlot(int slot, unsigned long long net_bytes_out); diff --git a/src/networking.c b/src/networking.c index de957811e72..db259a627f1 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2758,7 +2758,13 @@ static void releaseBufReferences(char *buf, size_t bufpos) { ptr += sizeof(payloadHeader); if (header->payload_type == BULK_STR_REF) { - clusterSlotStatsAddNetworkBytesOutForSlot(header->slot, header->reply_len); + /* Here clusterSlotStatsAddNetworkBytesOutForSlot() is called in + * the IO thread after writing the reply to the client, and after + * #2652 it is also done in the normal code path in the main thread + * (afterCommand() -> clusterSlotStatsAddNetworkBytesOutForUserClient()). + * Before we come up with a better solution (see #2652), we need to + * comment it out to avoid the duplicate calculations. */ + /* clusterSlotStatsAddNetworkBytesOutForSlot(header->slot, header->reply_len); */ bulkStrRef *str_ref = (bulkStrRef *)ptr; size_t len = header->payload_len; diff --git a/tests/unit/cluster/slot-stats.tcl b/tests/unit/cluster/slot-stats.tcl index 2034a51d97d..70757e3b26c 100644 --- a/tests/unit/cluster/slot-stats.tcl +++ b/tests/unit/cluster/slot-stats.tcl @@ -525,7 +525,7 @@ start_cluster 1 0 {tags {external:skip cluster}} { # +OK\r\n --> 5 bytes R 0 GET $key - # $3\r\nvalue\r\n -> 11 bytes + # $5\r\nvalue\r\n -> 11 bytes set expected_slot_stats [ dict create $key_slot [ @@ -538,6 +538,27 @@ start_cluster 1 0 {tags {external:skip cluster}} { R 0 CONFIG RESETSTAT R 0 FLUSHALL + test "CLUSTER SLOT-STATS network-bytes-out, for slot specific commands, for reply copy avoidance" { + set copy_avoid [lindex [R 0 config get min-string-size-avoid-copy-reply] 1] + R 0 config set min-string-size-avoid-copy-reply 1 + + set value [string repeat A 1024] ;# Make sure it is a RAW string + R 0 set $key $value ;# +OK\r\n --> 5 bytes + R 0 get $key ;# $1024\r\nAA..AA\r\n -> 1033 bytes + + set expected_slot_stats [ + dict create $key_slot [ + dict create network-bytes-out 1038 + ] + ] + set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE $key_slot $key_slot] + assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert + + R 0 config set min-string-size-avoid-copy-reply $copy_avoid + } + R 0 CONFIG RESETSTAT + R 0 FLUSHALL + test "CLUSTER SLOT-STATS network-bytes-out, blocking commands." { set rd [valkey_deferring_client] $rd BLPOP $key 0 From 9a670bad8c81eac0bdc7d959731da98b3299aed3 Mon Sep 17 00:00:00 2001 From: Diego Ciciani Date: Thu, 22 Jan 2026 11:17:10 +0100 Subject: [PATCH 09/12] Fix XREAD returning error on empty stream with + ID (#2742) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using XREAD STREAMS + on an empty stream created with MKSTREAM, valkey returns an error instead of nil. This happens because is missing a check on the stream length. The fix adds the length check on the condition. Fixes #2728 Signed-off-by: diego-ciciani01 Signed-off-by: Viktor Söderqvist Co-authored-by: Viktor Söderqvist --- src/t_stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/t_stream.c b/src/t_stream.c index b953b84eba9..68e7caa3db1 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -2275,7 +2275,7 @@ void xreadCommand(client *c) { "just return an empty result set."); goto cleanup; } - if (o) { + if (o && ((stream *)(o->ptr))->length) { stream *s = o->ptr; ids[id_idx] = s->last_id; if (streamDecrID(&ids[id_idx]) != C_OK) { From 755f9b94c758e9c604e84a619b37d8b518334545 Mon Sep 17 00:00:00 2001 From: Daniil Kashapov Date: Mon, 26 Jan 2026 23:47:23 +0500 Subject: [PATCH 10/12] Perf: Track net bytes only if commandlog-request-larger-than != -1 (#3086) Currently after #2652 in copy avoidance path we unconditionally track `c->net_output_bytes_curr_cmd` even when `commandlog-request-larger-than -1`. This PR provides ability to skip that accounting in copy avoidance path based on config value. If `commandlog-request-larger-than -1` then performance is same as before #2652. Also added tracking of `c->net_output_bytes_curr_cmd` in IO thread if it is not tracked in main thread based on decision made in main thread. Read Performance (write performance is the same): ``` With this change: Summary: throughput summary: 2191732.75 requests per second latency summary (msec): avg min p50 p95 p99 max 1.720 0.072 1.743 1.919 2.647 23.983 Unstable: Summary: throughput summary: 1658197.25 requests per second latency summary (msec): avg min p50 p95 p99 max 2.299 0.120 2.343 2.503 3.319 4.791 Config: commandlog-request-larger-than -1 ^^ without this performance is just like on unstable branch databases 1 save "" appendonly no rdbcompression no activedefrag no maxclients 1000 io-threads 9 protected-mode no hz 10 maxmemory 2gb ``` --------- Signed-off-by: Daniil Kashapov --- src/networking.c | 66 ++++++++++++++++++++----------- tests/unit/cluster/slot-stats.tcl | 52 ++++++++++++++++++++++++ tests/unit/commandlog.tcl | 32 +++++++++++++++ valkey.conf | 3 ++ 4 files changed, 129 insertions(+), 24 deletions(-) diff --git a/src/networking.c b/src/networking.c index db259a627f1..ae0a316ce2a 100644 --- a/src/networking.c +++ b/src/networking.c @@ -113,10 +113,12 @@ typedef enum { * The packed attribute is specified because buffer is accessed at arbitrary offsets, * so no benefit in data structure padding and applying packed saves the space in the buffer */ typedef struct __attribute__((__packed__)) payloadHeader { - size_t payload_len; /* payload length in a reply buffer */ - size_t reply_len; /* actual reply length for non-plain payloads */ - uint8_t payload_type; /* one of payloadType */ - int16_t slot; /* to report network-bytes-out for BULK_STR_REF chunks */ + size_t payload_len; /* payload length in a reply buffer */ + size_t reply_len; /* actual reply length for non-plain payloads */ + int16_t slot; /* to report network-bytes-out for BULK_STR_REF chunks */ + uint8_t payload_type : 1; /* one of payloadType */ + uint8_t track_bytes : 1; /* 1 if net bytes tracking was enabled when reply was added */ + uint8_t reserved : 6; } payloadHeader; /* To avoid copy of whole string in reply buffer @@ -506,7 +508,14 @@ void deleteCachedResponseClient(client *recording_client) { /* Updates an existing header, if possible; otherwise inserts a new one * Returns the length of data that can be added to the reply buffer (i.e. min(available, requested)) */ -static size_t upsertPayloadHeader(char *buf, size_t *bufpos, payloadHeader **last_header, uint8_t type, size_t len, int slot, size_t available) { +static size_t upsertPayloadHeader(char *buf, + size_t *bufpos, + payloadHeader **last_header, + uint8_t type, + size_t len, + int slot, + int track_bytes, + size_t available) { /* Enforce min len for BULK_STR_REF chunks as whole pointers must be written to the buffer */ size_t min_len = (type == BULK_STR_REF ? len : 1); if (min_len > available) return 0; @@ -516,7 +525,8 @@ static size_t upsertPayloadHeader(char *buf, size_t *bufpos, payloadHeader **las if (!clusterSlotStatsEnabled(slot)) slot = -1; /* Try to add payload to last chunk if possible */ - if (*last_header != NULL && (*last_header)->payload_type == type && (*last_header)->slot == slot) { + if (*last_header != NULL && (*last_header)->payload_type == type && (*last_header)->slot == slot && + (*last_header)->track_bytes == track_bytes) { (*last_header)->payload_len += allowed_len; return allowed_len; } @@ -533,6 +543,8 @@ static size_t upsertPayloadHeader(char *buf, size_t *bufpos, payloadHeader **las (*last_header)->payload_len = allowed_len; (*last_header)->slot = slot; (*last_header)->reply_len = 0; + (*last_header)->track_bytes = track_bytes; + (*last_header)->reserved = 0; *bufpos += sizeof(payloadHeader); @@ -556,7 +568,8 @@ static size_t _addReplyPayloadToBuffer(client *c, const void *payload, size_t le size_t available = c->buf_usable_size - c->bufpos; size_t reply_len = min(available, len); if (c->flag.buf_encoded) { - reply_len = upsertPayloadHeader(c->buf, &c->bufpos, &c->last_header, payload_type, len, c->slot, available); + int track_bytes = (server.commandlog[COMMANDLOG_TYPE_LARGE_REQUEST].threshold != -1); + reply_len = upsertPayloadHeader(c->buf, &c->bufpos, &c->last_header, payload_type, len, c->slot, track_bytes, available); } if (!reply_len) return 0; @@ -606,7 +619,8 @@ static void _addReplyPayloadToList(client *c, list *reply_list, const char *payl size_t copy = avail >= len ? len : avail; if (tail->flag.buf_encoded) { - copy = upsertPayloadHeader(tail->buf, &tail->used, &tail->last_header, payload_type, len, c->slot, avail); + int track_bytes = (server.commandlog[COMMANDLOG_TYPE_LARGE_REQUEST].threshold != -1); + copy = upsertPayloadHeader(tail->buf, &tail->used, &tail->last_header, payload_type, len, c->slot, track_bytes, avail); } else if (encoded) { /* If encoded buffer is required but tail is unencoded then pretend nothing can be added to it * and, as consequence, cause addition of a new tail */ @@ -634,7 +648,8 @@ static void _addReplyPayloadToList(client *c, list *reply_list, const char *payl tail->flag.buf_encoded = encoded; tail->last_header = NULL; if (tail->flag.buf_encoded) { - upsertPayloadHeader(tail->buf, &tail->used, &tail->last_header, payload_type, len, c->slot, tail->size); + int track_bytes = (server.commandlog[COMMANDLOG_TYPE_LARGE_REQUEST].threshold != -1); + upsertPayloadHeader(tail->buf, &tail->used, &tail->last_header, payload_type, len, c->slot, track_bytes, tail->size); } memcpy(tail->buf + tail->used, payload, len); tail->used += len; @@ -1392,14 +1407,17 @@ static int tryAvoidBulkStrCopyToReply(client *c, robj *obj) { /* Add an Object as a bulk reply */ void addReplyBulk(client *c, robj *obj) { if (tryAvoidBulkStrCopyToReply(c, obj) == C_OK) { - /* If copy avoidance allowed, then we explicitly maintain net_output_bytes_curr_cmd. */ - serverAssert(obj->encoding == OBJ_ENCODING_RAW); - size_t str_len = sdslen(obj->ptr); - uint32_t num_len = digits10(str_len); - /* RESP encodes bulk strings as $\r\n\r\n */ - c->net_output_bytes_curr_cmd += (num_len + 3); /* $\r\n */ - c->net_output_bytes_curr_cmd += str_len; /* */ - c->net_output_bytes_curr_cmd += 2; /* \r\n */ + /* If copy avoidance allowed, then we explicitly maintain net_output_bytes_curr_cmd. + * We determine per-reply if tracking is enabled by checking the config in the main thread. */ + if (server.commandlog[COMMANDLOG_TYPE_LARGE_REQUEST].threshold != -1) { + serverAssert(obj->encoding == OBJ_ENCODING_RAW); + size_t str_len = sdslen(obj->ptr); + uint32_t num_len = digits10(str_len); + /* RESP encodes bulk strings as $\r\n\r\n */ + c->net_output_bytes_curr_cmd += (num_len + 3); /* $\r\n */ + c->net_output_bytes_curr_cmd += str_len; /* */ + c->net_output_bytes_curr_cmd += 2; /* \r\n */ + } return; } addReplyBulkLen(c, obj); @@ -2758,13 +2776,13 @@ static void releaseBufReferences(char *buf, size_t bufpos) { ptr += sizeof(payloadHeader); if (header->payload_type == BULK_STR_REF) { - /* Here clusterSlotStatsAddNetworkBytesOutForSlot() is called in - * the IO thread after writing the reply to the client, and after - * #2652 it is also done in the normal code path in the main thread - * (afterCommand() -> clusterSlotStatsAddNetworkBytesOutForUserClient()). - * Before we come up with a better solution (see #2652), we need to - * comment it out to avoid the duplicate calculations. */ - /* clusterSlotStatsAddNetworkBytesOutForSlot(header->slot, header->reply_len); */ + /* When net byte tracking was disabled in the main thread (commandlog-request-larger-than -1) + * at the time this reply was added, we account for cluster slot stats here in the IO thread + * after writing the reply. When tracking was enabled, it's already accounted in the main thread + * via afterCommand() -> clusterSlotStatsAddNetworkBytesOutForUserClient(). */ + if (!header->track_bytes) { + clusterSlotStatsAddNetworkBytesOutForSlot(header->slot, header->reply_len); + } bulkStrRef *str_ref = (bulkStrRef *)ptr; size_t len = header->payload_len; diff --git a/tests/unit/cluster/slot-stats.tcl b/tests/unit/cluster/slot-stats.tcl index 70757e3b26c..c5adc3e9041 100644 --- a/tests/unit/cluster/slot-stats.tcl +++ b/tests/unit/cluster/slot-stats.tcl @@ -996,3 +996,55 @@ start_cluster 1 1 {tags {external:skip cluster} overrides {cluster-slot-stats-en R 0 CONFIG RESETSTAT R 1 CONFIG RESETSTAT } + +start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} { + set key "testslotbytes" + set key_slot [R 0 cluster keyslot $key] + set metrics_to_assert [list network-bytes-out] + + test "CLUSTER SLOT-STATS network-bytes-out with copy avoidance and commandlog disabled" { + set copy_avoid [lindex [R 0 config get min-string-size-avoid-copy-reply] 1] + R 0 config set min-string-size-avoid-copy-reply 1 + + # Disable commandlog tracking + R 0 config set commandlog-request-larger-than -1 + R 0 config resetstat + + set value [string repeat A 2048] + R 0 set $key $value + + # Reset stats after SET to only measure GET reply + R 0 config resetstat + + # Get should use copy avoidance path + R 0 get $key + + # Verify cluster slot stats tracked the bytes correctly + # Even though commandlog tracking is disabled, cluster slot stats should work + # via IO thread accounting in releaseBufReferences() + set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] + set slot_stats [convert_array_into_dict $slot_stats] + + set network_bytes_out [dict get $slot_stats $key_slot network-bytes-out] + + # For 2048 bytes: $2048\r\n\r\n = 2057 bytes + assert_equal $network_bytes_out 2057 + + # Re-enable commandlog + R 0 config set commandlog-request-larger-than 1024 + R 0 config resetstat + + # Get should still track correctly + R 0 get $key + + set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] + set slot_stats [convert_array_into_dict $slot_stats] + set network_bytes_out [dict get $slot_stats $key_slot network-bytes-out] + + assert_equal $network_bytes_out 2057 + + # Cleanup + R 0 del $key + R 0 config set min-string-size-avoid-copy-reply $copy_avoid + } +} diff --git a/tests/unit/commandlog.tcl b/tests/unit/commandlog.tcl index c4c26bb5e91..d7223763d0a 100644 --- a/tests/unit/commandlog.tcl +++ b/tests/unit/commandlog.tcl @@ -376,4 +376,36 @@ start_server {tags {"commandlog"} overrides {commandlog-execution-slower-than 10 assert_equal {test-client} [lindex $ping_cmd 5] } } + + test {COMMANDLOG large-reply - byte tracking with copy avoidance} { + set copy_avoid [lindex [r config get min-string-size-avoid-copy-reply] 1] + r config set min-string-size-avoid-copy-reply 1 + + # Disable request tracking + r config set commandlog-request-larger-than -1 + r commandlog reset large-reply + + set value [string repeat A 2048] + r set testkey $value + + # Should not be logged + r get testkey + assert_equal [r commandlog len large-reply] 0 + + # Enable tracking + r config set commandlog-request-larger-than 1024 + r commandlog reset large-reply + + # Get the value again, should be tracked + r get testkey + assert_equal [r commandlog len large-reply] 1 + set e [lindex [r commandlog get -1 large-reply] 0] + assert_equal [lindex $e 3] {get testkey} + # For 2048 bytes: $2048\r\n\r\n = 2057 + assert_equal [lindex $e 2] 2057 + + # Cleanup + r config set min-string-size-avoid-copy-reply $copy_avoid + r del testkey + } } diff --git a/valkey.conf b/valkey.conf index 1f6fe56d7e8..44eb6d34d54 100644 --- a/valkey.conf +++ b/valkey.conf @@ -2122,6 +2122,9 @@ commandlog-slow-execution-max-len 128 # # The threshold is measured in bytes. # Note that -1 disables the large request log, while 0 forces logging of every command. +# Enabling this feature (values other than -1) has performance implications +# when I/O threads are used due to additional tracking overhead. +# Consider using -1 to disable if large request monitoring is not needed. commandlog-request-larger-than 1048576 # Record the number of commands. # There is no limit to this length. Just be aware that it will consume memory. From 9f049d3bfe31f7d2bede5e1fbf148bd3441ad1a9 Mon Sep 17 00:00:00 2001 From: Daniil Kashapov Date: Thu, 29 Jan 2026 19:46:13 +0500 Subject: [PATCH 11/12] Fix mixup of reply-larger-than and request-larger-than (#3126) This change amends #3086 in which `commandlog-request-larger-than` was mixed up with `commandlog-reply-larger-than` while checking for config values and in tests. --------- Signed-off-by: Daniil Kashapov --- src/networking.c | 10 +++++----- tests/unit/cluster/slot-stats.tcl | 4 ++-- tests/unit/commandlog.tcl | 6 +++--- valkey.conf | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/networking.c b/src/networking.c index ae0a316ce2a..b2103bae40d 100644 --- a/src/networking.c +++ b/src/networking.c @@ -568,7 +568,7 @@ static size_t _addReplyPayloadToBuffer(client *c, const void *payload, size_t le size_t available = c->buf_usable_size - c->bufpos; size_t reply_len = min(available, len); if (c->flag.buf_encoded) { - int track_bytes = (server.commandlog[COMMANDLOG_TYPE_LARGE_REQUEST].threshold != -1); + int track_bytes = (server.commandlog[COMMANDLOG_TYPE_LARGE_REPLY].threshold != -1); reply_len = upsertPayloadHeader(c->buf, &c->bufpos, &c->last_header, payload_type, len, c->slot, track_bytes, available); } if (!reply_len) return 0; @@ -619,7 +619,7 @@ static void _addReplyPayloadToList(client *c, list *reply_list, const char *payl size_t copy = avail >= len ? len : avail; if (tail->flag.buf_encoded) { - int track_bytes = (server.commandlog[COMMANDLOG_TYPE_LARGE_REQUEST].threshold != -1); + int track_bytes = (server.commandlog[COMMANDLOG_TYPE_LARGE_REPLY].threshold != -1); copy = upsertPayloadHeader(tail->buf, &tail->used, &tail->last_header, payload_type, len, c->slot, track_bytes, avail); } else if (encoded) { /* If encoded buffer is required but tail is unencoded then pretend nothing can be added to it @@ -648,7 +648,7 @@ static void _addReplyPayloadToList(client *c, list *reply_list, const char *payl tail->flag.buf_encoded = encoded; tail->last_header = NULL; if (tail->flag.buf_encoded) { - int track_bytes = (server.commandlog[COMMANDLOG_TYPE_LARGE_REQUEST].threshold != -1); + int track_bytes = (server.commandlog[COMMANDLOG_TYPE_LARGE_REPLY].threshold != -1); upsertPayloadHeader(tail->buf, &tail->used, &tail->last_header, payload_type, len, c->slot, track_bytes, tail->size); } memcpy(tail->buf + tail->used, payload, len); @@ -1409,7 +1409,7 @@ void addReplyBulk(client *c, robj *obj) { if (tryAvoidBulkStrCopyToReply(c, obj) == C_OK) { /* If copy avoidance allowed, then we explicitly maintain net_output_bytes_curr_cmd. * We determine per-reply if tracking is enabled by checking the config in the main thread. */ - if (server.commandlog[COMMANDLOG_TYPE_LARGE_REQUEST].threshold != -1) { + if (server.commandlog[COMMANDLOG_TYPE_LARGE_REPLY].threshold != -1) { serverAssert(obj->encoding == OBJ_ENCODING_RAW); size_t str_len = sdslen(obj->ptr); uint32_t num_len = digits10(str_len); @@ -2776,7 +2776,7 @@ static void releaseBufReferences(char *buf, size_t bufpos) { ptr += sizeof(payloadHeader); if (header->payload_type == BULK_STR_REF) { - /* When net byte tracking was disabled in the main thread (commandlog-request-larger-than -1) + /* When net byte tracking was disabled in the main thread (commandlog-reply-larger-than -1) * at the time this reply was added, we account for cluster slot stats here in the IO thread * after writing the reply. When tracking was enabled, it's already accounted in the main thread * via afterCommand() -> clusterSlotStatsAddNetworkBytesOutForUserClient(). */ diff --git a/tests/unit/cluster/slot-stats.tcl b/tests/unit/cluster/slot-stats.tcl index c5adc3e9041..ee5d2c28d10 100644 --- a/tests/unit/cluster/slot-stats.tcl +++ b/tests/unit/cluster/slot-stats.tcl @@ -1007,7 +1007,7 @@ start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-en R 0 config set min-string-size-avoid-copy-reply 1 # Disable commandlog tracking - R 0 config set commandlog-request-larger-than -1 + R 0 config set commandlog-reply-larger-than -1 R 0 config resetstat set value [string repeat A 2048] @@ -1031,7 +1031,7 @@ start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-en assert_equal $network_bytes_out 2057 # Re-enable commandlog - R 0 config set commandlog-request-larger-than 1024 + R 0 config set commandlog-reply-larger-than 1024 R 0 config resetstat # Get should still track correctly diff --git a/tests/unit/commandlog.tcl b/tests/unit/commandlog.tcl index d7223763d0a..5fda2f0e111 100644 --- a/tests/unit/commandlog.tcl +++ b/tests/unit/commandlog.tcl @@ -381,8 +381,8 @@ start_server {tags {"commandlog"} overrides {commandlog-execution-slower-than 10 set copy_avoid [lindex [r config get min-string-size-avoid-copy-reply] 1] r config set min-string-size-avoid-copy-reply 1 - # Disable request tracking - r config set commandlog-request-larger-than -1 + # Disable reply tracking + r config set commandlog-reply-larger-than -1 r commandlog reset large-reply set value [string repeat A 2048] @@ -393,7 +393,7 @@ start_server {tags {"commandlog"} overrides {commandlog-execution-slower-than 10 assert_equal [r commandlog len large-reply] 0 # Enable tracking - r config set commandlog-request-larger-than 1024 + r config set commandlog-reply-larger-than 1024 r commandlog reset large-reply # Get the value again, should be tracked diff --git a/valkey.conf b/valkey.conf index 44eb6d34d54..9c615215ca3 100644 --- a/valkey.conf +++ b/valkey.conf @@ -2122,9 +2122,6 @@ commandlog-slow-execution-max-len 128 # # The threshold is measured in bytes. # Note that -1 disables the large request log, while 0 forces logging of every command. -# Enabling this feature (values other than -1) has performance implications -# when I/O threads are used due to additional tracking overhead. -# Consider using -1 to disable if large request monitoring is not needed. commandlog-request-larger-than 1048576 # Record the number of commands. # There is no limit to this length. Just be aware that it will consume memory. @@ -2139,6 +2136,9 @@ commandlog-large-request-max-len 128 # # The threshold is measured in bytes. # Note that -1 disables the large reply log, while 0 forces logging of every command. +# Enabling this feature (values other than -1) has performance implications +# when I/O threads are used due to additional tracking overhead. +# Consider using -1 to disable if large reply monitoring is not needed. commandlog-reply-larger-than 1048576 # Record the number of commands. # There is no limit to this length. Just be aware that it will consume memory. From 529dad64395ddc6474dc830994b1200466d9d610 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= Date: Thu, 29 Jan 2026 16:22:11 +0100 Subject: [PATCH 12/12] Release notes and version file for 9.0.2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Viktor Söderqvist --- 00-RELEASENOTES | 30 ++++++++++++++++++++++++++++++ src/version.h | 4 ++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/00-RELEASENOTES b/00-RELEASENOTES index 3ee74b2be15..8982b8f48a5 100644 --- a/00-RELEASENOTES +++ b/00-RELEASENOTES @@ -11,6 +11,36 @@ Upgrade urgency levels: | CRITICAL | There is a critical bug affecting MOST USERS. Upgrade ASAP. | | SECURITY | There are security fixes in the release. | +Valkey 9.0.2 - February 3, 2026 +------------------------------- + +Upgrade urgency HIGH: There are critical bugs that may affect a subset of users. + +### Bug fixes + +* Avoid memory leak of new argv when HEXPIRE commands target only non-exiting fields (#2973) +* Fix HINCRBY and HINCRBYFLOAT to update volatile key tracking (#2974) +* Avoid empty hash object when HSETEX added no fields (#2998) +* Fix case-sensitive check for the FNX and FXX arguments in HSETEX (#3000) +* Prevent assertion in active expiration job after a hash with volatile fields is overwritten (#3003, #3007) +* Fix HRANDFIELD to return null response when no field could be found (#3022) +* Fix HEXPIRE to not delete items when validation rules fail and expiration is in the past (#3023, #3048) +* Fix how hash is handling overriding of expired fields overwrite (#3060) +* HSETEX - Always issue keyspace notifications after validation (#3001) +* Make zero a valid TTL for hash fields during import mode and data loading (#3006) +* Trigger prepareCommand on argc change in module command filters (#2945) +* Restrict TTL from being negative and avoid crash in import-mode (#2944) +* Fix chained replica crash when doing dual channel replication (#2983) +* Skip slot cache optimization for AOF client to prevent key duplication and data corruption (#3004) +* Fix used_memory_dataset underflow due to miscalculated used_memory_overhead (#3005) +* Avoid duplicate calculations of network-bytes-out in slot stats with copy-avoidance (#3046) +* Fix XREAD returning error on empty stream with + ID (#2742) + +### Performance/Efficiency Improvements + +* Track reply bytes in I/O threads if commandlog-reply-larger-than is -1 (#3086, #3126). + This makes it possible to mitigate a performance regression in 9.0.1 caused by the bug fix #2652. + Valkey 9.0.1 - December 9, 2025 ------------------------------- diff --git a/src/version.h b/src/version.h index 599db3ca68c..38aa70032bf 100644 --- a/src/version.h +++ b/src/version.h @@ -4,8 +4,8 @@ * similar. */ #define SERVER_NAME "valkey" #define SERVER_TITLE "Valkey" -#define VALKEY_VERSION "9.0.1" -#define VALKEY_VERSION_NUM 0x00090001 +#define VALKEY_VERSION "9.0.2" +#define VALKEY_VERSION_NUM 0x00090002 /* The release stage is used in order to provide release status information. * In unstable branch the status is always "dev". * During release process the status will be set to rc1,rc2...rcN.