From 597bd90b210598c6b9c9bc7bce938914a83fbc4b Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Mon, 19 May 2025 09:50:02 +0300 Subject: [PATCH 01/21] Add auto-repair options for broken AOF tail on startup --- src/aof.c | 53 ++++++++++++++++++++++++------- src/config.c | 3 ++ src/server.h | 3 ++ tests/integration/aof.tcl | 65 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 113 insertions(+), 11 deletions(-) diff --git a/src/aof.c b/src/aof.c index 22c64d14b60..f6ee58d34d9 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1489,7 +1489,6 @@ int loadSingleAppendOnlyFile(char *filename) { off_t valid_before_multi = 0; /* Offset before MULTI command loaded. */ off_t last_progress_report_size = 0; int ret = AOF_OK; - sds aof_filepath = makePath(server.aof_dirname, filename); FILE *fp = fopen(aof_filepath, "r"); if (fp == NULL) { @@ -1658,7 +1657,7 @@ int loadSingleAppendOnlyFile(char *filename) { /* Clean up. Command code may have changed argv/argc so we use the * argv/argc of the client instead of the local variables. */ freeClientArgv(fakeClient); - if (server.aof_load_truncated) valid_up_to = ftello(fp); + if (server.aof_load_truncated || server.aof_load_broken ) valid_up_to = ftello(fp); if (server.key_load_delay) debugDelay(server.key_load_delay); } @@ -1690,23 +1689,23 @@ int loadSingleAppendOnlyFile(char *filename) { if (server.aof_load_truncated) { serverLog(LL_WARNING,"!!! Warning: short read while loading the AOF file %s!!!", filename); serverLog(LL_WARNING,"!!! Truncating the AOF %s at offset %llu !!!", - filename, (unsigned long long) valid_up_to); + filename, (unsigned long long) valid_up_to); if (valid_up_to == -1 || truncate(aof_filepath,valid_up_to) == -1) { if (valid_up_to == -1) { serverLog(LL_WARNING,"Last valid command offset is invalid"); } else { serverLog(LL_WARNING,"Error truncating the AOF file %s: %s", - filename, strerror(errno)); + filename, strerror(errno)); } } else { /* Make sure the AOF file descriptor points to the end of the - * file after the truncate call. */ + * file after the truncate call. */ if (server.aof_fd != -1 && lseek(server.aof_fd,0,SEEK_END) == -1) { serverLog(LL_WARNING,"Can't seek the end of the AOF file %s: %s", - filename, strerror(errno)); + filename, strerror(errno)); } else { serverLog(LL_WARNING, - "AOF %s loaded anyway because aof-load-truncated is enabled", filename); + "AOF %s loaded anyway because aof-load-truncated is enabled", filename); ret = AOF_TRUNCATED; goto loaded_ok; } @@ -1719,9 +1718,41 @@ int loadSingleAppendOnlyFile(char *filename) { goto cleanup; fmterr: /* Format error. */ - serverLog(LL_WARNING, "Bad file format reading the append only file %s: " - "make a backup of your AOF file, then use ./redis-check-aof --fix ", filename); + /* fmterr may be caused by accidentally machine shutdown, so if the broken tail is less than a specified size, + * try to recover it automatically */ + if (server.aof_load_broken) { + if (valid_up_to == -1) { + serverLog(LL_WARNING,"Last valid command offset is invalid"); + } else if (sb.st_size - valid_up_to < server.aof_load_broken_max_size) { + if (truncate(aof_filepath,valid_up_to) == -1) { + serverLog(LL_WARNING,"Error truncating the AOF file: %s", + strerror(errno)); + } else { + /* Make sure the AOF file descriptor points to the end of the + * file after the truncate call. */ + if (server.aof_fd != -1 && lseek(server.aof_fd,0,SEEK_END) == -1) { + serverLog(LL_WARNING,"Can't seek the end of the AOF file: %s", + strerror(errno)); + + } else { + serverLog(LL_WARNING, + "AOF loaded anyway because aof-load-broken is enabled and broken size '%ld' is less than aof-load-broken-max-size '%ld'", + sb.st_size - valid_up_to, server.aof_load_broken_max_size); + ret = AOF_BROKEN_RECOVERED; + goto loaded_ok; + } + } + }else { // The size of the corrupted portion exceeds the configured limit + serverLog(LL_WARNING, + "AOF did not loaded because the size of the corrupted portion exceeds the configured limit. aof-load-broken is enabled and broken size '%ld' is bigger than aof-load-broken-max-size '%ld'", + sb.st_size - valid_up_to, server.aof_load_broken_max_size); + } + }else { + serverLog(LL_WARNING, "Bad file format reading the append only file %s: " + "make a backup of your AOF file, then use ./redis-check-aof --fix ", filename); + } ret = AOF_FAILED; + /* fall through to cleanup. */ cleanup: @@ -1794,13 +1825,13 @@ int loadAppendOnlyFiles(aofManifest *am) { last_file = ++aof_num == total_num; start = ustime(); ret = loadSingleAppendOnlyFile(aof_name); - if (ret == AOF_OK || (ret == AOF_TRUNCATED && last_file)) { + if (ret == AOF_OK || ((ret == AOF_TRUNCATED || ret == AOF_BROKEN_RECOVERED) && last_file)){ serverLog(LL_NOTICE, "DB loaded from base file %s: %.3f seconds", aof_name, (float)(ustime()-start)/1000000); } /* If the truncated file is not the last file, we consider this to be a fatal error. */ - if (ret == AOF_TRUNCATED && !last_file) { + if ((ret == AOF_TRUNCATED || ret == AOF_BROKEN_RECOVERED) && !last_file) { ret = AOF_FAILED; serverLog(LL_WARNING, "Fatal error: the truncated file is not the last file"); } diff --git a/src/config.c b/src/config.c index 673cf60c162..8eaaa410d55 100644 --- a/src/config.c +++ b/src/config.c @@ -3090,6 +3090,7 @@ standardConfig static_configs[] = { createBoolConfig("cluster-require-full-coverage", NULL, MODIFIABLE_CONFIG, server.cluster_require_full_coverage, 1, NULL, NULL), createBoolConfig("rdb-save-incremental-fsync", NULL, MODIFIABLE_CONFIG, server.rdb_save_incremental_fsync, 1, NULL, NULL), createBoolConfig("aof-load-truncated", NULL, MODIFIABLE_CONFIG, server.aof_load_truncated, 1, NULL, NULL), + createBoolConfig("aof-load-broken", NULL, MODIFIABLE_CONFIG, server.aof_load_broken, 0, NULL, NULL), createBoolConfig("aof-use-rdb-preamble", NULL, MODIFIABLE_CONFIG, server.aof_use_rdb_preamble, 1, NULL, NULL), createBoolConfig("aof-timestamp-enabled", NULL, MODIFIABLE_CONFIG, server.aof_timestamp_enabled, 0, NULL, NULL), createBoolConfig("cluster-replica-no-failover", "cluster-slave-no-failover", MODIFIABLE_CONFIG, server.cluster_slave_no_failover, 0, NULL, updateClusterFlags), /* Failover by default. */ @@ -3249,6 +3250,8 @@ standardConfig static_configs[] = { createSizeTConfig("tracking-table-max-keys", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.tracking_table_max_keys, 1000000, INTEGER_CONFIG, NULL, NULL), /* Default: 1 million keys max. */ createSizeTConfig("client-query-buffer-limit", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, 1024*1024, LONG_MAX, server.client_max_querybuf_len, 1024*1024*1024, MEMORY_CONFIG, NULL, NULL), /* Default: 1GB max query buffer. */ createSSizeTConfig("maxmemory-clients", NULL, MODIFIABLE_CONFIG, -100, SSIZE_MAX, server.maxmemory_clients, 0, MEMORY_CONFIG | PERCENT_CONFIG, NULL, applyClientMaxMemoryUsage), + createSSizeTConfig("aof-load-broken-max-size", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.aof_load_broken_max_size, (4*1024), INTEGER_CONFIG, NULL, NULL), + /* Other configs */ createTimeTConfig("repl-backlog-ttl", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.repl_backlog_time_limit, 60*60, INTEGER_CONFIG, NULL, NULL), /* Default: 1 hour */ diff --git a/src/server.h b/src/server.h index 07b784d59e5..50be1b87219 100644 --- a/src/server.h +++ b/src/server.h @@ -345,6 +345,7 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT]; #define AOF_OPEN_ERR 3 #define AOF_FAILED 4 #define AOF_TRUNCATED 5 +#define AOF_BROKEN_RECOVERED 6 /* RDB return values for rdbLoad. */ #define RDB_OK 0 @@ -2006,6 +2007,8 @@ struct redisServer { int aof_last_write_status; /* C_OK or C_ERR */ int aof_last_write_errno; /* Valid if aof write/fsync status is ERR */ int aof_load_truncated; /* Don't stop on unexpected AOF EOF. */ + int aof_load_broken; /* Don't stop on bad fmt. */ + off_t aof_load_broken_max_size; /* the max size of broken AOF tail than can be ignored. */ int aof_use_rdb_preamble; /* Specify base AOF to use RDB encoding on AOF rewrites. */ redisAtomic int aof_bio_fsync_status; /* Status of AOF fsync in bio job. */ redisAtomic int aof_bio_fsync_errno; /* Errno of AOF fsync in bio job. */ diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index d4a556e3bc5..e8d65e9171d 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -701,4 +701,69 @@ tags {"aof external:skip"} { assert_equal {1} [r get t] } } + + + # Check AOF load broken behavior + start_server_aof [list dir $server_path aof-load-broken yes] { + test "Unfinished MULTI: Server should start if aof-load-broken is yes" { + assert_equal 1 [is_alive [srv pid]] + } + } + + # Should also start with broken AOF. + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand set foo 1] + append_to_aof [formatCommand incr foo] + append_to_aof [formatCommand incr foo] + append_to_aof [formatCommand incr foo] + append_to_aof [formatCommand incr foo] + append_to_aof "corruption" + } + + start_server_aof [list dir $server_path aof-load-broken yes] { + test "Short read: Server should start if aof-load-broken is yes" { + assert_equal 1 [is_alive [srv pid]] + } + + # The AOF file is expected to be correct because default value for aof-load-broken-max-size is 4096, + #so the AOF will reload without the corruption + test "Broken AOF loaded: we expect foo to be equal to 5" { + set client [redis [srv host] [srv port] 0 $::tls] + wait_done_loading $client + assert {[$client get foo] eq "5"} + } + + test "Append a new command after loading an incomplete AOF" { + $client incr foo + } + } + + start_server_aof [list dir $server_path aof-load-broken yes] { + test "Short read + command: Server should start" { + assert_equal 1 [is_alive [srv pid]] + } + + test "Broken AOF loaded: we expect foo to be equal to 6 now" { + set client [redis [srv host] [srv port] 0 $::tls] + wait_done_loading $client + assert {[$client get foo] eq "6"} + } + } + + # Test that the server exits when the AOF contains a format error + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand set foo hello] + append_to_aof [string range [formatCommand incr foo] 0 end-3] + append_to_aof "corruption" + } + + # We set the maximum allowed corrupted size to 2 bytes, but the actual corrupted portion is larger, + # so the AOF file will not be reloaded. + start_server_aof_ex [list dir $server_path aof-load-broken yes aof-load-broken-max-size 2] [list wait_ready false] { + test "Bad format: Server should have logged an error" { + + wait_for_log_messages 0 {"*AOF did not loaded because the size*"} 0 10 1000 + } + } } + From ced41b1dddb85c1ffd2bf687f627c09d406c87e9 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Mon, 19 May 2025 10:21:33 +0300 Subject: [PATCH 02/21] change aof-load-broken-max-size configuration --- src/config.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/config.c b/src/config.c index 8eaaa410d55..fa47e5608aa 100644 --- a/src/config.c +++ b/src/config.c @@ -3256,7 +3256,8 @@ standardConfig static_configs[] = { /* Other configs */ createTimeTConfig("repl-backlog-ttl", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.repl_backlog_time_limit, 60*60, INTEGER_CONFIG, NULL, NULL), /* Default: 1 hour */ createOffTConfig("auto-aof-rewrite-min-size", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.aof_rewrite_min_size, 64*1024*1024, MEMORY_CONFIG, NULL, NULL), - createOffTConfig("loading-process-events-interval-bytes", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 1024, INT_MAX, server.loading_process_events_interval_bytes, 1024*512, INTEGER_CONFIG, NULL, NULL), + createOffTConfig("loading-process-events-interval-bytes", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 1024, INT_MAX, server.loading_process_events_interval_bytes, 1024*1024*2, INTEGER_CONFIG, NULL, NULL), + createOffTConfig("aof-load-broken-max-size", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.aof_load_broken_max_size, (4*1024), INTEGER_CONFIG, NULL, NULL), createIntConfig("tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.tls_port, 0, INTEGER_CONFIG, NULL, applyTLSPort), /* TCP port. */ createIntConfig("tls-session-cache-size", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.tls_ctx_config.session_cache_size, 20*1024, INTEGER_CONFIG, NULL, applyTlsCfg), From 614f039671523b265406145c5b9381caf8019001 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Mon, 19 May 2025 11:16:59 +0300 Subject: [PATCH 03/21] fix of-load-broken-max-size configuration --- src/config.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/config.c b/src/config.c index fa47e5608aa..238eb73a75c 100644 --- a/src/config.c +++ b/src/config.c @@ -3250,7 +3250,6 @@ standardConfig static_configs[] = { createSizeTConfig("tracking-table-max-keys", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.tracking_table_max_keys, 1000000, INTEGER_CONFIG, NULL, NULL), /* Default: 1 million keys max. */ createSizeTConfig("client-query-buffer-limit", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, 1024*1024, LONG_MAX, server.client_max_querybuf_len, 1024*1024*1024, MEMORY_CONFIG, NULL, NULL), /* Default: 1GB max query buffer. */ createSSizeTConfig("maxmemory-clients", NULL, MODIFIABLE_CONFIG, -100, SSIZE_MAX, server.maxmemory_clients, 0, MEMORY_CONFIG | PERCENT_CONFIG, NULL, applyClientMaxMemoryUsage), - createSSizeTConfig("aof-load-broken-max-size", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.aof_load_broken_max_size, (4*1024), INTEGER_CONFIG, NULL, NULL), /* Other configs */ From 6e75dd6723099eaca1069e1454d2f443e5ed8dc3 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Wed, 21 May 2025 12:05:15 +0300 Subject: [PATCH 04/21] fix unnecessary changes --- src/aof.c | 19 +++++++++---------- src/config.c | 1 - src/server.h | 2 +- tests/integration/aof.tcl | 2 +- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/aof.c b/src/aof.c index f6ee58d34d9..62ad539659d 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1489,6 +1489,7 @@ int loadSingleAppendOnlyFile(char *filename) { off_t valid_before_multi = 0; /* Offset before MULTI command loaded. */ off_t last_progress_report_size = 0; int ret = AOF_OK; + sds aof_filepath = makePath(server.aof_dirname, filename); FILE *fp = fopen(aof_filepath, "r"); if (fp == NULL) { @@ -1684,28 +1685,27 @@ int loadSingleAppendOnlyFile(char *filename) { ret = AOF_FAILED; goto cleanup; } - uxeof: /* Unexpected AOF end of file. */ if (server.aof_load_truncated) { serverLog(LL_WARNING,"!!! Warning: short read while loading the AOF file %s!!!", filename); serverLog(LL_WARNING,"!!! Truncating the AOF %s at offset %llu !!!", - filename, (unsigned long long) valid_up_to); + filename, (unsigned long long) valid_up_to); if (valid_up_to == -1 || truncate(aof_filepath,valid_up_to) == -1) { if (valid_up_to == -1) { serverLog(LL_WARNING,"Last valid command offset is invalid"); } else { serverLog(LL_WARNING,"Error truncating the AOF file %s: %s", - filename, strerror(errno)); + filename, strerror(errno)); } } else { /* Make sure the AOF file descriptor points to the end of the - * file after the truncate call. */ + * file after the truncate call. */ if (server.aof_fd != -1 && lseek(server.aof_fd,0,SEEK_END) == -1) { serverLog(LL_WARNING,"Can't seek the end of the AOF file %s: %s", - filename, strerror(errno)); + filename, strerror(errno)); } else { serverLog(LL_WARNING, - "AOF %s loaded anyway because aof-load-truncated is enabled", filename); + "AOF %s loaded anyway because aof-load-truncated is enabled", filename); ret = AOF_TRUNCATED; goto loaded_ok; } @@ -1733,7 +1733,6 @@ int loadSingleAppendOnlyFile(char *filename) { if (server.aof_fd != -1 && lseek(server.aof_fd,0,SEEK_END) == -1) { serverLog(LL_WARNING,"Can't seek the end of the AOF file: %s", strerror(errno)); - } else { serverLog(LL_WARNING, "AOF loaded anyway because aof-load-broken is enabled and broken size '%ld' is less than aof-load-broken-max-size '%ld'", @@ -1742,12 +1741,12 @@ int loadSingleAppendOnlyFile(char *filename) { goto loaded_ok; } } - }else { // The size of the corrupted portion exceeds the configured limit + } else { /* The size of the corrupted portion exceeds the configured limit. */ serverLog(LL_WARNING, "AOF did not loaded because the size of the corrupted portion exceeds the configured limit. aof-load-broken is enabled and broken size '%ld' is bigger than aof-load-broken-max-size '%ld'", sb.st_size - valid_up_to, server.aof_load_broken_max_size); } - }else { + } else { serverLog(LL_WARNING, "Bad file format reading the append only file %s: " "make a backup of your AOF file, then use ./redis-check-aof --fix ", filename); } @@ -1825,7 +1824,7 @@ int loadAppendOnlyFiles(aofManifest *am) { last_file = ++aof_num == total_num; start = ustime(); ret = loadSingleAppendOnlyFile(aof_name); - if (ret == AOF_OK || ((ret == AOF_TRUNCATED || ret == AOF_BROKEN_RECOVERED) && last_file)){ + if (ret == AOF_OK || ((ret == AOF_TRUNCATED || ret == AOF_BROKEN_RECOVERED) && last_file)) { serverLog(LL_NOTICE, "DB loaded from base file %s: %.3f seconds", aof_name, (float)(ustime()-start)/1000000); } diff --git a/src/config.c b/src/config.c index 238eb73a75c..fc26ba48119 100644 --- a/src/config.c +++ b/src/config.c @@ -3251,7 +3251,6 @@ standardConfig static_configs[] = { createSizeTConfig("client-query-buffer-limit", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, 1024*1024, LONG_MAX, server.client_max_querybuf_len, 1024*1024*1024, MEMORY_CONFIG, NULL, NULL), /* Default: 1GB max query buffer. */ createSSizeTConfig("maxmemory-clients", NULL, MODIFIABLE_CONFIG, -100, SSIZE_MAX, server.maxmemory_clients, 0, MEMORY_CONFIG | PERCENT_CONFIG, NULL, applyClientMaxMemoryUsage), - /* Other configs */ createTimeTConfig("repl-backlog-ttl", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.repl_backlog_time_limit, 60*60, INTEGER_CONFIG, NULL, NULL), /* Default: 1 hour */ createOffTConfig("auto-aof-rewrite-min-size", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.aof_rewrite_min_size, 64*1024*1024, MEMORY_CONFIG, NULL, NULL), diff --git a/src/server.h b/src/server.h index 50be1b87219..425b874c2e4 100644 --- a/src/server.h +++ b/src/server.h @@ -2008,7 +2008,7 @@ struct redisServer { int aof_last_write_errno; /* Valid if aof write/fsync status is ERR */ int aof_load_truncated; /* Don't stop on unexpected AOF EOF. */ int aof_load_broken; /* Don't stop on bad fmt. */ - off_t aof_load_broken_max_size; /* the max size of broken AOF tail than can be ignored. */ + off_t aof_load_broken_max_size; /* The max size of broken AOF tail than can be ignored. */ int aof_use_rdb_preamble; /* Specify base AOF to use RDB encoding on AOF rewrites. */ redisAtomic int aof_bio_fsync_status; /* Status of AOF fsync in bio job. */ redisAtomic int aof_bio_fsync_errno; /* Errno of AOF fsync in bio job. */ diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index e8d65e9171d..30e95ff2910 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -726,7 +726,7 @@ tags {"aof external:skip"} { } # The AOF file is expected to be correct because default value for aof-load-broken-max-size is 4096, - #so the AOF will reload without the corruption + # so the AOF will reload without the corruption test "Broken AOF loaded: we expect foo to be equal to 5" { set client [redis [srv host] [srv port] 0 $::tls] wait_done_loading $client From 05b04a5ab5148960f1e61a50080ef3843d0b967b Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Thu, 22 May 2025 13:07:49 +0300 Subject: [PATCH 05/21] remove unnecessary change --- src/aof.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/aof.c b/src/aof.c index 62ad539659d..65c8ae95664 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1751,7 +1751,6 @@ int loadSingleAppendOnlyFile(char *filename) { "make a backup of your AOF file, then use ./redis-check-aof --fix ", filename); } ret = AOF_FAILED; - /* fall through to cleanup. */ cleanup: From e625c9f26a409cfecdcca99d82b89f67d758668d Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Mon, 26 May 2025 14:29:37 +0300 Subject: [PATCH 06/21] Fix format specifier in AOF error message --- src/aof.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aof.c b/src/aof.c index 65c8ae95664..b6c9d833ac7 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1735,7 +1735,7 @@ int loadSingleAppendOnlyFile(char *filename) { strerror(errno)); } else { serverLog(LL_WARNING, - "AOF loaded anyway because aof-load-broken is enabled and broken size '%ld' is less than aof-load-broken-max-size '%ld'", + "AOF loaded anyway because aof-load-broken is enabled and broken size '%lld' is less than aof-load-broken-max-size '%lld'", sb.st_size - valid_up_to, server.aof_load_broken_max_size); ret = AOF_BROKEN_RECOVERED; goto loaded_ok; @@ -1743,7 +1743,7 @@ int loadSingleAppendOnlyFile(char *filename) { } } else { /* The size of the corrupted portion exceeds the configured limit. */ serverLog(LL_WARNING, - "AOF did not loaded because the size of the corrupted portion exceeds the configured limit. aof-load-broken is enabled and broken size '%ld' is bigger than aof-load-broken-max-size '%ld'", + "AOF was not loaded because the size of the corrupted portion exceeds the configured limit. aof-load-broken is enabled and broken size '%lld' is bigger than aof-load-broken-max-size '%lld'", sb.st_size - valid_up_to, server.aof_load_broken_max_size); } } else { From fe045ff167e8246ef07e868c18fc61c1b18f962e Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Tue, 27 May 2025 09:47:24 +0300 Subject: [PATCH 07/21] Fix format specifier in AOF error message --- src/aof.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aof.c b/src/aof.c index b6c9d833ac7..c54a122d39e 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1736,7 +1736,7 @@ int loadSingleAppendOnlyFile(char *filename) { } else { serverLog(LL_WARNING, "AOF loaded anyway because aof-load-broken is enabled and broken size '%lld' is less than aof-load-broken-max-size '%lld'", - sb.st_size - valid_up_to, server.aof_load_broken_max_size); + (long long)(sb.st_size - valid_up_to), (long long)(server.aof_load_broken_max_size)); ret = AOF_BROKEN_RECOVERED; goto loaded_ok; } @@ -1744,7 +1744,7 @@ int loadSingleAppendOnlyFile(char *filename) { } else { /* The size of the corrupted portion exceeds the configured limit. */ serverLog(LL_WARNING, "AOF was not loaded because the size of the corrupted portion exceeds the configured limit. aof-load-broken is enabled and broken size '%lld' is bigger than aof-load-broken-max-size '%lld'", - sb.st_size - valid_up_to, server.aof_load_broken_max_size); + (long long)(sb.st_size - valid_up_to), (long long)(server.aof_load_broken_max_size)); } } else { serverLog(LL_WARNING, "Bad file format reading the append only file %s: " From 775890a51c405c101416a30a756bbf916358ee1c Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Tue, 27 May 2025 12:05:38 +0300 Subject: [PATCH 08/21] Fix tcl error msg --- tests/integration/aof.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index 30e95ff2910..ca51a5347b6 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -762,7 +762,7 @@ tags {"aof external:skip"} { start_server_aof_ex [list dir $server_path aof-load-broken yes aof-load-broken-max-size 2] [list wait_ready false] { test "Bad format: Server should have logged an error" { - wait_for_log_messages 0 {"*AOF did not loaded because the size*"} 0 10 1000 + wait_for_log_messages 0 {"*AOF was not loaded because the size*"} 0 10 1000 } } } From 3bdf6169eceff3d7449c342abc009dc089d9bbf2 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Tue, 3 Jun 2025 09:40:12 +0300 Subject: [PATCH 09/21] Add incr file support + remove spaces --- src/aof.c | 2 +- src/config.c | 2 +- tests/integration/aof.tcl | 4 +--- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/aof.c b/src/aof.c index c54a122d39e..43342a043a8 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1853,7 +1853,7 @@ int loadAppendOnlyFiles(aofManifest *am) { last_file = ++aof_num == total_num; start = ustime(); ret = loadSingleAppendOnlyFile(aof_name); - if (ret == AOF_OK || (ret == AOF_TRUNCATED && last_file)) { + if (ret == AOF_OK || ((ret == AOF_TRUNCATED || ret == AOF_BROKEN_RECOVERED) && last_file)) { serverLog(LL_NOTICE, "DB loaded from incr file %s: %.3f seconds", aof_name, (float)(ustime()-start)/1000000); } diff --git a/src/config.c b/src/config.c index fc26ba48119..dfa1a4a8cfb 100644 --- a/src/config.c +++ b/src/config.c @@ -3255,7 +3255,7 @@ standardConfig static_configs[] = { createTimeTConfig("repl-backlog-ttl", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.repl_backlog_time_limit, 60*60, INTEGER_CONFIG, NULL, NULL), /* Default: 1 hour */ createOffTConfig("auto-aof-rewrite-min-size", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.aof_rewrite_min_size, 64*1024*1024, MEMORY_CONFIG, NULL, NULL), createOffTConfig("loading-process-events-interval-bytes", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 1024, INT_MAX, server.loading_process_events_interval_bytes, 1024*1024*2, INTEGER_CONFIG, NULL, NULL), - createOffTConfig("aof-load-broken-max-size", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.aof_load_broken_max_size, (4*1024), INTEGER_CONFIG, NULL, NULL), + createOffTConfig("aof-load-broken-max-size", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.aof_load_broken_max_size, 4*1024, INTEGER_CONFIG, NULL, NULL), createIntConfig("tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.tls_port, 0, INTEGER_CONFIG, NULL, applyTLSPort), /* TCP port. */ createIntConfig("tls-session-cache-size", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.tls_ctx_config.session_cache_size, 20*1024, INTEGER_CONFIG, NULL, applyTlsCfg), diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index ca51a5347b6..a7dd6c846e8 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -702,7 +702,6 @@ tags {"aof external:skip"} { } } - # Check AOF load broken behavior start_server_aof [list dir $server_path aof-load-broken yes] { test "Unfinished MULTI: Server should start if aof-load-broken is yes" { @@ -765,5 +764,4 @@ tags {"aof external:skip"} { wait_for_log_messages 0 {"*AOF was not loaded because the size*"} 0 10 1000 } } -} - +} \ No newline at end of file From 7e17545831c09826ca01c97d8e660df6cd2f155c Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Wed, 11 Jun 2025 11:00:51 +0300 Subject: [PATCH 10/21] add tests for base aof --- src/aof.c | 2 +- src/config.c | 2 +- tests/integration/aof.tcl | 35 ++++++++++++++++++++++++++++++++--- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/aof.c b/src/aof.c index 43342a043a8..7bcecb4771b 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1658,7 +1658,7 @@ int loadSingleAppendOnlyFile(char *filename) { /* Clean up. Command code may have changed argv/argc so we use the * argv/argc of the client instead of the local variables. */ freeClientArgv(fakeClient); - if (server.aof_load_truncated || server.aof_load_broken ) valid_up_to = ftello(fp); + if (server.aof_load_truncated || server.aof_load_broken) valid_up_to = ftello(fp); if (server.key_load_delay) debugDelay(server.key_load_delay); } diff --git a/src/config.c b/src/config.c index dfa1a4a8cfb..27b5c44a06e 100644 --- a/src/config.c +++ b/src/config.c @@ -3254,7 +3254,7 @@ standardConfig static_configs[] = { /* Other configs */ createTimeTConfig("repl-backlog-ttl", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.repl_backlog_time_limit, 60*60, INTEGER_CONFIG, NULL, NULL), /* Default: 1 hour */ createOffTConfig("auto-aof-rewrite-min-size", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.aof_rewrite_min_size, 64*1024*1024, MEMORY_CONFIG, NULL, NULL), - createOffTConfig("loading-process-events-interval-bytes", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 1024, INT_MAX, server.loading_process_events_interval_bytes, 1024*1024*2, INTEGER_CONFIG, NULL, NULL), + createOffTConfig("loading-process-events-interval-bytes", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 1024, INT_MAX, server.loading_process_events_interval_bytes, 1024*512, INTEGER_CONFIG, NULL, NULL), createOffTConfig("aof-load-broken-max-size", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.aof_load_broken_max_size, 4*1024, INTEGER_CONFIG, NULL, NULL), createIntConfig("tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.tls_port, 0, INTEGER_CONFIG, NULL, applyTLSPort), /* TCP port. */ diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index a7dd6c846e8..4d2599a05e3 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -702,6 +702,35 @@ tags {"aof external:skip"} { } } + # Check AOF load broken behavior + # Corrupted base AOF, no incr AOF present + create_aof $aof_dirpath $aof_base_file { + append_to_aof [formatCommand set param ok] + append_to_aof "corruption" + } + + start_server_aof_ex [list dir $server_path aof-load-broken yes] [list wait_ready false] { + test "Log should mention truncated file is not last" { + wait_for_log_messages 0 { + {*AOF loaded anyway because aof-load-broken is enabled*} + {*Fatal error: the truncated file is not the last file*} + } 0 10 1000 + } + } + # Remove all incr AOF files to make the base file being the last file + exec rm -f $aof_dirpath/appendonly.aof.* + start_server_aof [list dir $server_path aof-load-broken yes] { + test "Corrupted base AOF (last file): should recover" { + assert_equal 1 [is_alive [srv pid]] + } + + test "param should be 'ok'" { + set client [redis [srv host] [srv port]] + wait_done_loading $client + assert {[$client get param] eq "ok"} + } + } + # Check AOF load broken behavior start_server_aof [list dir $server_path aof-load-broken yes] { test "Unfinished MULTI: Server should start if aof-load-broken is yes" { @@ -709,7 +738,7 @@ tags {"aof external:skip"} { } } - # Should also start with broken AOF. + # Should also start with broken incer AOF. create_aof $aof_dirpath $aof_file { append_to_aof [formatCommand set foo 1] append_to_aof [formatCommand incr foo] @@ -760,8 +789,8 @@ tags {"aof external:skip"} { # so the AOF file will not be reloaded. start_server_aof_ex [list dir $server_path aof-load-broken yes aof-load-broken-max-size 2] [list wait_ready false] { test "Bad format: Server should have logged an error" { - + wait_for_log_messages 0 {"*AOF was not loaded because the size*"} 0 10 1000 } } -} \ No newline at end of file +# } \ No newline at end of file From e8d494d90620c439f269ee914c92dba754138fbe Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Thu, 12 Jun 2025 11:01:48 +0300 Subject: [PATCH 11/21] fix spaces --- src/aof.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/aof.c b/src/aof.c index 7bcecb4771b..12bc54dfefb 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1718,8 +1718,8 @@ int loadSingleAppendOnlyFile(char *filename) { goto cleanup; fmterr: /* Format error. */ - /* fmterr may be caused by accidentally machine shutdown, so if the broken tail is less than a specified size, - * try to recover it automatically */ + /* fmterr may be caused by accidentally machine shutdown, so if the broken tail + * is less than a specified size, try to recover it automatically */ if (server.aof_load_broken) { if (valid_up_to == -1) { serverLog(LL_WARNING,"Last valid command offset is invalid"); @@ -1735,7 +1735,8 @@ int loadSingleAppendOnlyFile(char *filename) { strerror(errno)); } else { serverLog(LL_WARNING, - "AOF loaded anyway because aof-load-broken is enabled and broken size '%lld' is less than aof-load-broken-max-size '%lld'", + "AOF loaded anyway because aof-load-broken is enabled and " + "broken size '%lld' is less than aof-load-broken-max-size '%lld'", (long long)(sb.st_size - valid_up_to), (long long)(server.aof_load_broken_max_size)); ret = AOF_BROKEN_RECOVERED; goto loaded_ok; @@ -1743,7 +1744,9 @@ int loadSingleAppendOnlyFile(char *filename) { } } else { /* The size of the corrupted portion exceeds the configured limit. */ serverLog(LL_WARNING, - "AOF was not loaded because the size of the corrupted portion exceeds the configured limit. aof-load-broken is enabled and broken size '%lld' is bigger than aof-load-broken-max-size '%lld'", + "AOF was not loaded because the size of the corrupted portion " + "exceeds the configured limit. aof-load-broken is enabled and broken size '%lld' " + "is bigger than aof-load-broken-max-size '%lld'", (long long)(sb.st_size - valid_up_to), (long long)(server.aof_load_broken_max_size)); } } else { From 8233868c36872187a2282993e771ff2b0094f999 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Sun, 22 Jun 2025 09:17:06 +0300 Subject: [PATCH 12/21] add support to incr aof + add test for an intermediate AOF file is broken --- src/aof.c | 2 +- tests/integration/aof.tcl | 29 ++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/aof.c b/src/aof.c index 12bc54dfefb..00d387be7a5 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1866,7 +1866,7 @@ int loadAppendOnlyFiles(aofManifest *am) { if (ret == AOF_EMPTY) ret = AOF_OK; /* If the truncated file is not the last file, we consider this to be a fatal error. */ - if (ret == AOF_TRUNCATED && !last_file) { + if ((ret == AOF_TRUNCATED || ret == AOF_BROKEN_RECOVERED) && !last_file) { ret = AOF_FAILED; serverLog(LL_WARNING, "Fatal error: the truncated file is not the last file"); } diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index 4d2599a05e3..4674d8f489e 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -793,4 +793,31 @@ tags {"aof external:skip"} { wait_for_log_messages 0 {"*AOF was not loaded because the size*"} 0 10 1000 } } -# } \ No newline at end of file + # Create base and incr files + create_aof $aof_dirpath $aof_base_file { + append_to_aof [formatCommand set foo base] + } + + # Create first incr AOF (not last) + set mid_aof_file $aof_dirpath/appendonly.aof.1 + create_aof $aof_dirpath $mid_aof_file { + append_to_aof [formatCommand set foo mid] + append_to_aof "CORRUPTION" + } + + # Create second incr AOF (last and correct) + set last_aof_file $aof_dirpath/appendonly.aof.2 + create_aof $aof_dirpath $last_aof_file { + append_to_aof [formatCommand set foo last] + } + + # Start Redis with broken intermediate file + start_server_aof_ex [list dir $server_path aof-load-broken yes] [list wait_ready false] { + test "Intermediate AOF is broken: should load last and recover" { + wait_for_log_messages 0 { + {*AOF loaded anyway because aof-load-broken is enabled*} + {*Fatal error: the truncated file is not the last file*} + } 0 10 1000 + } + } +} \ No newline at end of file From eadcfa046fe058193853c9093fa1b19d697e031b Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Tue, 1 Jul 2025 13:30:01 +0300 Subject: [PATCH 13/21] fix indentation and add more tests --- src/aof.c | 2 +- tests/integration/aof.tcl | 61 ++++++++++++++++++++++++--------------- 2 files changed, 39 insertions(+), 24 deletions(-) diff --git a/src/aof.c b/src/aof.c index 00d387be7a5..18f0407e23e 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1719,7 +1719,7 @@ int loadSingleAppendOnlyFile(char *filename) { fmterr: /* Format error. */ /* fmterr may be caused by accidentally machine shutdown, so if the broken tail - * is less than a specified size, try to recover it automatically */ + * is less than a specified size, try to recover it automatically */ if (server.aof_load_broken) { if (valid_up_to == -1) { serverLog(LL_WARNING,"Last valid command offset is invalid"); diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index 4674d8f489e..5c1b3ff43e5 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -703,7 +703,7 @@ tags {"aof external:skip"} { } # Check AOF load broken behavior - # Corrupted base AOF, no incr AOF present + # Corrupted base AOF, existing AOF files create_aof $aof_dirpath $aof_base_file { append_to_aof [formatCommand set param ok] append_to_aof "corruption" @@ -717,6 +717,7 @@ tags {"aof external:skip"} { } 0 10 1000 } } + # Remove all incr AOF files to make the base file being the last file exec rm -f $aof_dirpath/appendonly.aof.* start_server_aof [list dir $server_path aof-load-broken yes] { @@ -731,14 +732,7 @@ tags {"aof external:skip"} { } } - # Check AOF load broken behavior - start_server_aof [list dir $server_path aof-load-broken yes] { - test "Unfinished MULTI: Server should start if aof-load-broken is yes" { - assert_equal 1 [is_alive [srv pid]] - } - } - - # Should also start with broken incer AOF. + # Should also start with broken incr AOF. create_aof $aof_dirpath $aof_file { append_to_aof [formatCommand set foo 1] append_to_aof [formatCommand incr foo] @@ -789,35 +783,56 @@ tags {"aof external:skip"} { # so the AOF file will not be reloaded. start_server_aof_ex [list dir $server_path aof-load-broken yes aof-load-broken-max-size 2] [list wait_ready false] { test "Bad format: Server should have logged an error" { - wait_for_log_messages 0 {"*AOF was not loaded because the size*"} 0 10 1000 } } - # Create base and incr files - create_aof $aof_dirpath $aof_base_file { - append_to_aof [formatCommand set foo base] + + create_aof_manifest $aof_dirpath $aof_manifest_file { + append_to_manifest "file appendonly.aof.1.base.aof seq 1 type b\n" + append_to_manifest "file appendonly.aof.1.incr.aof seq 1 type i\n" + append_to_manifest "file appendonly.aof.2.incr.aof seq 2 type i\n" + } + # Create base AOF file + set base_aof_file "$aof_dirpath/appendonly.aof.1.base.aof" + create_aof $aof_dirpath $base_aof_file { + append_to_aof [formatCommand set fo base] } - # Create first incr AOF (not last) - set mid_aof_file $aof_dirpath/appendonly.aof.1 + # Create middle incr AOF file with corruption + set mid_aof_file "$aof_dirpath/appendonly.aof.1.incr.aof" create_aof $aof_dirpath $mid_aof_file { - append_to_aof [formatCommand set foo mid] + append_to_aof [formatCommand set fo mid] append_to_aof "CORRUPTION" } - # Create second incr AOF (last and correct) - set last_aof_file $aof_dirpath/appendonly.aof.2 + # Create last incr AOF file (valid) + set last_aof_file "$aof_dirpath/appendonly.aof.2.incr.aof" create_aof $aof_dirpath $last_aof_file { - append_to_aof [formatCommand set foo last] + append_to_aof [formatCommand set fo last] } - # Start Redis with broken intermediate file + # Check that Redis fails to load because corruption is in the middle file start_server_aof_ex [list dir $server_path aof-load-broken yes] [list wait_ready false] { - test "Intermediate AOF is broken: should load last and recover" { + test "Intermediate AOF is broken: should log fatal and not start" { wait_for_log_messages 0 { - {*AOF loaded anyway because aof-load-broken is enabled*} {*Fatal error: the truncated file is not the last file*} } 0 10 1000 } } -} \ No newline at end of file + + # Swap mid and last files + set tmp_file "$aof_dirpath/temp.aof" + file rename -force $mid_aof_file $tmp_file + file rename -force $last_aof_file $mid_aof_file + file rename -force $tmp_file $last_aof_file + + # Should now start successfully since corruption is in last AOF file + start_server_aof [list dir $server_path aof-load-broken yes] { + test "Corrupted last AOF file: Server should still start and recover" { + assert_equal 1 [is_alive [srv pid]] + set client [redis [srv host] [srv port]] + wait_done_loading $client + assert {[$client get fo] eq "mid"} + } + } +} From bb9f838db108a19827795bcaa11ab912b6ed5e74 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Tue, 1 Jul 2025 14:06:27 +0300 Subject: [PATCH 14/21] add tls --- tests/integration/aof.tcl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index 5c1b3ff43e5..cf4b64e3d38 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -726,7 +726,7 @@ tags {"aof external:skip"} { } test "param should be 'ok'" { - set client [redis [srv host] [srv port]] + set client [redis [srv host] [srv port] $::tls] wait_done_loading $client assert {[$client get param] eq "ok"} } @@ -830,7 +830,7 @@ tags {"aof external:skip"} { start_server_aof [list dir $server_path aof-load-broken yes] { test "Corrupted last AOF file: Server should still start and recover" { assert_equal 1 [is_alive [srv pid]] - set client [redis [srv host] [srv port]] + set client [redis [srv host] [srv port] $::tls] wait_done_loading $client assert {[$client get fo] eq "mid"} } From ac15703026434d8ce45322807cee713662f22515 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Tue, 1 Jul 2025 15:33:10 +0300 Subject: [PATCH 15/21] add space + file for test --- src/aof.c | 1 + tests/integration/aof.tcl | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/aof.c b/src/aof.c index 18f0407e23e..be2f0661a13 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1685,6 +1685,7 @@ int loadSingleAppendOnlyFile(char *filename) { ret = AOF_FAILED; goto cleanup; } + uxeof: /* Unexpected AOF end of file. */ if (server.aof_load_truncated) { serverLog(LL_WARNING,"!!! Warning: short read while loading the AOF file %s!!!", filename); diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index cf4b64e3d38..ea65883a743 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -708,7 +708,9 @@ tags {"aof external:skip"} { append_to_aof [formatCommand set param ok] append_to_aof "corruption" } - + create_aof $aof_dirpath $aof_file { + append_to_aof [formatCommand set foo hello] + } start_server_aof_ex [list dir $server_path aof-load-broken yes] [list wait_ready false] { test "Log should mention truncated file is not last" { wait_for_log_messages 0 { From b8171add64f52bb2634561e69a887b4b53a16779 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Wed, 9 Jul 2025 08:25:41 +0300 Subject: [PATCH 16/21] try to remove tls for tests --- tests/integration/aof.tcl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index ea65883a743..319a416da12 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -728,7 +728,8 @@ tags {"aof external:skip"} { } test "param should be 'ok'" { - set client [redis [srv host] [srv port] $::tls] + set client [redis [srv host] [srv port]] + # set client [redis [srv host] [srv port] $::tls] wait_done_loading $client assert {[$client get param] eq "ok"} } @@ -832,7 +833,8 @@ tags {"aof external:skip"} { start_server_aof [list dir $server_path aof-load-broken yes] { test "Corrupted last AOF file: Server should still start and recover" { assert_equal 1 [is_alive [srv pid]] - set client [redis [srv host] [srv port] $::tls] + # set client [redis [srv host] [srv port] $::tls] + set client [redis [srv host] [srv port]] wait_done_loading $client assert {[$client get fo] eq "mid"} } From ca6e9bfbb0e14f8145df0fdc41d7725fb17f90e7 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Wed, 9 Jul 2025 12:04:08 +0300 Subject: [PATCH 17/21] fix tls --- tests/integration/aof.tcl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index 319a416da12..71555586f05 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -728,8 +728,7 @@ tags {"aof external:skip"} { } test "param should be 'ok'" { - set client [redis [srv host] [srv port]] - # set client [redis [srv host] [srv port] $::tls] + set client [redis [srv host] [srv port] 0 $::tls] wait_done_loading $client assert {[$client get param] eq "ok"} } @@ -833,8 +832,7 @@ tags {"aof external:skip"} { start_server_aof [list dir $server_path aof-load-broken yes] { test "Corrupted last AOF file: Server should still start and recover" { assert_equal 1 [is_alive [srv pid]] - # set client [redis [srv host] [srv port] $::tls] - set client [redis [srv host] [srv port]] + set client [redis [srv host] [srv port] 0 $::tls] wait_done_loading $client assert {[$client get fo] eq "mid"} } From 46e972f9606a2a2467eb6238fecb82f40c7572b4 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Mon, 21 Jul 2025 09:16:43 +0300 Subject: [PATCH 18/21] add aof-load-broken to redis.conf --- redis.conf | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/redis.conf b/redis.conf index 62cec06de4e..24d0b939860 100644 --- a/redis.conf +++ b/redis.conf @@ -1529,6 +1529,21 @@ auto-aof-rewrite-min-size 64mb # will be found. aof-load-truncated yes +# When the AOF file is corrupted in the middle (format errors), Redis can +# attempt to automatically recover by truncating the corrupted portion if +# it's smaller than the configured maximum size. This is more aggressive +# than aof-load-truncated which only handles truncation at the end of files. +# +# If aof-load-broken is set to yes and the corrupted portion is smaller than +# aof-load-broken-max-size, Redis will truncate the corrupted data and start +# normally, logging a warning about the recovery. Otherwise, the server will +# exit with an error and require manual intervention using "redis-check-aof". +# +# This option is disabled by default since automatically truncating corrupted +# data can lead to data loss. Only enable this if you understand the risks +# and prefer availability over data integrity in corruption scenarios. +aof-load-broken no + # Redis can create append-only base files in either RDB or AOF formats. Using # the RDB format is always faster and more efficient, and disabling it is only # supported for backward compatibility purposes. From d38b53d3830b24ce9f0c8cc6e9aa05c26161d6c9 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Mon, 21 Jul 2025 13:58:14 +0300 Subject: [PATCH 19/21] add expansion about aof-load-broken-max-size --- redis.conf | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/redis.conf b/redis.conf index 24d0b939860..9f0746239db 100644 --- a/redis.conf +++ b/redis.conf @@ -1535,9 +1535,10 @@ aof-load-truncated yes # than aof-load-truncated which only handles truncation at the end of files. # # If aof-load-broken is set to yes and the corrupted portion is smaller than -# aof-load-broken-max-size, Redis will truncate the corrupted data and start -# normally, logging a warning about the recovery. Otherwise, the server will -# exit with an error and require manual intervention using "redis-check-aof". +# aof-load-broken-max-size (which sets the maximum size in bytes of corrupted +# data that can be automatically truncated), Redis will truncate the corrupted +# data and start normally, logging a warning about the recovery. Otherwise, the +# server will exit with an error and require manual intervention using "redis-check-aof". # # This option is disabled by default since automatically truncating corrupted # data can lead to data loss. Only enable this if you understand the risks From f006bcbb772d13af50dccb9bb312cc2dd0edbd83 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Mon, 21 Jul 2025 15:25:47 +0300 Subject: [PATCH 20/21] add expansion about aof-load-broken-max-size --- redis.conf | 10 ++++++---- src/t_string.c | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/redis.conf b/redis.conf index 9f0746239db..c085cd76a4a 100644 --- a/redis.conf +++ b/redis.conf @@ -1534,11 +1534,13 @@ aof-load-truncated yes # it's smaller than the configured maximum size. This is more aggressive # than aof-load-truncated which only handles truncation at the end of files. # +# The aof-load-broken-max-size setting controls the maximum size in bytes +# of corrupted data that can be automatically truncated. +# # If aof-load-broken is set to yes and the corrupted portion is smaller than -# aof-load-broken-max-size (which sets the maximum size in bytes of corrupted -# data that can be automatically truncated), Redis will truncate the corrupted -# data and start normally, logging a warning about the recovery. Otherwise, the -# server will exit with an error and require manual intervention using "redis-check-aof". +# aof-load-broken-max-size, Redis will truncate the corrupted data and start +# normally, logging a warning about the recovery. Otherwise, the server will +# exit with an error and require manual intervention using "redis-check-aof". # # This option is disabled by default since automatically truncating corrupted # data can lead to data loss. Only enable this if you understand the risks diff --git a/src/t_string.c b/src/t_string.c index 34806541c85..4330a0feb28 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -97,8 +97,8 @@ void setGenericCommand(client *c, int flags, robj *key, robj **valref, robj *exp return; } - /* When expire is not NULL, we avoid deleting the TTL so it can be updated later instead of being deleted and then created again. */ - setkey_flags |= ((flags & OBJ_KEEPTTL) || expire) ? SETKEY_KEEPTTL : 0; + /* Only keep TTL when explicitly requested with KEEPTTL flag */ + setkey_flags |= (flags & OBJ_KEEPTTL) ? SETKEY_KEEPTTL : 0; setkey_flags |= found ? SETKEY_ALREADY_EXIST : SETKEY_DOESNT_EXIST; setKeyByLink(c, c->db, key, valref, setkey_flags, &link); From 6fa861ae4c2c0d1d5aaf66beddfb069c192af087 Mon Sep 17 00:00:00 2001 From: Stav Levi Date: Wed, 23 Jul 2025 10:17:20 +0300 Subject: [PATCH 21/21] add aof-load-broken-max-size to conf file --- redis.conf | 1 + src/aof.c | 2 +- src/t_string.c | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/redis.conf b/redis.conf index c085cd76a4a..b745ecad286 100644 --- a/redis.conf +++ b/redis.conf @@ -1546,6 +1546,7 @@ aof-load-truncated yes # data can lead to data loss. Only enable this if you understand the risks # and prefer availability over data integrity in corruption scenarios. aof-load-broken no +aof-load-broken-max-size 4096 # Redis can create append-only base files in either RDB or AOF formats. Using # the RDB format is always faster and more efficient, and disabling it is only diff --git a/src/aof.c b/src/aof.c index be2f0661a13..ab592f81137 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1685,7 +1685,7 @@ int loadSingleAppendOnlyFile(char *filename) { ret = AOF_FAILED; goto cleanup; } - + uxeof: /* Unexpected AOF end of file. */ if (server.aof_load_truncated) { serverLog(LL_WARNING,"!!! Warning: short read while loading the AOF file %s!!!", filename); diff --git a/src/t_string.c b/src/t_string.c index 4330a0feb28..34806541c85 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -97,8 +97,8 @@ void setGenericCommand(client *c, int flags, robj *key, robj **valref, robj *exp return; } - /* Only keep TTL when explicitly requested with KEEPTTL flag */ - setkey_flags |= (flags & OBJ_KEEPTTL) ? SETKEY_KEEPTTL : 0; + /* When expire is not NULL, we avoid deleting the TTL so it can be updated later instead of being deleted and then created again. */ + setkey_flags |= ((flags & OBJ_KEEPTTL) || expire) ? SETKEY_KEEPTTL : 0; setkey_flags |= found ? SETKEY_ALREADY_EXIST : SETKEY_DOESNT_EXIST; setKeyByLink(c, c->db, key, valref, setkey_flags, &link);