diff --git a/src/bgiteration.c b/src/bgiteration.c
index bb514daed1e..8d635dbff0f 100644
--- a/src/bgiteration.c
+++ b/src/bgiteration.c
@@ -13,9 +13,6 @@
 #include "mutexqueue.h"
 #include "server.h"
 
-// Just for the moment, until https://github.com/valkey-io/valkey/issues/3450 is resolved
-// clang-format off
-
 int getFlushCommandFlags(client *c, int *flags);                                                        // in db.c
 uint64_t dictObjHash(const void *key);                                                                  // in server.c
 int dictObjKeyCompare(const void *key1, const void *key2);                                              // in server.c
@@ -23,33 +20,22 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid);
 robj *createStringObjectWithKeyAndExpire(const char *ptr, size_t len, const sds key, long long expire); // in object.c
 
 
-// Non-public hashtable/kvstore functions...
-hashtableIterator *kvstoreInternalIteratorGetCurrentHashtableIterator(kvstoreIterator *kvs_it);
-
-
-static bool receiveItemsBackFromOneIterator(bgIterator *it); // in bgiteration.c - forward declaration
-
-// ################  TEMP COMPILE HACKS   ###########################
-// Issue found.  server.db has changed from an array of db to an array of pointers to db (change all refs to server.db)
-// Issue: iterators (kvstore/hashtable) are not safe across event loop invocations.  Hashtable (kvstore?) needs to track and maintain safe iterators.
+static bool receiveItemsBackFromOneIterator(bgIterator *it);
 
 
-// Don't think there's any current need for this...
+// Future extendability
 static bool ignoreKeyForSave(const_sds key) {
     UNUSED(key);
     return false;
 }
 
-//------- END OF COMPILE HACKS -------------------
-
-
 // Returns true if the cmd is a script command that may replicate.
 static bool isScriptCallWriteCmd(struct serverCommand *cmd) {
     return ((cmd->proc == fcallCommand) || (cmd->proc == evalCommand) || (cmd->proc == evalShaCommand));
 }
 
-// The PFCOUNT command (which does NOT have the CMD_WRITE flag) modifies the underlying string and
-//  is replicated as a write.  So it needs to be detected and handled specially.
+/* The PFCOUNT command (which does NOT have the CMD_WRITE flag) modifies the underlying string and
+ * is replicated as a write.  So it needs to be detected and handled specially. */
 static bool isWriteCmd(struct serverCommand *cmd) {
     return ((cmd->flags & CMD_WRITE) || (cmd->proc == pfcountCommand) || (cmd->proc == execCommand) || (isScriptCallWriteCmd(cmd)));
 }
@@ -60,16 +46,19 @@ static bool isDeleteCmd(struct serverCommand *cmd) {
 }
 
 
+/* This utility utilizes the main thread and background threads for processing.  The API is split,
+ * with some of the functions intended for the main thread and others intended for the background
+ * clients.  This sanity check ensures that we maintain thread safety, calling the API as intended. */
 static bool onValkeyMainThread(void) {
-    // Modules interact with the main thread using a mutex.  If a module owns the mutex, consider
-    //  that equivalent to being on the main thread.
+    /* Modules interact with the main thread using a mutex.  If a module owns the mutex, consider
+     *  that equivalent to being on the main thread. */
     bool inModule = (atomic_load_explicit(&server.module_gil_acquired, memory_order_relaxed) == 0);
     return (inModule || pthread_equal(server.main_thread_id, pthread_self()) != 0);
 }
 
+
 /* Parse a parameters robj, extracting a valid DBID.
- * Returns FALSE if DBID isn't valid.
- */
+ * Returns FALSE if DBID isn't valid. */
 static bool getDbIdFromRobj(robj *obj, int *db_id) {
     long long value;
     if (getLongLongFromObject(obj, &value) != C_OK) return false;
@@ -79,22 +68,20 @@ static bool getDbIdFromRobj(robj *obj, int *db_id) {
 }
 
 /* Parse the parameters of the COPY command, extracting the target DBID.
- * Returns FALSE if the command would not run.
- */
+ * Returns FALSE if the command would not run. */
 static bool getTargetDbIdForCopyCommand(int argc, robj **argv, int selected_dbid, int *target_dbid) {
     const int COPY_COMMAND_OPTIONAL_ARG_START_INDEX = 3;
 
     *target_dbid = selected_dbid;
 
-    for (int i = COPY_COMMAND_OPTIONAL_ARG_START_INDEX;  i < argc;  i++) {
+    for (int i = COPY_COMMAND_OPTIONAL_ARG_START_INDEX; i < argc; i++) {
         if (!strcasecmp((char *)objectGetVal(argv[i]), "replace")) {
             continue;
         } else if (!strcasecmp((char *)objectGetVal(argv[i]), "db") && (i + 1 < argc)) {
             /* Note the parsing here needs to perfectly match what we have in Valkey OSS for COPY.
              * The following command is considered OK by Valkey 8.1 so we can't return here, but
              * must continue to parse till the last db which is the one that's effectively used.
-             *    COPY key1 key2 db 1 db 2 db 3    // (This will use db 3)
-             */
+             *    COPY key1 key2 db 1 db 2 db 3    (This will use db 3) */
             if (!getDbIdFromRobj(argv[i + 1], target_dbid)) {
                 return false; // parse failure
             }
@@ -108,8 +95,7 @@ static bool getTargetDbIdForCopyCommand(int argc, robj **argv, int selected_dbid
 
 /* Get parameters for the SWAPDB command.
  * The optional permission_client allows for checking of a client's permission for swapdb.
- * Returns true if command would be executed.
- */
+ * Returns true if command would be executed. */
 static bool getParamsForSwapdb(int argc, robj **argv, client *permission_client, int *id1_p, int *id2_p) {
     static struct serverCommand *swapdb_cmd = NULL;
 
@@ -132,7 +118,7 @@ static bool getParamsForSwapdb(int argc, robj **argv, client *permission_client,
     if (getLongLongFromObject(argv[2], &dbid2) != C_OK) return false;
     if (dbid1 < 0 || dbid1 >= server.dbnum) return false;
     if (dbid2 < 0 || dbid2 >= server.dbnum) return false;
-    if (dbid1 == dbid2) return false;  // Valid, but doesn't do anything
+    if (dbid1 == dbid2) return false; // Valid, but doesn't do anything
 
     *id1_p = (int)dbid1;
     *id2_p = (int)dbid2;
@@ -141,8 +127,7 @@ static bool getParamsForSwapdb(int argc, robj **argv, client *permission_client,
 
 /* Get parameters for the SELECT command.
  * The optional permission_client allows for checking of a client's permission for select.
- * Returns true if command would be executed.
- */
+ * Returns true if command would be executed. */
 static bool getParamsForSelect(int argc, robj **argv, client *permission_client, int *dbid_p) {
     static struct serverCommand *select_cmd = NULL;
 
@@ -177,13 +162,13 @@ static void resumeReshahForKvsHashtable(kvstore *kvs, int didx) {
     if (ht != NULL) hashtableResumeRehashing(ht);
 }
 
+
 /* DictType for SDS->ptr.  The SDS is referenced, no destructor. */
 static dictType sdsrefToPtrDictType = {
     .entryGetKey = dictEntryGetKey,
     .hashFunction = dictSdsHash,
     .keyCompare = dictSdsKeyCompare,
-    .entryDestructor = zfree
-};
+    .entryDestructor = zfree};
 
 
 /* Wrap decrRefCount() so that it can be used as a callback requiring void. */
@@ -204,20 +189,27 @@ static sds createSdsFromClientArgv(int argc, robj **argv) {
 }
 
 
-//###########################################################################
+// ###########################################################################
 
 
 /* bgIteration internal (compile time) configuration values */
 enum {
-    BGITER_EARLY_ITERATE_DICT_INITIAL_SIZE = 16384,    // Prevent initial rehashing
-    BGITER_MAX_CLONE_ITEM_BYTES = 512,                 // Max size item to clone
-    BGITER_MAX_CLONE_POOL_BYTES = (1 * 1024 * 1024),   // Total limit for all cloned items
-    BGITER_QUEUE_INCREASE_INCR = 100,                  // Step size when increasing queue target
-    BGITER_CYCLE_DELAY_MS = 2,                         // Delay between calls on bgIteration timer
-    BGITER_CYCLE_BUDGET_MS = 1,                        // Normal time limit for timer processing
-    BGITER_CYCLE_BUDGET_MAX_MS = 10                    // Maximum time limit when starvation seen
+    BGITER_EARLY_ITERATE_DICT_INITIAL_SIZE = 16384,  // Prevent initial rehashing
+    BGITER_MAX_CLONE_ITEM_BYTES = 512,               // Max size item to clone
+    BGITER_MAX_CLONE_POOL_BYTES = (1 * 1024 * 1024), // Total limit for all cloned items
+    BGITER_QUEUE_INCREASE_INCR = 100,                // Step size when increasing queue target
+    BGITER_CYCLE_DELAY_MS = 2,                       // Delay between calls on bgIteration timer
+    BGITER_CYCLE_BUDGET_MS = 1,                      // Normal time limit for timer processing
+    BGITER_CYCLE_BUDGET_MAX_MS = 10                  // Maximum time limit when starvation seen
 };
 
+// dbEntry metadata
+typedef struct {
+    uint32_t iterator_epoch; // iterator epoch of last modification
+} bgIterationEntryMetadata;
+static_assert(sizeof(bgIterationEntryMetadata) == BGITERATION_ENTRY_METADATA_SIZE, "");
+
+
 // These can be tweaked by unit tests
 static int bgiter_max_clone_item_bytes = BGITER_MAX_CLONE_ITEM_BYTES;
 static int bgiter_max_clone_pool_bytes = BGITER_MAX_CLONE_POOL_BYTES;
@@ -237,6 +229,22 @@ typedef enum {
     BGITERATION_TYPE_CLUSTERSLOT
 } bgIterationType;
 
+
+/* Flag indicates that a consistent iteration is required.  This is used to create a point-in-time
+ * iteration.  The iteration client will see all keys AS THEY EXISTED at the time when the iterator
+ * was created.
+ * Note:  The DBID provided with the DICTENTRY events is the original DBID (at the time of iteration
+ *        start).  SWAPDB events are NOT provided during a consistent iteration.  */
+#define BGITERATOR_FLAG_CONSISTENT (1 << 0)
+
+/* Flag indicating that the replication stream for keys which have already been processed should be
+ * forwarded to the iteration client.  Used for non-consistent iteration to track changes
+ * to keys already processed.  By tracking changes, this allows an non-consistent iteration client
+ * to achieve a consistent view at the END of the iteration.
+ * NOTE:  Replication events will be provided ordered and synchronized with any SWAPDB events. */
+#define BGITERATOR_FLAG_REPLICATION (1 << 1)
+
+
 /* Extensions to bgIteratorItemType.  These enumerations are used internally, and are not part of
  *  the published interface.  These allow for extensibility in the internal information-passing
  *  between the Valkey main thread and the iteration client thread. */
@@ -253,19 +261,13 @@ typedef struct {
     bgIterator *iter;
 } bgIteratorItemExtClose;
 
-/* Used for dictEntryPtrDictType. This dict grows and shrinks constantly during the iteration.
- * There is no point to rehash it all the time. */
-static int neverShrink(size_t moreMem, double usedRatio) {
-    UNUSED(moreMem);
-    return (usedRatio > 0.5); // Return true only if expanding
-}
 
-// A dictionary with a pointer (itself) as a key (the address pointed to is NOT referenced).
-//  Nothing is duplicated, this is a very fast dictionary, but potentially unsafe if the original
-//  items are deleted or moved.
-// WARNING:  Can't have active defrag running!  It might reallocate memory blocks, swapping their
-//           pointer values!  A check must be made in active defrag to ensure that no iteration is
-//           active.
+/* A dictionary with a pointer (itself) as a key (the address pointed to is NOT referenced).
+ * Nothing is duplicated, this is a very fast dictionary, but potentially unsafe if the original
+ * items are deleted or moved.
+ * WARNING:  This needs to maintain safety with things that may move the object.
+ *   + In db.c, if the object is reallocatd, bgIteration_updateDbEntryPtr() is called.
+ *   + In defrag.c, we don't defrag if there are multiple references (and we incr the refcount). */
 
 // Thomas Wang's 64-bit mix
 static uint64_t pointerHash(const void *key) {
@@ -284,81 +286,102 @@ static int pointerCompare(const void *key1, const void *key2) {
     return key1 == key2;
 }
 
+// This dict grows and shrinks constantly during the iteration.  Avoid constant rehashing.
+static int neverShrink(size_t moreMem, double usedRatio) {
+    UNUSED(moreMem);
+    return (usedRatio > 0.5); // Return true only if expanding
+}
+
 static dictType dictEntryPtrDictType = {
     .entryGetKey = dictEntryGetKey,
     .hashFunction = pointerHash,
     .keyCompare = pointerCompare,
     .resizeAllowed = neverShrink,
-    .entryDestructor = zfree
-};
+    .entryDestructor = zfree};
 
-// A TEMP set of robj's (of type sds).  This is only for temporary sets as the robj's are not
-//  ref-counted at insertion/deletion.
-static hashtableType tempKeysetHashtableType = {
-    .hashFunction = dictObjHash,
-    .keyCompare = dictObjKeyCompare
-};
-
-typedef struct genericIterator genericIterator;
-typedef void   (*iteratorReleaseFunc)       (genericIterator *genIt);
-typedef fifo * (*iteratorGetEntriesFunc)    (genericIterator *genIt, int *orig_dbid, int *cur_dbid);
-typedef void   (*iteratorSwapDbFunc)        (genericIterator *genIt, int db1, int db2);
-typedef void   (*iteratorFlushDbFunc)       (genericIterator *genIt, int cur_dbid);
-typedef bool   (*iteratorHasPassedItemFunc) (genericIterator *genIt, const_sds key, int cur_dbid);
-typedef int    (*iteratorOriginalDbFunc)    (genericIterator *genIt, int cur_dbid);
-typedef bool   (*iteratorIsKeyInScopeFunc)  (genericIterator *genIt, const_sds key);
+static hashtableType dbEntryPtrHashtableType = {
+    .hashFunction = pointerHash,
+    .keyCompare = pointerCompare,
+    .resizeAllowed = neverShrink};
 
-// Function pointers supporting polymorphic iterator implementation
-struct genericIterator {
-    iteratorReleaseFunc         release;
-    iteratorGetEntriesFunc      getEntries;
-    iteratorSwapDbFunc          swapDb;
-    iteratorFlushDbFunc         flushDb;
-    iteratorHasPassedItemFunc   hasPassedItem;
-    iteratorOriginalDbFunc      originalDb;
-    iteratorIsKeyInScopeFunc    isKeyInScope;
-};
 
+// A free list for bgIteratorItem's - avoids churning zmalloc calls
 typedef struct itemListNode {
     struct itemListNode *next;
 } itemListNode;
 
+static const int FREE_ITEM_MAX = 500;
 static itemListNode *freeItemStackHead = NULL;
-
-static void itemFreeList_returnItemBackToFreeList(bgIteratorItem* item) {
-    itemListNode *freedNode = (itemListNode*)item;
-    freedNode->next = freeItemStackHead;
-    freeItemStackHead = freedNode;
+static int freeItemStackCount = 0;
+
+static void itemFreeList_returnItemBackToFreeList(bgIteratorItem *item) {
+    itemListNode *freedNode = (itemListNode *)item;
+    if (freeItemStackCount < FREE_ITEM_MAX) {
+        freedNode->next = freeItemStackHead;
+        freeItemStackHead = freedNode;
+        freeItemStackCount++;
+    } else {
+        zfree(freedNode);
+    }
 }
 
+// Pop a free node from the free list or allocate if none free
 static bgIteratorItem *itemFreeList_getElementOrAllocate(void) {
- 
     bgIteratorItem *item;
-    // Pop a free node from the free list or allocate if none free
     if (freeItemStackHead) {
-        item = (bgIteratorItem*)freeItemStackHead;
+        item = (bgIteratorItem *)freeItemStackHead;
         freeItemStackHead = freeItemStackHead->next;
-        if (freeItemStackHead) {
-            valkey_prefetch(freeItemStackHead);
-        }
-    }
-    else {
+        freeItemStackCount--;
+        if (freeItemStackHead) valkey_prefetch(freeItemStackHead);
+    } else {
+        serverAssert(freeItemStackCount == 0);
         // Create new listNode and item
         item = zmalloc(sizeof(bgIteratorItem));
     }
     return item;
 }
- 
-static void itemFreeList_release(void) { 
-    while(freeItemStackHead) { 
+
+static void itemFreeList_release(void) {
+    while (freeItemStackHead) {
         itemListNode *node = freeItemStackHead;
         freeItemStackHead = node->next;
-        zfree((bgIteratorItem*)node);
+        freeItemStackCount--;
+        zfree(node);
     }
+    serverAssert(freeItemStackCount == 0);
 }
 
-// This struct is used across threads.  Unless otherwise noted, the fields are initialized at
-//  iterator creation (within the main thread) and are read-only by the client thread.
+
+/* A TEMPORARY set of robj's (of type sds).  This is only for temporary sets as the robj's are not
+ * ref-counted at insertion/deletion. */
+static hashtableType tempKeysetHashtableType = {
+    .hashFunction = dictObjHash,
+    .keyCompare = dictObjKeyCompare};
+
+
+typedef struct genericIterator genericIterator;
+typedef void (*iteratorReleaseFunc)(genericIterator *genIt);
+typedef fifo *(*iteratorGetEntriesFunc)(genericIterator *genIt, int *orig_dbid, int *cur_dbid);
+typedef void (*iteratorSwapDbFunc)(genericIterator *genIt, int db1, int db2);
+typedef void (*iteratorFlushDbFunc)(genericIterator *genIt, int cur_dbid);
+typedef bool (*iteratorHasPassedItemFunc)(genericIterator *genIt, const_sds key, int cur_dbid);
+typedef int (*iteratorOriginalDbFunc)(genericIterator *genIt, int cur_dbid);
+typedef bool (*iteratorIsKeyInScopeFunc)(genericIterator *genIt, const_sds key);
+
+// Function pointers supporting polymorphic iterator implementation
+struct genericIterator {
+    iteratorReleaseFunc release;
+    iteratorGetEntriesFunc getEntries;
+    iteratorSwapDbFunc swapDb;
+    iteratorFlushDbFunc flushDb;
+    iteratorHasPassedItemFunc hasPassedItem;
+    iteratorOriginalDbFunc originalDb;
+    iteratorIsKeyInScopeFunc isKeyInScope;
+};
+
+
+/* This struct is used across threads.  Unless otherwise noted, the fields are initialized at
+ *  iterator creation (within the main thread) and are read-only by the client thread. */
 struct bgIterator {
     sds name;                        // Iterator name
     bgIteratorReplDoneFunc repldone; // Optional repldone function to be run on the main thread
@@ -371,35 +394,33 @@ struct bgIterator {
 
     genericIterator *keyset_iter; // Low-level iterator (polymorphic)
 
-    dict *early_iterate_entries; // Used to keep track of what items have already been iterated
-                                 // over by out-of-order expedited process, ensuring a bgIterator 
-                                 // does not try to reprocess items.
-                                 // Used only by main thread.
-                                 // dictEntry -> NULL
+    /* A set of dbEntry, compared by pointer.  Used to track items which have already been iterated
+     * over by out-of-order expedited processing.  Ensures a bgIterator does not try to reprocess
+     * items.  Used only by main thread. */
+    hashtable *early_iterate_entries;
 
     mutexQueue *items_for_iterator; // Created/Destroyed in main thread, used in both (threadsafe)
 
     mutexQueue *return_to_valkey; // Queue of items to be returned to the Valkey main thread (threadsafe)
-    
+
     unsigned int item_count_target; // Used only by main thread
 
-    bgIteratorItem *volatile current_item; // current_item is normally only used in the iteration client.
-                                           //  It's marked volatile here only to support snooping from the
-                                           //  main thread when handling a FLUSHDB command.  This prevents
-                                           //  the compiler from generating code which might read the
-                                           //  pointer multiple times (when it's coded to read only once).
-                                           // Also - this syntax is for a volatile POINTER to a
-                                           //  non-volatile item.  "volatile" at the beginning of the
-                                           //  declaration, would indicate a (non-volatile) pointer to a
-                                           //  volatile item.
+    /* current_item is normally only used in the iteration client.  It's marked volatile here only
+     * to support snooping from the main thread when handling a FLUSHDB command.  This prevents the
+     * compiler from generating code which might read the pointer multiple times (when it's coded to
+     * read only once).
+     * (A volatile POINTER to a non-volatile item.) */
+    bgIteratorItem *volatile current_item;
 
     bool client_is_active; // Set to true when client performs 1st read
-    bool completed;        // Set to true in main thread when last item from iteration has
-                           //  been queued to the client.  No additional items will be
-                           //  enqueued to the client after this has been set.
 
-    volatile bool terminated; // Set to true in main thread when iteration is to be killed
-                              // Set to true in iteration client when it decides to end early
+    /* Set to true in main thread when last item from iteration has been queued to the client.  No
+     * additional items will be enqueued to the client after this has been set. */
+    bool completed;
+
+    /* Set to true in main thread when iteration is to be killed.
+     * Set to true in iteration client when it decides to end early. */
+    volatile bool terminated;
 
     bool cur_cmd_may_replicate; // Used only in main thread during command processing
 
@@ -416,10 +437,10 @@ struct bgIterator {
     unsigned long dbentry_clones_processed; // Updated by client thread
     monotime monotonic_start_time;          // Time iteration started
 
-    volatile monotime monotonic_item_start_time; // The item start time is set in the iteration client.  It is
-                                                 //  marked volatile as it can be read from the main thread by
-                                                 //  bgIteratorGetStatus.  If 0, this indicates that the
-                                                 //  iteration client is waiting for an item to process.
+    /* The item start time is set in the iteration client.  It is marked volatile as it can be read
+     * from the main thread by bgIteratorGetStatus.  If 0, this indicates that the iteration client
+     * is waiting for an item to process. */
+    volatile monotime monotonic_item_start_time;
 };
 
 
@@ -431,19 +452,19 @@ static dict *nameToIterator; // bgIterator->name -> bgIterator
 // Global, across all iterators, dict contains a dbEntry pointer -> ref count
 static dict *inUseEntries; // dbEntry -> ref count
 
-// Key values in the current command which don't exist in the DB yet.  Needed for determination of
-//  replication for NON-consistent iterations.
+/* Key values in the current command which don't exist in the DB yet.  Needed for determination of
+ * replication for NON-consistent iterations. */
 static list *curCmdMissingKeys; // list of robj
 
-// A counter of the total amount of memory used for buffered replication data.
-//  This amount is excluded when computing the need for evictions.
+/* A counter of the total amount of memory used for buffered replication data.  This amount is
+ * excluded when computing the need for evictions. */
 static ssize_t bufferedReplicationBytes;
 
 // Memory pool to track current allocated memory of cloned items (in bytes)
 static ssize_t bgiteration_current_clone_memory_pool_size;
 
-// Snapshot of the last queue size to seed the next queue
-// We assume all bgIterators consume items at the same rate
+/* Snapshot of the last queue size to seed the next queue.  We assume all bgIterators consume items
+ * at roughly the same rate. */
 static int last_item_count_target;
 
 // Eventloop ID of the timerproc (or AE_DELETED_EVENT_ID)
@@ -453,44 +474,44 @@ static long long bgIterator_timeproc_id;
 static uint32_t bgIteration_epoch = 1;
 
 
-// BgIteration debug captures BgIteration activity to a large sds buffer.  When an iterator is
-//  completed, the entire buffer is written to a file in the current working directory.  Note that
-//  memory must be available for the ENTIRE debug in memory.  This isn't captured incrementally to
-//  a file as the file I/O is more likely to affect timing.
-// Future implementation: the current design is most useful for a single iterator.  When items are
-//  queued to an iterator, the iterator name is not recorded (to save space).
-// Developer note: using a CONST value here allows the compiler to completely remove all of the
-//  debugging code at compile time.  There is no run-time performance overhead when set to FALSE.
-//  This is essentially like an IFDEF, however, it's better as it forces the compiler to validate
-//  syntax.
+/* BgIteration debug captures BgIteration activity to a large sds buffer.  When an iterator is
+ * completed, the entire buffer is written to a file in the current working directory.  Note that
+ * memory must be available for the ENTIRE debug in memory.  This isn't captured incrementally to
+ * a file as the file I/O is more likely to affect timing.
+ *
+ * Future implementation: the current design is most useful for a single iterator.  When items are
+ * queued to an iterator, the iterator name is not recorded (to save space).
+ *
+ * Developer note: using a CONST value here allows the compiler to completely remove all of the
+ * debugging code at compile time.  There is no run-time performance overhead when set to FALSE.
+ * This is essentially like an IFDEF, however, it's better as it forces the compiler to validate
+ * syntax. */
 static const bool BGITERATION_DEBUG = false; // DO NOT SUBMIT WITH THIS SYMBOL SET TO TRUE!
 static sds debugBuffer;
 
 
-
-//=============================================================================================
-//                        Full Scan Iterator
-//=============================================================================================
-/* The full scan iterator performs the actual iteration over the Valkey keyset.  The iterator is
+/* =============================================================================================
+ *                        Full Scan Iterator
+ * =============================================================================================
+ * The full scan iterator performs the actual iteration over the Valkey keyset.  The iterator is
  * only used from within the Valkey main thread.  Iteration proceeds one DB at a time, based on
  * the DB ordering at the time of iterator creation.  Each time the iterator returns items, all
- * of the dictionary entries from a single hash bucket are returned.
- */
+ * of the dictionary entries from a single hash bucket are returned. */
 
 struct fullScanIterator {
     genericIterator callbacks; // (must be first item)
 
-    // Array of mapping from original DB ID (at the time of iteration start) to that DB's
-    //  current index.  So, if the DB which was DB-0 is now at index 6, orig_to_cur_db[0]==6.
+    /* Array of mapping from original DB ID (at the time of iteration start) to that DB's current
+     * index.  So, if the DB which was DB-0 is now at index 6, orig_to_cur_db[0]==6. */
     int *orig_to_cur_db;
 
-    // The reverse of the above array.  This maps a current DB index to its original index
-    //  (at the time of iteration start).
+    /* The reverse of the above array.  This maps a current DB index to its original index (at the
+     * time of iteration start). */
     int *cur_to_orig_db;
 
-    // This is the DB we are currently iterating over.  This is relative to the ORIGINAL
-    //  DB ordering, at the time of iterator creation.  Iteration proceeds from 0..N based on
-    //  the original ordering.
+    /* This is the DB we are currently iterating over.  This is relative to the ORIGINAL DB
+     * ordering, at the time of iterator creation.  Iteration proceeds from 0..N based on the
+     * original ordering. */
     int iter_db;
 
     // Iterator for the DB orig_to_cur_db[iter_db]
@@ -568,7 +589,7 @@ static void fullScanIteratorSwapDb(genericIterator *genIt, int db1, int db2) {
 
 static void fullScanIteratorFlushDb(genericIterator *genIt, int cur_dbid) {
     struct fullScanIterator *it = (struct fullScanIterator *)genIt;
-    int orig_db = it->cur_to_orig_db[cur_dbid];
+    int orig_db = (cur_dbid == -1) ? it->iter_db : it->cur_to_orig_db[cur_dbid];
     if (orig_db == it->iter_db) {
         // We are currently iterating on the DB that's being flushed.
         it->kvs = NULL;
@@ -577,7 +598,7 @@ static void fullScanIteratorFlushDb(genericIterator *genIt, int cur_dbid) {
 }
 
 static bool fullScanIteratorHasPassedItem(genericIterator *genIt, const_sds key, int cur_dbid) {
-    struct fullScanIterator *it = (struct fullScanIterator *) genIt;
+    struct fullScanIterator *it = (struct fullScanIterator *)genIt;
     int orig_dbid = it->cur_to_orig_db[cur_dbid];
 
     if (orig_dbid < it->iter_db) return true;  // Entire DB has already been processed
@@ -586,8 +607,8 @@ static bool fullScanIteratorHasPassedItem(genericIterator *genIt, const_sds key,
 
     if (it->kvs == NULL) return true; // just finished this DB
 
-    // We're in the middle of processing a DB.  In cluster-mode, the DB is divided into 1 hashtable
-    //  per slot.  In cluster-mode-disabled, we treat all keys as in slot 0.
+    /* We're in the middle of processing a DB.  In cluster-mode, the DB is divided into 1 hashtable
+     * per slot.  In cluster-mode-disabled, we treat all keys as in slot 0. */
     int keySlot = server.cluster_enabled ? getKeySlot((sds)key) : 0;
     if (keySlot < it->kvs_didx) return true;
     if (keySlot > it->kvs_didx) return false;
@@ -615,7 +636,7 @@ static bool fullScanIteratorIsKeyInScope(genericIterator *genIt, const_sds key)
     return true; // All keys are in scope
 }
 
-static genericIterator * fullScanIteratorCreate(void) {
+static genericIterator *fullScanIteratorCreate(void) {
     struct fullScanIterator *it = zmalloc(sizeof(struct fullScanIterator));
     it->orig_to_cur_db = zmalloc(sizeof(int) * server.dbnum);
     it->cur_to_orig_db = zmalloc(sizeof(int) * server.dbnum);
@@ -638,13 +659,11 @@ static genericIterator * fullScanIteratorCreate(void) {
 }
 
 
-
-//=============================================================================================
-//                        Cluster Slot Iterator
-//=============================================================================================
-/* The cluster slot iterator performs iteration over one cluster slot of the Valkey keyset.  The
- * iterator is only used from within the Valkey main thread.
- */
+/* =============================================================================================
+ *                        Cluster Slot Iterator
+ * =============================================================================================
+ * The cluster slot iterator performs iteration over one cluster slot of the Valkey keyset.  The
+ * iterator is only used from within the Valkey main thread. */
 struct clusterSlotIterator {
     genericIterator callbacks; // (must be first item)
 };
@@ -654,7 +673,7 @@ static void clusterSlotIteratorRelease(genericIterator *genIt) {
     serverAssert(false); // Not yet implemented
 }
 
-static fifo * clusterSlotIteratorGetEntries(genericIterator *genIt, int *orig_dbid, int *cur_dbid) {
+static fifo *clusterSlotIteratorGetEntries(genericIterator *genIt, int *orig_dbid, int *cur_dbid) {
     UNUSED(genIt);
     UNUSED(orig_dbid);
     UNUSED(cur_dbid);
@@ -696,7 +715,7 @@ static bool clusterSlotIteratorIsKeyInScope(genericIterator *genIt, const_sds ke
     serverAssert(false); // Not yet implemented
 }
 
-static genericIterator * clusterSlotIteratorCreate(const int *slots, size_t slots_count) {
+static genericIterator *clusterSlotIteratorCreate(const int *slots, size_t slots_count) {
     struct clusterSlotIterator *it = zmalloc(sizeof(struct clusterSlotIterator));
     it->callbacks.release = clusterSlotIteratorRelease;
     it->callbacks.getEntries = clusterSlotIteratorGetEntries;
@@ -714,44 +733,43 @@ static genericIterator * clusterSlotIteratorCreate(const int *slots, size_t slot
 }
 
 
+/* =============================================================================================
+ *                        General iteration support (across all iterators)
+ * ============================================================================================= */
 
-//=============================================================================================
-//                        General iteration support (across all iterators)
-//=============================================================================================
-
-// While an item is potentially in use by a background thread, we can't have
-//  rehashing by the main thread.  Returns true if rehashing was paused.
+/* While an item is potentially in use by a background thread, we can't have rehashing by the main
+ * thread.  Returns true if rehashing was paused. */
 static bool pauseRehashing(dbEntry *de) {
     switch (de->encoding) {
-        case OBJ_ENCODING_HASHTABLE: {  // SET or HASH
-            hashtable *ht = objectGetVal(de);
-            hashtablePauseRehashing(ht);
-            return true;
-        }
-        case OBJ_ENCODING_SKIPLIST: {   // SORTED SET
-            zset *zs = objectGetVal(de);
-            hashtablePauseRehashing(zs->ht);
-            return true;
-        }
-        default:
-            return false;
+    case OBJ_ENCODING_HASHTABLE: { // SET or HASH
+        hashtable *ht = objectGetVal(de);
+        hashtablePauseRehashing(ht);
+        return true;
+    }
+    case OBJ_ENCODING_SKIPLIST: { // SORTED SET
+        zset *zs = objectGetVal(de);
+        hashtablePauseRehashing(zs->ht);
+        return true;
+    }
+    default:
+        return false;
     }
 }
 
 static void resumeRehashing(dbEntry *de) {
     switch (de->encoding) {
-        case OBJ_ENCODING_HASHTABLE: {  // SET or HASH
-            hashtable *ht = objectGetVal(de);
-            hashtableResumeRehashing(ht);
-            break;
-        }
-        case OBJ_ENCODING_SKIPLIST: {   // SORTED SET
-            zset *zs = objectGetVal(de);
-            hashtableResumeRehashing(zs->ht);
-            break;
-        }
-        default:
-            break;
+    case OBJ_ENCODING_HASHTABLE: { // SET or HASH
+        hashtable *ht = objectGetVal(de);
+        hashtableResumeRehashing(ht);
+        break;
+    }
+    case OBJ_ENCODING_SKIPLIST: { // SORTED SET
+        zset *zs = objectGetVal(de);
+        hashtableResumeRehashing(zs->ht);
+        break;
+    }
+    default:
+        break;
     }
 }
 
@@ -798,21 +816,22 @@ static ssize_t computeStringDbEntrySize(dbEntry *de) {
 
 
 static dbEntry *tryCloneDbEntry(dbEntry *de) {
-    if (bgiteration_current_clone_memory_pool_size + bgiter_max_clone_item_bytes 
-            > bgiter_max_clone_pool_bytes) {
-        return NULL;
-    }
+    if (bgiteration_current_clone_memory_pool_size + bgiter_max_clone_item_bytes >
+        bgiter_max_clone_pool_bytes) return NULL;
 
-    // Future optimization: Incorporate small ziplists, sorted sets, etc.
-    // OBJ_ENCODING_INT is omitted only because there isn't a good API for cloning it yet.
+    /* Future optimization: Incorporate small ziplists, sorted sets, etc.
+     * OBJ_ENCODING_INT is omitted only because there isn't a good API for cloning it yet. */
     if (de->type == OBJ_STRING && de->encoding != OBJ_ENCODING_INT) {
         ssize_t itemSize = computeStringDbEntrySize(de);
 
         if (itemSize <= bgiter_max_clone_item_bytes) {
             bgiteration_current_clone_memory_pool_size += itemSize;
-            dbEntry *clone = createStringObjectWithKeyAndExpire((char *)objectGetVal(de), sdslen(objectGetVal(de)), objectGetKey(de), objectGetExpire(de));
-            ((bgIterationEntryMetadata *)objectGetMetadata(clone))->iterator_epoch
-                    = ((bgIterationEntryMetadata *)objectGetMetadata(de))->iterator_epoch;
+            dbEntry *clone = createStringObjectWithKeyAndExpire((char *)objectGetVal(de),
+                                                                sdslen(objectGetVal(de)),
+                                                                objectGetKey(de),
+                                                                objectGetExpire(de));
+            ((bgIterationEntryMetadata *)objectGetMetadata(clone))->iterator_epoch =
+                ((bgIterationEntryMetadata *)objectGetMetadata(de))->iterator_epoch;
             return clone;
         }
     }
@@ -820,17 +839,16 @@ static dbEntry *tryCloneDbEntry(dbEntry *de) {
     return NULL;
 }
 
-
 static void freeClonedDictEntry(dbEntry *clonedEntry) {
     serverAssert(clonedEntry->type == OBJ_STRING);
 
-    // Add back to memory pool
     bgiteration_current_clone_memory_pool_size -= computeStringDbEntrySize(clonedEntry);
 
     decrRefCount(clonedEntry);
 }
 
-static bgIteratorItem * makeDbEntryItem(dbEntry *de, int dbid, bool isCloned) {
+
+static bgIteratorItem *makeDbEntryItem(dbEntry *de, int dbid, bool isCloned) {
     if (!isCloned) incrementEntryInuse(de);
 
     bgIteratorItem *item = itemFreeList_getElementOrAllocate();
@@ -843,8 +861,8 @@ static bgIteratorItem * makeDbEntryItem(dbEntry *de, int dbid, bool isCloned) {
     return item;
 }
 
-static robj ** cloneRobjArray(int argc, robj **argv) {
-    robj **newarray = zmalloc(sizeof(robj*) * argc);
+static robj **cloneRobjArray(int argc, robj **argv) {
+    robj **newarray = zmalloc(sizeof(robj *) * argc);
     for (int i = 0; i < argc; i++) {
         newarray[i] = argv[i];
         incrRefCount(argv[i]);
@@ -867,44 +885,43 @@ static void returnCurrentItemToValkey(bgIterator *it) {
     if (item == NULL) return;
 
     switch (item->type) {
-        case BGITERATOR_ITEM_DBENTRY:
-            it->dbentries_processed++;
-            if (item->u.dbe.is_cloned) it->dbentry_clones_processed++;
-            mutexQueueAdd(it->return_to_valkey, item);
-            break;
-        case BGITERATOR_ITEM_REPLICATION:
-            it->replication_processed++;
-            mutexQueueAdd(it->return_to_valkey, item);
-            break;
-        case BGITERATOR_ITEM_SWAPDB:
-            it->swapdb_processed++;
-            mutexQueueAdd(it->return_to_valkey, item);
-            break;
-        case BGITERATOR_ITEM_FLUSHDB:
-            it->flushdb_processed++;
-            mutexQueueAdd(it->return_to_valkey, item);
-            break;
-
-        case BGITERATOR_ITEM_COMPLETE:
-        case BGITERATOR_ITEM_TERMINATED:
-            // These are static and just used to wake the iterator - they should never be returned.
-            serverAssert(false);
-            break;
+    case BGITERATOR_ITEM_DBENTRY:
+        it->dbentries_processed++;
+        if (item->u.dbe.is_cloned) it->dbentry_clones_processed++;
+        mutexQueueAdd(it->return_to_valkey, item);
+        break;
+    case BGITERATOR_ITEM_REPLICATION:
+        it->replication_processed++;
+        mutexQueueAdd(it->return_to_valkey, item);
+        break;
+    case BGITERATOR_ITEM_SWAPDB:
+        it->swapdb_processed++;
+        mutexQueueAdd(it->return_to_valkey, item);
+        break;
+    case BGITERATOR_ITEM_FLUSHDB:
+        it->flushdb_processed++;
+        mutexQueueAdd(it->return_to_valkey, item);
+        break;
+
+    case BGITERATOR_ITEM_COMPLETE:
+    case BGITERATOR_ITEM_TERMINATED:
+        // These are static and just used to wake the iterator - they should never be returned.
+        serverAssert(false);
+        break;
 
-        default:
-            serverAssert(false);
+    default:
+        serverAssert(false);
     }
 
-    // Do this AFTER placing into return_to_valkey.  This is volatile and snooped when there is a
-    //  flushall event.  Don't want an item to be missed.
+    /* Do this AFTER placing into return_to_valkey.  This is volatile and snooped when there is a
+     *  flushall event.  Don't want an item to be missed. */
     it->current_item = NULL;
 }
 
 
-
-//=============================================================================================
-//                        Background Iterator (private)
-//=============================================================================================
+/* =============================================================================================
+ *                        Background Iterator (private)
+ * ============================================================================================= */
 
 static void bgIteratorRelease(bgIterator *it) {
     serverAssert(onValkeyMainThread());
@@ -924,7 +941,7 @@ static void bgIteratorRelease(bgIterator *it) {
     it->keyset_iter->release(it->keyset_iter);
     it->keyset_iter = NULL;
 
-    dictRelease(it->early_iterate_entries);
+    hashtableRelease(it->early_iterate_entries);
     it->early_iterate_entries = NULL;
 
     sdsfree(it->name);
@@ -933,9 +950,9 @@ static void bgIteratorRelease(bgIterator *it) {
 
 
 static bool shouldFeedIteratorMore(bgIterator *it) {
-    return (!it->completed
-         && !it->terminated
-         && mutexQueueLength(it->items_for_iterator) < it->item_count_target);
+    return (!it->completed &&
+            !it->terminated &&
+            mutexQueueLength(it->items_for_iterator) < it->item_count_target);
 }
 
 
@@ -975,10 +992,10 @@ static void feedIterator(bgIterator *it, monotime end_time_us) {
 
         if (dbEntryFifo == NULL) {
             // Iteration of items is complete for this iterator
-            serverAssert(it->dbentries_queued      >= it->dbentries_processed);
-            serverAssert(it->replication_queued    >= it->replication_processed);
-            serverAssert(it->swapdb_queued         >= it->swapdb_processed);
-            serverAssert(it->flushdb_queued        >= it->flushdb_processed);
+            serverAssert(it->dbentries_queued >= it->dbentries_processed);
+            serverAssert(it->replication_queued >= it->replication_processed);
+            serverAssert(it->swapdb_queued >= it->swapdb_processed);
+            serverAssert(it->flushdb_queued >= it->flushdb_processed);
             serverAssert(it->dbentry_clones_queued >= it->dbentry_clones_processed);
 
             // Snapshot queue size to seed next iterator when terminated
@@ -986,10 +1003,10 @@ static void feedIterator(bgIterator *it, monotime end_time_us) {
 
             if (it->iteration_flags & BGITERATOR_FLAG_REPLICATION) {
                 if (!it->client_is_active || (it->dbentries_queued > it->dbentries_processed)) {
-                    // We are done feeding dict entries to the iterator, but before ending the
-                    //  replication processing make sure that the iterator has become active (has
-                    //  started reading) and make sure that all of the dict entries have been processed
-                    //  by the client.
+                    /* We are done feeding dict entries to the iterator, but before ending the
+                     * replication processing make sure that the iterator has become active (has
+                     * started reading) and make sure that all of the dict entries have been
+                     * processed by the client. */
                     break;
                 }
                 if (it->repldone) {
@@ -998,7 +1015,7 @@ static void feedIterator(bgIterator *it, monotime end_time_us) {
                 }
             }
             bgIteratorItem *completionItem = itemFreeList_getElementOrAllocate();
-            *completionItem = (bgIteratorItem){ .type = BGITERATOR_ITEM_COMPLETE };
+            *completionItem = (bgIteratorItem){.type = BGITERATOR_ITEM_COMPLETE};
             if (it->iteration_flags & BGITERATOR_FLAG_REPLICATION) {
                 rdbSaveInfo rsi;
                 completionItem->dbid = (rdbPopulateSaveInfo(&rsi)) ? rsi.repl_stream_db : 0;
@@ -1025,14 +1042,13 @@ static void feedIterator(bgIterator *it, monotime end_time_us) {
             fifoPop(dbEntryFifo, (void **)&de);
 
             // Remove new/modified items during consistent iteration.
-            if (it->iteration_flags & BGITERATOR_FLAG_CONSISTENT
-                    && ((bgIterationEntryMetadata *)objectGetMetadata(de))->iterator_epoch > it->consistent_modification_id) {
+            if (it->iteration_flags & BGITERATOR_FLAG_CONSISTENT &&
+                ((bgIterationEntryMetadata *)objectGetMetadata(de))->iterator_epoch > it->consistent_modification_id) {
                 continue;
             }
 
             // Remove any items which have been processed early
-            if (dictFind(it->early_iterate_entries, de) != NULL) {
-                dictDelete(it->early_iterate_entries, de);
+            if (hashtableDelete(it->early_iterate_entries, de)) {
                 if (BGITERATION_DEBUG) {
                     sds entryString = createEntryString(dbid, de);
                     debugBuffer = sdscatprintf(debugBuffer, "SKIPPING ITEM(early iterate): %s\n", entryString);
@@ -1073,12 +1089,12 @@ static void feedIterator(bgIterator *it, monotime end_time_us) {
 
 
 static bool addEarlyIterationKey(bgIterator *it, dbEntry *earlyEntry, int cur_dbid) {
-    int rc = dictAdd(it->early_iterate_entries, earlyEntry, NULL);
-    serverAssert(rc == DICT_OK);
-  
+    bool wasAdded = hashtableAdd(it->early_iterate_entries, earlyEntry);
+    serverAssert(wasAdded);
+
     int dbid = (it->iteration_flags & BGITERATOR_FLAG_CONSISTENT)
-            ? it->keyset_iter->originalDb(it->keyset_iter, cur_dbid)
-            : cur_dbid;
+                   ? it->keyset_iter->originalDb(it->keyset_iter, cur_dbid)
+                   : cur_dbid;
 
     dbEntry *cloneEntry = tryCloneDbEntry(earlyEntry);
     bool isClonedEntry = (cloneEntry != NULL);
@@ -1088,8 +1104,8 @@ static bool addEarlyIterationKey(bgIterator *it, dbEntry *earlyEntry, int cur_db
     if (isClonedEntry) it->dbentry_clones_queued++;
 
     if (it->iteration_flags & BGITERATOR_FLAG_CONSISTENT) { // JHB - can we optimize here in cluster mode (no swap)
-        // On consistent iteration, SWAPDB events are not provided.  So there is no requirement to
-        //  keep items in order or synchronized with SWAPDB.
+        /* On consistent iteration, SWAPDB events are not provided.  So there is no requirement to
+         * keep items in order or synchronized with SWAPDB. */
         if (BGITERATION_DEBUG) {
             sds entryString = createEntryString(dbid, item->u.dbe.de);
             debugBuffer = sdscatprintf(debugBuffer, "EARLY_1: %s\n", entryString);
@@ -1109,12 +1125,10 @@ static bool addEarlyIterationKey(bgIterator *it, dbEntry *earlyEntry, int cur_db
 
 
 // This expedites a single key and doesn't attempt to avoid expediting through optimization.
-static bool expediteSingleKeyWithoutOptimization(
-        bgIterator *it,
-        int dbid,
-        robj *oKey,
-        hashtable *waitingOnKeys) {
-
+static bool expediteSingleKeyWithoutOptimization(bgIterator *it,
+                                                 int dbid,
+                                                 robj *oKey,
+                                                 hashtable *waitingOnKeys) {
     bool mustBlock = false;
 
     bool iterComplete = it->completed || it->terminated;
@@ -1122,11 +1136,11 @@ static bool expediteSingleKeyWithoutOptimization(
     sds key = objectGetVal(oKey);
     dbEntry *de = dbFind(server.db[dbid], key);
     if (de != NULL) {
-        if (!(iterComplete || it->keyset_iter->hasPassedItem(it->keyset_iter, key, dbid))
-                && (dictFind(it->early_iterate_entries, de) == NULL)) {
+        if (!(iterComplete || it->keyset_iter->hasPassedItem(it->keyset_iter, key, dbid)) &&
+            !hashtableFind(it->early_iterate_entries, de, NULL)) {
             if (addEarlyIterationKey(it, de, dbid)) {
                 mustBlock = true;
-                hashtableAdd(waitingOnKeys, oKey); 
+                hashtableAdd(waitingOnKeys, oKey);
             }
         } else {
             if (isEntryInuseByAnyIterator(de)) {
@@ -1142,12 +1156,11 @@ static bool expediteSingleKeyWithoutOptimization(
 
 // MOVE/COPY are unfortunate special commands.  They work on 2 DBs at once.
 const int MOVE_COMMAND_DBID_ARG_INDEX = 2;
-static bool expediteKeysForMove(
-        bgIterator *it,
-        int dbid,
-        int argc,
-        robj **argv,
-        hashtable *waitingOnKeys) {
+static bool expediteKeysForMove(bgIterator *it,
+                                int dbid,
+                                int argc,
+                                robj **argv,
+                                hashtable *waitingOnKeys) {
     if (argc <= MOVE_COMMAND_DBID_ARG_INDEX) return false;
 
     int destDbid;
@@ -1156,10 +1169,10 @@ static bool expediteKeysForMove(
     bool mustBlock = false;
     robj *key = argv[1];
 
-    // Not looking for special cases to optimize here.  Just try to expedite both src and dest
-    //  keys.  Note that the dest key might exist (and need iteration) but could be expired and
-    //  could be overwritten by MOVE.  In this case, a DEL would replicate due to the expiry.  So
-    //  even if the target is expired, we need to replicate it before executing the command.
+    /* Not looking for special cases to optimize here.  Just try to expedite both src and dest
+     * keys.  Note that the dest key might exist (and need iteration) but could be expired and
+     * could be overwritten by MOVE.  In this case, a DEL would replicate due to the expiry.  So
+     * even if the target is expired, we need to replicate it before executing the command. */
     if (expediteSingleKeyWithoutOptimization(it, dbid, key, waitingOnKeys)) mustBlock = true;
     if (expediteSingleKeyWithoutOptimization(it, destDbid, key, waitingOnKeys)) mustBlock = true;
 
@@ -1169,13 +1182,11 @@ static bool expediteKeysForMove(
 
 
 // MOVE/COPY are unfortunate special commands.  They work on 2 DBs at once.
-static bool expediteKeysForCopy(
-        bgIterator *it,
-        int dbid,
-        int argc,
-        robj **argv,
-        hashtable *waitingOnKeys) {
-
+static bool expediteKeysForCopy(bgIterator *it,
+                                int dbid,
+                                int argc,
+                                robj **argv,
+                                hashtable *waitingOnKeys) {
     int destDbid;
     if (!getTargetDbIdForCopyCommand(argc, argv, dbid, &destDbid)) return false;
 
@@ -1183,8 +1194,8 @@ static bool expediteKeysForCopy(
     robj *srcKey = argv[1];
     robj *destKey = argv[2];
 
-    // Not trying to optimize COPY.  Just expedite source and destination (if it exists).  We
-    //  don't really care if the value is overwritten or not (so no need to parse REPLACE option).
+    /* Not trying to optimize COPY.  Just expedite source and destination (if it exists).  We
+     * don't really care if the value is overwritten or not (so no need to parse REPLACE option). */
     if (expediteSingleKeyWithoutOptimization(it, dbid, srcKey, waitingOnKeys)) mustBlock = true;
     if (expediteSingleKeyWithoutOptimization(it, destDbid, destKey, waitingOnKeys)) mustBlock = true;
 
@@ -1223,40 +1234,38 @@ static bool expediteKeysForCopy(
  * Iterator:  CONSISTENT = YES, REPLICATION = YES
  *   (Combination only valid in cluster mode - no SWAPDB possible)
  *   - Block if any write-key is in use by an the iterator
- *   - Block and immediately queue any key (read or write) that has not already been iterated
- */
-static bool expediteKeysForWrite(
-        bgIterator *it,
-        int dbid,
-        struct serverCommand *cmd,
-        int argc,
-        robj **argv,
-        keyReference *keyrefs,
-        int numKeys,
-        hashtable *waitingOnKeys) {
+ *   - Block and immediately queue any key (read or write) that has not already been iterated */
+static bool expediteKeysForWrite(bgIterator *it,
+                                 int dbid,
+                                 struct serverCommand *cmd,
+                                 int argc,
+                                 robj **argv,
+                                 keyReference *keyrefs,
+                                 int numKeys,
+                                 hashtable *waitingOnKeys) {
     serverAssert(numKeys > 0);
 
     bool mustBlock = false;
 
-    // All keys of the command should either be in scope or not since in cluster mode enabled they
-    // should all be in the same slot. So we just check the first key.
+    /* All keys of the command should either be in scope or not since in cluster mode enabled they
+     * should all be in the same slot. So we just check the first key. */
     robj *oKey = argv[keyrefs[0].pos];
     sds key = objectGetVal(oKey);
-    // If it's not in the iteration scope for the current iterator, then we don't need to do
-    // anything with this command.
+    /* If it's not in the iteration scope for the current iterator, then we don't need to do
+     * anything with this command. */
     if (!it->keyset_iter->isKeyInScope(it->keyset_iter, key)) return false;
 
-    // Note: performance optimization for commands which only modify the first key.  If this flag
-    //  is not available, we can safely remove this `if` statement.
-    if ((cmd->flags & CMD_WRITE_FIRSTKEY_ONLY)
-            && !(it->iteration_flags & BGITERATOR_FLAG_REPLICATION)) {
-        // If this write command only modifies the 1st key, we don't need to expedite others
-        //  unless replication enabled.
+    /* Note: performance optimization for commands which only modify the first key.  If this flag
+     * is not available, we can safely remove this `if` statement. */
+    if ((cmd->flags & CMD_WRITE_FIRSTKEY_ONLY) &&
+        !(it->iteration_flags & BGITERATOR_FLAG_REPLICATION)) {
+        /* If this write command only modifies the 1st key, we don't need to expedite others
+         * unless replication enabled. */
         numKeys = 1;
     }
 
     if (cmd->proc == moveCommand) {
-        // Unfortunate special case for MOVE
+        // Special case for MOVE
         return expediteKeysForMove(it, dbid, argc, argv, waitingOnKeys);
     }
 
@@ -1274,12 +1283,12 @@ static bool expediteKeysForWrite(
             sds key = objectGetVal(oKey);
             dbEntry *de = dbFind(server.db[dbid], key);
             if (de == NULL) continue; // New key, no need to expedite
-            if (!(iterComplete || it->keyset_iter->hasPassedItem(it->keyset_iter, key, dbid))
-                    && dictFind(it->early_iterate_entries, de) == NULL
-                    && ((bgIterationEntryMetadata *)objectGetMetadata(de))->iterator_epoch <= it->consistent_modification_id) {
+            if (!(iterComplete || it->keyset_iter->hasPassedItem(it->keyset_iter, key, dbid)) &&
+                !hashtableFind(it->early_iterate_entries, de, NULL) &&
+                ((bgIterationEntryMetadata *)objectGetMetadata(de))->iterator_epoch <= it->consistent_modification_id) {
                 if (addEarlyIterationKey(it, de, dbid)) {
                     mustBlock = true;
-                    hashtableAdd(waitingOnKeys, oKey); 
+                    hashtableAdd(waitingOnKeys, oKey);
                 }
             } else {
                 if (isEntryInuseByAnyIterator(de)) {
@@ -1290,15 +1299,15 @@ static bool expediteKeysForWrite(
         }
         it->cur_cmd_may_replicate = true; // Will replicate only if replication enabled
     } else {
-        // Identification of missing keys is only needed for non-consistent iteration.  This only
-        //  needs to be collected once (on the 1st non-consistent iteration)
+        /* Identification of missing keys is only needed for non-consistent iteration.  This only
+         * needs to be collected once (on the 1st non-consistent iteration). */
         bool collectMissing = (listLength(curCmdMissingKeys) == 0);
 
         if (it->iteration_flags & BGITERATOR_FLAG_REPLICATION) {
             // CONSISTENT = NO,  REPLICATION = YES
             bool someIterated = false;
-            // dict containing the keys that have not been iterated yet.
-            //  Using a dict dedupes the keys in case the command contains duplicated keys.
+            /* dict containing the keys that have not been iterated yet.
+             * Using a dict dedupes the keys in case the command contains duplicated keys. */
             dict *notIteratedKeys = dictCreate(&dictEntryPtrDictType); // dict of dbEntry* -> robj*
 
             for (int i = 0; i < numKeys; i++) {
@@ -1312,9 +1321,9 @@ static bool expediteKeysForWrite(
                     }
                     continue;
                 }
-                if (iterComplete
-                        || it->keyset_iter->hasPassedItem(it->keyset_iter, key, dbid)
-                        || (dictFind(it->early_iterate_entries, de) != NULL)) {
+                if (iterComplete ||
+                    it->keyset_iter->hasPassedItem(it->keyset_iter, key, dbid) ||
+                    hashtableFind(it->early_iterate_entries, de, NULL)) {
                     someIterated = true;
                 } else {
                     dictAdd(notIteratedKeys, de, oKey);
@@ -1325,26 +1334,26 @@ static bool expediteKeysForWrite(
                 }
             }
 
-            // Since missing keys are considered as already iterated, if there are any missing keys
-            //  we must consider that some keys have been iterated, and make sure all other keys
-            //  will be expedited if needed.
+            /* Since missing keys are considered as already iterated, if there are any missing keys
+             * we must consider that some keys have been iterated, and make sure all other keys
+             * will be expedited if needed. */
             if (listLength(curCmdMissingKeys) > 0) someIterated = true;
 
-            // This command may be executing as part of a larger transaction.  If some parts of the
-            //  transaction have already been identified to replicate, we must wait on all keys and
-            //  replicate here as well.  (Take care not to set cur_cmd_may_replicate to false.)
+            /* This command may be executing as part of a larger transaction.  If some parts of the
+             * transaction have already been identified to replicate, we must wait on all keys and
+             * replicate here as well.  (Take care not to set cur_cmd_may_replicate to false.) */
             if (someIterated) {
                 if (server.in_exec) {
-                    // We are now executing the commands in a multi-exec block.
-                    //
-                    // Regarding MULTI/EXEC:  Remember that this code is executed twice for commands
-                    //  within a MULTI/EXEC block.  First, we parse all the commands when deciding
-                    //  if the EXEC should be blocked.  Then, as each command is executed, it's
-                    //  re-parsed so that we can maintain the early iterated list as the commands
-                    //  execute.  In this second pass, as each command is executed, we can't change
-                    //  the replication decision which was made earlier (when the EXEC was processed).
-                    // We don't want to get tricked (by a key being removed and recreated) into
-                    //  into starting to replicate in the middle of a MULTI/EXEC block.
+                    /* We are now executing the commands in a multi-exec block.
+                     *
+                     * Regarding MULTI/EXEC:  Remember that this code is executed twice for commands
+                     * within a MULTI/EXEC block.  First, we parse all the commands when deciding
+                     * if the EXEC should be blocked.  Then, as each command is executed, it's
+                     * re-parsed so that we can maintain the early iterated list as the commands
+                     * execute.  In this second pass, as each command is executed, we can't change
+                     * the replication decision which was made earlier (when the EXEC was processed).
+                     * We don't want to get tricked (by a key being removed and recreated) into
+                     * starting to replicate in the middle of a MULTI/EXEC block. */
                 } else {
                     it->cur_cmd_may_replicate = true;
                 }
@@ -1358,7 +1367,7 @@ static bool expediteKeysForWrite(
 
                     if (addEarlyIterationKey(it, notIteratedEntry, dbid)) {
                         mustBlock = true;
-                        hashtableAdd(waitingOnKeys, oKey); 
+                        hashtableAdd(waitingOnKeys, oKey);
                     }
                 }
                 dictReleaseIterator(di);
@@ -1389,8 +1398,8 @@ static bool expediteKeysForWrite(
 }
 
 
-// Called when an iterator is terminated.  Pulls everything out of the queue
-//  and returns the items to Valkey (before they hit the iterator).
+/* Called when an iterator is terminated.  Pulls everything out of the queue
+ * and returns the items to Valkey (before they hit the iterator). */
 static void returnAllItemsToValkey(bgIterator *it) {
     serverAssert(onValkeyMainThread());
 
@@ -1403,35 +1412,35 @@ static void returnAllItemsToValkey(bgIterator *it) {
         bgIteratorItem *item;
         fifoPop(poppedFifo, (void **)&item);
         switch (item->type) {
-            // back out the "queued" statistic
-            case BGITERATOR_ITEM_DBENTRY:
-                it->dbentries_queued--;
-                if (item->u.dbe.is_cloned) it->dbentry_clones_queued--;
-                break;
-            case BGITERATOR_ITEM_REPLICATION:
-                it->replication_queued--;
-                break;
-            case BGITERATOR_ITEM_SWAPDB:
-                it->swapdb_queued--;
-                break;
-            case BGITERATOR_ITEM_FLUSHDB:
-                it->flushdb_queued--;
-                break;
-
-            case BGITERATOR_ITEM_COMPLETE:
-                // This can only happen if the completion item has been enqueued and
-                //  the iterator is terminated before reaching the completion item.
-                itemFreeList_returnItemBackToFreeList(item);
-                continue; // Skip pushing this onto itemsToReturn
-
-            case BGITERATOR_ITEM_TERMINATED:
-                // This can only happen if there is a race when terminating between
-                //  the iteration client and main thread.
-                itemFreeList_returnItemBackToFreeList(item);
-                continue; // Skip pushing this onto itemsToReturn
-
-            default:
-                serverAssert(false);
+        // back out the "queued" statistic
+        case BGITERATOR_ITEM_DBENTRY:
+            it->dbentries_queued--;
+            if (item->u.dbe.is_cloned) it->dbentry_clones_queued--;
+            break;
+        case BGITERATOR_ITEM_REPLICATION:
+            it->replication_queued--;
+            break;
+        case BGITERATOR_ITEM_SWAPDB:
+            it->swapdb_queued--;
+            break;
+        case BGITERATOR_ITEM_FLUSHDB:
+            it->flushdb_queued--;
+            break;
+
+        case BGITERATOR_ITEM_COMPLETE:
+            /* This can only happen if the completion item has been enqueued and
+             * the iterator is terminated before reaching the completion item. */
+            itemFreeList_returnItemBackToFreeList(item);
+            continue; // Skip pushing this onto itemsToReturn
+
+        case BGITERATOR_ITEM_TERMINATED:
+            /* This can only happen if there is a race when terminating between
+             *  the iteration client and main thread. */
+            itemFreeList_returnItemBackToFreeList(item);
+            continue; // Skip pushing this onto itemsToReturn
+
+        default:
+            serverAssert(false);
         }
 
         fifoPush(itemsToReturn, item);
@@ -1446,10 +1455,9 @@ static void returnAllItemsToValkey(bgIterator *it) {
 }
 
 
-
-//=============================================================================================
-//                        Foreground support functions (private)
-//=============================================================================================
+/* =============================================================================================
+ *                        Foreground support functions (private)
+ * ============================================================================================= */
 
 static size_t replicationItemSize(bgIteratorItem *item) {
     serverAssert(item->type == BGITERATOR_ITEM_REPLICATION);
@@ -1463,94 +1471,89 @@ static size_t replicationItemSize(bgIteratorItem *item) {
 static void processReturnOfItemToValkey(bgIteratorItem *item, bgIterator *iter) {
     serverAssert(onValkeyMainThread());
     switch ((int)item->type) {
-        case BGITERATOR_ITEM_REPLICATION:
-            bufferedReplicationBytes -= replicationItemSize(item);
-            freeRobjArray(item->u.repl.argc, item->u.repl.argv);
-            break;
-
-        case BGITERATOR_ITEM_DBENTRY:
-            {
-                if (item->u.dbe.is_cloned) {
-                    freeClonedDictEntry(item->u.dbe.de);
-                } else {
-                    if (isEntryInuseBySingleIterator(item->u.dbe.de)) {
-                        // This blocking mechanism isn't the best.  Written for slot-migration,
-                        //  it assumes a single DB so if the same key appears in multiple DBs,
-                        //  commands might get unblocked only to get blocked again.  (This would
-                        //  happen only rarely, and with minimal impact.)
-                        robj *key = createStringObjectFromSds(objectGetKey(item->u.dbe.de));
-                        unblockClientsInUseOnKey(key);
-                        decrRefCount(key);
-                    }
-                    // resumeRehashing must be called before decrementEntryInuse, since decrementEntryInuse can free
-                    if (item->u.dbe.is_rehashing_paused) resumeRehashing(item->u.dbe.de);
-                    decrementEntryInuse(item->u.dbe.de);
-                }
+    case BGITERATOR_ITEM_REPLICATION:
+        bufferedReplicationBytes -= replicationItemSize(item);
+        freeRobjArray(item->u.repl.argc, item->u.repl.argv);
+        break;
+
+    case BGITERATOR_ITEM_DBENTRY:
+        if (item->u.dbe.is_cloned) {
+            freeClonedDictEntry(item->u.dbe.de);
+        } else {
+            if (isEntryInuseBySingleIterator(item->u.dbe.de)) {
+                /* This blocking mechanism assumes a single DB so if the same key appears in
+                 * multiple DBs, commands might get unblocked only to get blocked again.  (This
+                 * would happen only rarely, and with minimal impact.) */
+                robj *key = createStringObjectFromSds(objectGetKey(item->u.dbe.de));
+                unblockClientsInUseOnKey(key);
+                decrRefCount(key);
             }
-            break;
-
-        case BGITERATOR_ITEM_SWAPDB:
-        case BGITERATOR_ITEM_FLUSHDB:
-            break;
-
-        case BGITERATOR_ITEMEXT_ITER_CLOSED:
-            {
-                bgIterator *it = ((bgIteratorItemExtClose*)item)->iter;
-                serverAssert(it == iter);
-                if (it->terminated) {
-                    // Abnormal termination
-                    //  Normally the item is TERMINATED, but might be COMPLETE in race
-                    serverAssert(it->current_item->type == BGITERATOR_ITEM_TERMINATED
-                            || it->current_item->type == BGITERATOR_ITEM_COMPLETE);
-                    // Release any items stranded on the iterator after early termination
-                    returnAllItemsToValkey(it);
-                    receiveItemsBackFromOneIterator(it);
-                } else {
-                    // Normal completion
-                    serverAssert(it->current_item->type == BGITERATOR_ITEM_COMPLETE);
-                }
-                serverAssert(mutexQueueLength(it->items_for_iterator) == 0);
-                serverAssert(it->dbentries_queued      == it->dbentries_processed);
-                serverAssert(it->replication_queued    == it->replication_processed);
-                serverAssert(it->swapdb_queued         == it->swapdb_processed);
-                serverAssert(it->flushdb_queued        == it->flushdb_processed);
-                serverAssert(it->dbentry_clones_queued >= it->dbentry_clones_processed);
+            // resumeRehashing must be called before decrementEntryInuse, since decrementEntryInuse can free
+            if (item->u.dbe.is_rehashing_paused) resumeRehashing(item->u.dbe.de);
+            decrementEntryInuse(item->u.dbe.de);
+        }
+        break;
+
+    case BGITERATOR_ITEM_SWAPDB:
+    case BGITERATOR_ITEM_FLUSHDB:
+        break;
+
+    case BGITERATOR_ITEMEXT_ITER_CLOSED: {
+        bgIterator *it = ((bgIteratorItemExtClose *)item)->iter;
+        serverAssert(it == iter);
+        if (it->terminated) {
+            /* Abnormal termination
+             * Normally the item is TERMINATED, but might be COMPLETE in race */
+            serverAssert(it->current_item->type == BGITERATOR_ITEM_TERMINATED ||
+                         it->current_item->type == BGITERATOR_ITEM_COMPLETE);
+            // Release any items stranded on the iterator after early termination
+            returnAllItemsToValkey(it);
+            receiveItemsBackFromOneIterator(it);
+        } else {
+            // Normal completion
+            serverAssert(it->current_item->type == BGITERATOR_ITEM_COMPLETE);
+        }
+        serverAssert(mutexQueueLength(it->items_for_iterator) == 0);
+        serverAssert(it->dbentries_queued == it->dbentries_processed);
+        serverAssert(it->replication_queued == it->replication_processed);
+        serverAssert(it->swapdb_queued == it->swapdb_processed);
+        serverAssert(it->flushdb_queued == it->flushdb_processed);
+        serverAssert(it->dbentry_clones_queued >= it->dbentry_clones_processed);
 
-                listEmpty(curCmdMissingKeys); // Just in case any remain
+        listEmpty(curCmdMissingKeys); // Just in case any remain
 
-                itemFreeList_returnItemBackToFreeList(it->current_item);
-                it->current_item = NULL;
+        itemFreeList_returnItemBackToFreeList(it->current_item);
+        it->current_item = NULL;
 
-                bool terminated = it->terminated;
-                void *privdata = it->privdata;
-                bgIteratorCleanupFunc cleanup = it->cleanup;
-                bgIteratorRelease(it); // Fully release the iterator before calling cleanup
+        bool terminated = it->terminated;
+        void *privdata = it->privdata;
+        bgIteratorCleanupFunc cleanup = it->cleanup;
+        bgIteratorRelease(it); // Fully release the iterator before calling cleanup
 
-                if (BGITERATION_DEBUG) {
-                    if (cleanup) debugBuffer = sdscatprintf(debugBuffer, "CLEANUP FN (%s)\n",
-                            (terminated) ? "terminated" : "success");
+        if (BGITERATION_DEBUG) {
+            if (cleanup) debugBuffer = sdscatprintf(debugBuffer, "CLEANUP FN (%s)\n",
+                                                    (terminated) ? "terminated" : "success");
 
-                    sds filename = sdscatprintf(sdsempty(), "bgiteration_debug.%d", getpid());
-                    FILE *f = fopen(filename, "w");
-                    sdsfree(filename);
+            sds filename = sdscatprintf(sdsempty(), "bgiteration_debug.%d", getpid());
+            FILE *f = fopen(filename, "w");
+            sdsfree(filename);
 
-                    fputs(debugBuffer, f);
+            fputs(debugBuffer, f);
 
-                    fclose(f);
-                    sdsfree(debugBuffer);
-                    debugBuffer = sdsempty();
-                }
+            fclose(f);
+            sdsfree(debugBuffer);
+            debugBuffer = sdsempty();
+        }
 
-                if (cleanup) cleanup(terminated, privdata);
-            }
-            break;
+        if (cleanup) cleanup(terminated, privdata);
+    } break;
 
-        default:
-            serverAssert(false); // Not expecting any other type of item!
+    default:
+        serverAssert(false); // Not expecting any other type of item!
     }
 
     // We don't allocate extension items from the pool so we manually free them
-    if((int)item->type == BGITERATOR_ITEMEXT_ITER_CLOSED) {
+    if ((int)item->type == BGITERATOR_ITEMEXT_ITER_CLOSED) {
         zfree(item);
     } else {
         itemFreeList_returnItemBackToFreeList(item);
@@ -1558,29 +1561,24 @@ static void processReturnOfItemToValkey(bgIteratorItem *item, bgIterator *iter)
 }
 
 static void prepareAndProcessReturnedItems(int n, bgIteratorItem **items, bgIterator *iter) {
-    int i = 0;
-    for (i = 0; i < n; i++) valkey_prefetch(items[i]);
-    for (i = 0; i < n; i++) {
+    for (int i = 0; i < n; i++) valkey_prefetch(items[i]);
+    for (int i = 0; i < n; i++) {
         if (items[i]->type != BGITERATOR_ITEM_DBENTRY) continue;
-        // Prefetch can have a significant perf hit on NULL
-        // but we never expect items[i]->u.dbe.de to be NULL
         valkey_prefetch(items[i]->u.dbe.de);
     }
-    for (i = 0; i < n; i++) {
+    for (int i = 0; i < n; i++) {
         if (items[i]->type != BGITERATOR_ITEM_DBENTRY) continue;
-        // Same as above, assume key is never NULL
         valkey_prefetch(objectGetKey(items[i]->u.dbe.de));
     }
-    for (i = 0; i < n; i++) processReturnOfItemToValkey(items[i], iter);
+    for (int i = 0; i < n; i++) processReturnOfItemToValkey(items[i], iter);
 }
 
 #define PREFETCH_BATCH_SIZE 16
 
+// Returns true if we process at least one item from a given iterator's return_to_valkey queue.
 static bool receiveItemsBackFromOneIterator(bgIterator *it) {
-    bgIteratorItem* batchPool[PREFETCH_BATCH_SIZE];
+    bgIteratorItem *batchPool[PREFETCH_BATCH_SIZE];
     int n = 0;
-    // Returns true if we process at least one item from
-    // a given iterator's return_to_valkey queue, false otherwise.
     fifo *poppedFifo = mutexQueuePopAll(it->return_to_valkey, false);
     if (poppedFifo != NULL) {
         while (fifoLength(poppedFifo) > 0) {
@@ -1599,10 +1597,9 @@ static bool receiveItemsBackFromOneIterator(bgIterator *it) {
     return false;
 }
 
+/* Process each iterator's return_to_valkey queue
+ * If `blocking` is true, continue reading until at least one queue was not empty. */
 static void receiveItemsBackFromIterators(bool blocking) {
-    // Process each iterator's return_to_valkey queue
-    // If `blocking` is true, continue reading until
-    // at least one queue was not empty.
     serverAssert(onValkeyMainThread());
     listIter li;
     listNode *node;
@@ -1613,15 +1610,14 @@ static void receiveItemsBackFromIterators(bool blocking) {
             bgIterator *it = listNodeValue(node);
             processedItems |= receiveItemsBackFromOneIterator(it);
         }
-        if (blocking) usleep(100); // Sleep for 1ms and re-try processing iterators
+        if (blocking && !processedItems) usleep(100); // Short sleep before retry
     } while (blocking && !processedItems);
 }
 
 
-static long long bgIteration_feedIterators_task(
-        struct aeEventLoop *eventLoop,
-        long long id,
-        void *clientData) {
+static long long bgIteration_feedIterators_task(struct aeEventLoop *eventLoop,
+                                                long long id,
+                                                void *clientData) {
     UNUSED(eventLoop);
     UNUSED(id);
     UNUSED(clientData);
@@ -1649,12 +1645,12 @@ static long long bgIteration_feedIterators_task(
 
     long dutyTimeUs = BGITER_CYCLE_BUDGET_MS * 1000;
     if (lastFeedEndTime > 0) {
-        // If the timer was delayed, compute the proportional time we should have had, and increase
-        //  the duty cycle to compensate (up to a limit).
+        /* If the timer was delayed, compute the proportional time we should have had, and increase
+         *  the duty cycle to compensate (up to a limit). */
         long starvationUs = (startTime - lastFeedEndTime) - BGITER_CYCLE_DELAY_MS * 1000;
         if (starvationUs > 0) {
-            long starvationCompensationUs = starvationUs * BGITER_CYCLE_BUDGET_MS
-                / (BGITER_CYCLE_BUDGET_MS + BGITER_CYCLE_DELAY_MS);
+            long starvationCompensationUs = starvationUs * BGITER_CYCLE_BUDGET_MS /
+                                            (BGITER_CYCLE_BUDGET_MS + BGITER_CYCLE_DELAY_MS);
             dutyTimeUs += starvationCompensationUs;
             dutyTimeUs = MIN(dutyTimeUs, BGITER_CYCLE_BUDGET_MAX_MS * 1000);
         }
@@ -1682,8 +1678,8 @@ static long long bgIteration_feedIterators_task(
 
 // Not static, but not API.  Intended for unit tests where the event loop may not be active.
 void bgIteration_feedIterators(void) {
-    // For unit testing, force the item_count_target to 1 in each call.  This ensures that we only
-    //  feed a minimal amount to the iterators rather than a non-deterministic amount.
+    /* For unit testing, force the item_count_target to 1 in each call.  This ensures that we only
+     * feed a minimal amount to the iterators rather than a non-deterministic amount. */
     listIter li;
     listNode *node;
     listRewind(allIterators, &li);
@@ -1698,64 +1694,75 @@ void bgIteration_feedIterators(void) {
 
 
 static void resetReplicationFlagForIterators(client *c) {
-    // For any given command, the command may or may not need to be replicated based on the status
-    //  and flags of each iterator.  Furthermore, if a command does need to be replicated, this
-    //  replication must occur for an entire atomic unit; we can't replicate only part of a script
-    //  or multi/exec.
-    // This function is the only place where the replication flag is cleared.
+    /* For any given command, the command may or may not need to be replicated based on the status
+     * and flags of each iterator.  Furthermore, if a command does need to be replicated, this
+     * replication must occur for an entire atomic unit; we can't replicate only part of a script
+     * or multi/exec.
+     * This function is the only place where the replication flag is cleared. */
 
     if (c->flag.multi || c->flag.script) {
-        // REGARDING MULTI/EXEC
-        // --------------------
-        // When processing a MULTI/EXEC, blockClientIfRequired is called first for the MULTI.  Then,
-        //  all of the commands are queued up in server.c:processCommand().  It's only when EXEC is
-        //  encountered, that server.c:call() is fired to begin execution.
-        // AFTER the EXEC is processed by call(), then each of the commands in the MULTI/EXEC block
-        //  will be processed through call().
-        // If write commands are present, MULTI & EXEC will be passed to the replication stream
-        //  before/after the transaction commands.  Note that MULTI & EXEC are not actually
-        //  "executed" at the time when their replication is passed to the replication stream.
-        //
-        // Example:  MULTI; SET A B; EXEC
-        //  1. blockClientIfRequired() called for MULTI.  MULTI flag IS NOT set.  (Won't block.)
-        //  2. blockClientIfRequired() called for EXEC.  MULTI flag IS set.  (Might block.)
-        //  3. blockClientIfRequired() called for SET.  MULTI flag IS set.  (Won't block.)
-        //  4. handleCommandReplication() is called for MULTI.
-        //  5. handleCommandReplication() is called for SET.
-        //  6. handleCommandReplication() is called for EXEC.
-        //
-        // SO - if the MULTI flag is set, we DON'T clear the flag.  It should only be cleared at the
-        //  start of the transaction, when MULTI is received - and the flag isn't set yet.
-
-        // REGARDING SCRIPTS
-        // -----------------
-        // When processing a script, blockClientIfRequired is called first for the EVAL/EVALSHA/FCALL.
-        //  Then, all of the commands are processed using a special script client.  The script
-        //  client has the CLIENT_SCRIPT flag set.  For scripts, the replication flag is set when
-        //  processing the EVAL/EVALSHA/FCALL and should not be cleared when executing individual
-        //  commands in the script.
-
-        // If it's the EXEC command, we fall through and clear the flag below.  But for all other
-        //  commands within the transaction, we don't clear the flag.
+        /* REGARDING MULTI/EXEC
+         * --------------------
+         * When processing a MULTI/EXEC, blockClientIfRequired is called first for the MULTI.  Then,
+         * all of the commands are queued up in server.c:processCommand().  It's only when EXEC is
+         * encountered, that server.c:call() is fired to begin execution.
+         *
+         * AFTER the EXEC is processed by call(), then each of the commands in the MULTI/EXEC block
+         * will be processed through call().
+         *
+         * If write commands are present, MULTI & EXEC will be passed to the replication stream
+         * before/after the transaction commands.  Note that MULTI & EXEC are not actually
+         * "executed" at the time when their replication is passed to the replication stream.
+         *
+         * Example:  MULTI; SET A B; EXEC
+         *  1. blockClientIfRequired() called for MULTI.  MULTI flag IS NOT set.  (Won't block.)
+         *  2. blockClientIfRequired() called for EXEC.  MULTI flag IS set.  (Might block.)
+         *  3. blockClientIfRequired() called for SET.  MULTI flag IS set.  (Won't block.)
+         *  4. handleCommandReplication() is called for MULTI.
+         *  5. handleCommandReplication() is called for SET.
+         *  6. handleCommandReplication() is called for EXEC.
+         *
+         * SO - if the MULTI flag is set, we DON'T clear the flag.  It should only be cleared at the
+         * start of the transaction, when MULTI is received - and the flag isn't set yet. */
+
+        /* REGARDING SCRIPTS
+         * -----------------
+         * When processing a script, blockClientIfRequired is called first for the EVAL/EVALSHA/FCALL.
+         * Then, all of the commands are processed using a special script client.  The script
+         * client has the CLIENT_SCRIPT flag set.  For scripts, the replication flag is set when
+         * processing the EVAL/EVALSHA/FCALL and should not be cleared when executing individual
+         * commands in the script. */
+
+        /* If it's the EXEC command, we fall through and clear the flag below.  But for all other
+         *  commands within the transaction, we don't clear the flag. */
         if (c->cmd->proc != execCommand) return;
     }
 
-    // For most commands, the replication flag is cleared and we determine if replication is needed
-    //  based on the keys being used and their state in each iterator. If a modified key hasn't been
-    //  processed yet, there's no need to expedite the key or send the replication.  The key will be
-    //  sent later, when reached by the iterator.
-    // However, for scripts, it is not possible to perform this optimization.  There is no way to
-    //  know if an undeclared key might be modified.  Since the entire script needs to be replicated
-    //  (or not replicated) atomically, we can't take the chance that an undeclared key might be
-    //  hit which requires replication.
+    /* For most commands, the replication flag is cleared and we determine if replication is needed
+     * based on the keys being used and their state in each iterator. If a modified key hasn't been
+     * processed yet, there's no need to expedite the key or send the replication.  The key will be
+     * sent later, when reached by the iterator.
+     *
+     * However, for scripts, it is not possible to perform this optimization.  There is no way to
+     * know if an undeclared key might be modified.  Since the entire script needs to be replicated
+     * (or not replicated) atomically, we can't take the chance that an undeclared key might be
+     * hit which requires replication. */
     bool isScript = isScriptCallWriteCmd(c->cmd);
 
-    getKeysResult result;
-    initGetKeysResult(&result);
-    getKeysFromCommand(c->cmd, c->argv, c->argc, &result);
-
-    // [sm-bgiterator] TODO: ELMO-108525, This assumes all keys are in the same slot, should consider cross-slot script case.
-    sds check_key = (result.numkeys > 0) ? objectGetVal(c->argv[result.keys[0].pos]) : NULL;
+    sds firstScriptKey = NULL;
+    if (isScript) {
+        /* If it's a script, we will normally replicate.  But if the keys are out of scope for the
+         * iteration, we shouldn't.  The use-case for this is with slot iteration, when the script
+         * is acting on keys from a different slot.  Here, we just check the first declared key, and
+         * if it's out of scope for the iteration, we won't replicate it.  This might cause issues
+         * for cross-slot scripts (anti-pattern), but the alternative is replicating all scripts,
+         * regardless of slot. */
+        getKeysResult result;
+        initGetKeysResult(&result);
+        getKeysFromCommand(c->cmd, c->argv, c->argc, &result);
+        if (result.numkeys > 0) firstScriptKey = objectGetVal(c->argv[result.keys[0].pos]);
+        getKeysFreeResult(&result);
+    }
 
     listIter li;
     listNode *node;
@@ -1765,14 +1772,16 @@ static void resetReplicationFlagForIterators(client *c) {
         if (it->completed || it->terminated) {
             it->cur_cmd_may_replicate = false;
         } else {
-            // Set initial state of the replication flag for this transaction
-            // For full scan iterators, write commands within scripts must always be replicated.
-            // For cluster slot iterators, replication of script write commands depends on whether
-            // the key is in scope of the current iterator.
-            it->cur_cmd_may_replicate = isScript && it->keyset_iter->isKeyInScope(it->keyset_iter, check_key);
+            /* For normal commands, the flag is initialized to false (not to replicate).  For these
+             * commands, we decide later based on the actual commands.
+             *
+             * However, for scripts, we don't know what commands will be executed.  So IF it's a
+             * script, and the keys are in scope (on the right slot) we initialize the replication
+             * flag to true. */
+            it->cur_cmd_may_replicate = isScript && firstScriptKey &&
+                                        it->keyset_iter->isKeyInScope(it->keyset_iter, firstScriptKey);
         }
     }
-    getKeysFreeResult(&result);
 }
 
 
@@ -1809,93 +1818,14 @@ static void handleSwapdb(int db1, int db2) {
 
 
 static void removePtrFromEarlyIterate(dbEntry *de) {
-    // If the item is being released, let's get the pointer out of our early_iterate_entries.
-    //  Note that this is not strictly necessary, but it frees some memory and keeps the
-    //  dictionary small.
+    /* If the item is being released, let's get the pointer out of our early_iterate_entries.
+     * This is not strictly necessary, but it frees some memory and keeps the dictionary small. */
     listIter li;
     listNode *node;
     listRewind(allIterators, &li);
     while ((node = listNext(&li)) != NULL) {
         bgIterator *it = listNodeValue(node);
-        dictDelete(it->early_iterate_entries, de); // just try delete (might not be here)
-    }
-}
-
-
-static int findDbForEntry(dbEntry *de) {
-    for (int i = 0; i < server.dbnum; i++) {
-        if (server.db[i] && dbFind(server.db[i], objectGetKey(de)) == de) return i;
-    }
-    serverAssert(false); // the entry MUST be in one of the DBs
-}
-
-
-static void terminateIteratorForFlush(bgIterator *it, int dbid) {
-    if (!it->terminated) bgIteratorTerminate(it);
-
-    // Snoop on the iterator.  There might be 1 item still being processed.  If that item is in the
-    //  DB being flushed, the item is removed from the dict and held for deferred deletion.  This
-    //  allows the iterator to complete processing on the current item without the item being
-    //  deleted unexpectedly.
-    // Since this is running in parallel with a background thread, the results are volatile.  This
-    //  is OK as when the iterator completes processing the item, it still won't have been accepted
-    //  back to Valkey yet, meaning the item will still be in inUseEntries.
-    bgIteratorItem *item = it->current_item;
-    if (item && item->type == BGITERATOR_ITEM_DBENTRY) {
-        dbEntry *de = item->u.dbe.de;
-        int deDb = findDbForEntry(de);
-        if (dbid == -1 || dbid == deDb) {
-            removePtrFromEarlyIterate(de);
-        }
-    }
-}
-
-
-static void preserveIteratorItemsForFlush(bgIterator *it, int dbid) {
-    serverAssert(onValkeyMainThread());
-    serverAssert(!(it->iteration_flags & BGITERATOR_FLAG_CONSISTENT));
-    serverAssert(dbid >= 0);
-    // Since this is not a consistent iteration, it's OK if the early_iterate_entries contains
-    //  pointers to items being deleted.  The item is not actually accessed from the pointer.  And
-    //  if the pointer gets reused for a new item, there's no guarantee that we would iterate it
-    //  anyway.  If replication is enabled, both new items and early_iterate_entries are treated the
-    //  same (replication is processed).  So this is safe in all cases.
-    // Given this, we will just worry about preserving items in the iterator's processing queue.
-    //  Because of commands like SWAPDB and MOVE, there's no attempt to remove unnecessary items
-    //  from the queue.  This is also safer to future Valkey extensions.
-
-    // Temporarily yank all items from the iterator's queue
-    fifo *poppedFifo = mutexQueuePopAll(it->items_for_iterator, false);
-    if (poppedFifo != NULL) {
-        fifo *readdFifo = fifoCreate();
-        while(fifoLength(poppedFifo) > 0) {
-            bgIteratorItem *item;
-            fifoPop(poppedFifo, (void **)&item);
-            if (item->type == BGITERATOR_ITEM_DBENTRY) {
-                dbEntry *de = item->u.dbe.de;
-                if (dbFind(server.db[dbid], objectGetKey(de)) == de) {
-                  // Found the entry in the DB about to be flushed
-                  removePtrFromEarlyIterate(de);
-                }
-            }
-            fifoPush(readdFifo, item);
-        }
-        fifoRelease(poppedFifo);
-
-        // Now give the list back to the iterator
-        mutexQueueAddMultiple(it->items_for_iterator, readdFifo);
-        fifoRelease(readdFifo);
-    }
-
-    // And snoop on the active item.  Even if the background task finishes with this item as we look
-    //  at it, the item can't have been returned to Valkey yet.
-    bgIteratorItem *item = it->current_item;
-    if (item && item->type == BGITERATOR_ITEM_DBENTRY) {
-        dbEntry *de = item->u.dbe.de;
-        if (dbFind(server.db[dbid], objectGetKey(de)) == de) {
-          // Found the entry in the DB about to be flushed
-          removePtrFromEarlyIterate(de);
-        }
+        hashtableDelete(it->early_iterate_entries, de); // just try delete (might not be here)
     }
 }
 
@@ -1919,24 +1849,22 @@ static void handleFlushdb(int dbid) {
     while ((node = listNext(&li)) != NULL) {
         bgIterator *it = listNodeValue(node);
 
+        // Let the low-level iterator know the DB is being flushed
+        it->keyset_iter->flushDb(it->keyset_iter, dbid);
+
         if (should_abort_iterators || it->iteration_flags & BGITERATOR_FLAG_CONSISTENT) {
-            terminateIteratorForFlush(it, dbid);
+            if (!it->terminated) bgIteratorTerminate(it);
         } else {
-            // In this (limited) case, we're only flushing a single DB that contains < half the
-            //  keys.  We don't want to kill a full-sync replication.  We will just continue with
-            //  iteration, knowing that a replication client will also receive the FLUSHDB on the
-            //  replication stream.
-            // It would be nice to do this with consistent snapshot also, but given that this is a
-            //  very rare condition, development is not justified to save off the DB for deferred
-            //  delete.  This would add a lot of complexity as well as memory implications.
-            preserveIteratorItemsForFlush(it, dbid);
-            it->keyset_iter->flushDb(it->keyset_iter, dbid);
+            /* In this (limited) case, we're only flushing a single DB that contains < half the
+             * keys.  We don't want to kill a full-sync replication.  We will just continue with
+             * iteration, knowing that a replication client will also receive the FLUSHDB on the
+             * replication stream.  There's no need to worry about the items themselves.  Since
+             * we've incremented the refcount, the items still in queue won't be physically deleted. */
 
             // Send a flushdb event to notify the client
             if (BGITERATION_DEBUG) {
                 debugBuffer = sdscatprintf(debugBuffer, "FLUSH: %d\n", dbid);
             }
-
             bgIteratorItem *item = itemFreeList_getElementOrAllocate();
             item->type = BGITERATOR_ITEM_FLUSHDB;
             item->dbid = dbid;
@@ -1948,14 +1876,13 @@ static void handleFlushdb(int dbid) {
 }
 
 
-static bool expediteKeysForWriteOnAllIterators(
-        int dbid,
-        struct serverCommand *cmd,
-        int argc,
-        robj **argv,
-        keyReference *keyrefs,
-        int numKeys,
-        hashtable *waitingOnKeys) {
+static bool expediteKeysForWriteOnAllIterators(int dbid,
+                                               struct serverCommand *cmd,
+                                               int argc,
+                                               robj **argv,
+                                               keyReference *keyrefs,
+                                               int numKeys,
+                                               hashtable *waitingOnKeys) {
     bool mustBlock = false;
 
     listIter li;
@@ -1989,8 +1916,7 @@ static bool expediteKeysForMultiExec(client *c, hashtable *waitingOnKeys) {
     /* For MULTI/EXEC, Valkey buffers all of the commands until hitting the EXEC.
      * At this point, the client holds all of the commands to be executed.  This function searches
      * for all of the keys used by any of the buffered write commands.  In addition, if SWAPDB or
-     * SELECT is used, this tracks the DBIDs through various swap/select operations.
-     */
+     * SELECT is used, this tracks the DBIDs through various swap/select operations. */
 
     /* There's a special concern for a NON-consistent iteration with replication.  If the keys are
      * all "future" keys (which haven't been processed by the iterator yet), then we don't expedite
@@ -2014,8 +1940,7 @@ static bool expediteKeysForMultiExec(client *c, hashtable *waitingOnKeys) {
      * these will be caught on the 2nd time around.
      *
      * Checking replication status before/after ensures that there can only be a single recursive
-     * call.
-     */
+     * call. */
     bool initiallyAnIteratorWillReplicate = anIteratorWillReplicateForThisCommand();
 
     bool mustBlock = false;
@@ -2068,29 +1993,36 @@ static bool expediteKeysForMultiExec(client *c, hashtable *waitingOnKeys) {
     zfree(cur_to_orig_db);
 
     if (!initiallyAnIteratorWillReplicate && anIteratorWillReplicateForThisCommand()) {
-        // We've decided to replicate.  Re-process the MULTI/EXEC just once more to make sure that
-        //  we didn't miss any keys at the beginning.  This can't continue to recurse because
-        //  `initiallyAnIteratorWillReplicate` will be TRUE in the recursive call.  Note that the
-        //  recursive call may add additional entries to `waitingOnKeys`.
+        /* We've decided to replicate.  Re-process the MULTI/EXEC just once more to make sure that
+         * we didn't miss any keys at the beginning.  This can't continue to recurse because
+         * `initiallyAnIteratorWillReplicate` will be TRUE in the recursive call.  Note that the
+         * recursive call may add additional entries to `waitingOnKeys`. */
         if (expediteKeysForMultiExec(c, waitingOnKeys)) mustBlock = true;
     }
 
     return mustBlock;
 }
 
-static bgIterator * bgIteratorCreate(
-        const char *name,
-        int flags,
-        bgIteratorReplDoneFunc repldone,
-        bgIteratorCleanupFunc cleanup,
-        void *privdata,
-        bgIterationType iter_type,
-        genericIterator *keyset_iter) {
+
+static bgIterator *bgIteratorCreate(const char *name,
+                                    bgIteratorConsistency consistency,
+                                    bgIteratorReplDoneFunc repldone,
+                                    bgIteratorCleanupFunc cleanup,
+                                    void *privdata,
+                                    bgIterationType iter_type,
+                                    genericIterator *keyset_iter) {
     serverAssert(onValkeyMainThread());
     serverAssert(server.cluster_enabled || iter_type == BGITERATION_TYPE_FULLSCAN);
-    serverAssert(server.cluster_enabled                 // Don't allow CONSISTENT & REPLICATION
-            || !(flags & BGITERATOR_FLAG_CONSISTENT)    //  unless cluster mode (avoids
-            || !(flags & BGITERATOR_FLAG_REPLICATION)); //  complications with SWAPDB & FLUSHDB)
+
+    int flags;
+    switch (consistency) {
+    case BGITERATOR_CONSISTENCY_NONE: flags = 0; break;
+    case BGITERATOR_CONSISTENCY_START: flags = BGITERATOR_FLAG_CONSISTENT; break;
+    case BGITERATOR_CONSISTENCY_EVENTUAL: flags = BGITERATOR_FLAG_REPLICATION; break;
+    default: serverAssert(false);
+    }
+    // Consistent, with replication - doesn't make sense.
+    serverAssert(!((flags & BGITERATOR_FLAG_CONSISTENT) && (flags & BGITERATOR_FLAG_REPLICATION)));
 
     bgIterator *it = zmalloc(sizeof(bgIterator));
     it->name = sdsnew(name);
@@ -2109,8 +2041,8 @@ static bgIterator * bgIteratorCreate(
     it->iteration_type = iter_type;
     it->consistent_modification_id = bgIteration_epoch++;
     it->keyset_iter = keyset_iter;
-    it->early_iterate_entries = dictCreate(&dictEntryPtrDictType);
-    dictExpand(it->early_iterate_entries, BGITER_EARLY_ITERATE_DICT_INITIAL_SIZE);
+    it->early_iterate_entries = hashtableCreate(&dbEntryPtrHashtableType);
+    hashtableExpand(it->early_iterate_entries, BGITER_EARLY_ITERATE_DICT_INITIAL_SIZE);
     it->current_item = NULL;
     it->client_is_active = false;
     it->completed = false;
@@ -2138,7 +2070,7 @@ static bgIterator * bgIteratorCreate(
         serverAssert(bgIterator_timeproc_id != AE_ERR);
     }
 
-    if (dictAdd(nameToIterator, (void*)it->name, it) != DICT_OK) {
+    if (dictAdd(nameToIterator, it->name, it) != DICT_OK) {
         // Can't have 2 iterators with the same name!
         serverAssert(false);
     }
@@ -2151,37 +2083,34 @@ static bgIterator * bgIteratorCreate(
 }
 
 
-
-//=============================================================================================
-//                        PUBLIC INTERFACE:  Iterator creation and use
-//=============================================================================================
+/* =============================================================================================
+ *                        PUBLIC INTERFACE:  Iterator creation and use
+ * ============================================================================================= */
 
 // PUBLIC API
-bgIterator * bgIteratorCreateFullScanIter(
-        const char *name,
-        int flags,
-        bgIteratorReplDoneFunc repldone,
-        bgIteratorCleanupFunc cleanup,
-        void *privdata) {
-    return bgIteratorCreate(name, flags, repldone, cleanup, privdata, BGITERATION_TYPE_FULLSCAN,
-                            fullScanIteratorCreate());
+bgIterator *bgIteratorCreateFullScanIter(const char *name,
+                                         bgIteratorConsistency consistency,
+                                         bgIteratorReplDoneFunc repldone,
+                                         bgIteratorCleanupFunc cleanup,
+                                         void *privdata) {
+    return bgIteratorCreate(name, consistency, repldone, cleanup, privdata,
+                            BGITERATION_TYPE_FULLSCAN, fullScanIteratorCreate());
 }
 
 // PUBLIC API
-bgIterator * bgIteratorCreateSlotsIter(
-        const char *name,
-        int flags,
-        const int *slots,
-        int slots_count,
-        bgIteratorReplDoneFunc repldone,
-        bgIteratorCleanupFunc cleanup,
-        void *privdata) {
-    return bgIteratorCreate(name, flags, repldone, cleanup, privdata, BGITERATION_TYPE_CLUSTERSLOT,
-                            clusterSlotIteratorCreate(slots, slots_count));
+bgIterator *bgIteratorCreateSlotsIter(const char *name,
+                                      bgIteratorConsistency consistency,
+                                      const int *slots,
+                                      int slots_count,
+                                      bgIteratorReplDoneFunc repldone,
+                                      bgIteratorCleanupFunc cleanup,
+                                      void *privdata) {
+    return bgIteratorCreate(name, consistency, repldone, cleanup, privdata,
+                            BGITERATION_TYPE_CLUSTERSLOT, clusterSlotIteratorCreate(slots, slots_count));
 }
 
 // PUBLIC API
-bgIterator * bgIteratorFind(const char *name) {
+bgIterator *bgIteratorFind(const char *name) {
     serverAssert(onValkeyMainThread());
 
     sds sdsname = sdsnew(name);
@@ -2200,14 +2129,14 @@ const char *bgIteratorName(bgIterator *it) {
 
 // PUBLIC API
 void bgIteratorGetStatus(bgIterator *it, bgIteratorStatus *status) {
-    status->dbentries_queued      = it->dbentries_queued;
-    status->dbentries_processed   = it->dbentries_processed;
-    status->replication_queued    = it->replication_queued;
+    status->dbentries_queued = it->dbentries_queued;
+    status->dbentries_processed = it->dbentries_processed;
+    status->replication_queued = it->replication_queued;
     status->replication_processed = it->replication_processed;
-    status->swapdb_queued         = it->swapdb_queued;
-    status->swapdb_processed      = it->swapdb_processed;
-    status->flushdb_queued        = it->flushdb_queued;
-    status->flushdb_processed     = it->flushdb_processed;
+    status->swapdb_queued = it->swapdb_queued;
+    status->swapdb_processed = it->swapdb_processed;
+    status->flushdb_queued = it->flushdb_queued;
+    status->flushdb_processed = it->flushdb_processed;
     status->dbentry_clones_queued = it->dbentry_clones_queued;
     status->dbentry_clones_processed = it->dbentry_clones_processed;
 
@@ -2217,8 +2146,9 @@ void bgIteratorGetStatus(bgIterator *it, bgIteratorStatus *status) {
     status->runtime_ms = elapsedMs(it->monotonic_start_time);
 
     monotime nonvolatile_item_start_time = it->monotonic_item_start_time;
-    status->current_item_ms =
-            (nonvolatile_item_start_time == 0) ? 0 : elapsedMs(nonvolatile_item_start_time);
+    status->current_item_ms = (nonvolatile_item_start_time == 0)
+                                  ? 0
+                                  : elapsedMs(nonvolatile_item_start_time);
 }
 
 
@@ -2235,7 +2165,7 @@ void bgIteratorTerminate(bgIterator *it) {
     }
 
     bgIteratorItem *terminationItem = itemFreeList_getElementOrAllocate();
-    *terminationItem = (bgIteratorItem){ .type = BGITERATOR_ITEM_TERMINATED };
+    *terminationItem = (bgIteratorItem){.type = BGITERATOR_ITEM_TERMINATED};
     mutexQueueAdd(it->items_for_iterator, terminationItem);
 
     it->terminated = true;
@@ -2249,19 +2179,20 @@ bool bgIteratorIsTerminating(bgIterator *it) {
 
 
 // PUBLIC API
-bgIteratorItem * bgIteratorRead(bgIterator *it) {
-    serverAssert(it->current_item == NULL
-            || (it->current_item->type != BGITERATOR_ITEM_COMPLETE
-                && it->current_item->type != BGITERATOR_ITEM_TERMINATED));
+bgIteratorItem *bgIteratorRead(bgIterator *it) {
+    serverAssert(it->current_item == NULL ||
+                 (it->current_item->type != BGITERATOR_ITEM_COMPLETE &&
+                  it->current_item->type != BGITERATOR_ITEM_TERMINATED));
 
     // First, clean up the previous item read
     if (it->current_item != NULL) {
         returnCurrentItemToValkey(it);
 
-        // To support unit tests.  Normal clients call bgIteratorRead from an alternate thread.
-        //  Without this, a unit test could get stuck waiting on the completion event because
-        //  feed won't get invoked.  For production, this is called regularly from the main thread.
-        if (onValkeyMainThread()) bgIteration_feedIterators_task(NULL, 0, NULL);
+        /* To support unit tests.  Normal clients call bgIteratorRead from an alternate thread.
+         * Without this, a unit test could get stuck waiting on the completion event because
+         * feed won't get invoked.  For production, feed is called regularly from the main thread.
+         * Note - this is checking that the exact same thread is used and shouldn't count modules. */
+        if (pthread_equal(server.main_thread_id, pthread_self()) != 0) bgIteration_feedIterators_task(NULL, 0, NULL);
     } else {
         it->client_is_active = true;
     }
@@ -2277,8 +2208,8 @@ bgIteratorItem * bgIteratorRead(bgIterator *it) {
 // PUBLIC API
 void bgIteratorClose(bgIterator *it) {
     if (it->current_item != NULL) {
-        if (it->current_item->type == BGITERATOR_ITEM_COMPLETE
-         || it->current_item->type == BGITERATOR_ITEM_TERMINATED) {
+        if (it->current_item->type == BGITERATOR_ITEM_COMPLETE ||
+            it->current_item->type == BGITERATOR_ITEM_TERMINATED) {
             // Normal confirmation of background completion
         } else {
             // Client is initiating the termination
@@ -2286,13 +2217,13 @@ void bgIteratorClose(bgIterator *it) {
             returnCurrentItemToValkey(it);
 
             it->current_item = itemFreeList_getElementOrAllocate();
-            *(it->current_item) = (bgIteratorItem){ .type = BGITERATOR_ITEM_TERMINATED };
+            *(it->current_item) = (bgIteratorItem){.type = BGITERATOR_ITEM_TERMINATED};
         }
     } else {
         // terminated before first item read
         it->terminated = true;
         it->current_item = itemFreeList_getElementOrAllocate();
-        *(it->current_item) = (bgIteratorItem){ .type = BGITERATOR_ITEM_TERMINATED };
+        *(it->current_item) = (bgIteratorItem){.type = BGITERATOR_ITEM_TERMINATED};
     }
 
     // We don't allocate extension items from the free list
@@ -2303,10 +2234,9 @@ void bgIteratorClose(bgIterator *it) {
 }
 
 
-
-//=============================================================================================
-//                        PUBLIC INTERFACE:  Valkey main-thread support hooks
-//=============================================================================================
+/* =============================================================================================
+ *                        PUBLIC INTERFACE:  Valkey main-thread support hooks
+ * ============================================================================================= */
 
 // PUBLIC API
 void bgIteration_init(void) {
@@ -2363,10 +2293,10 @@ void bgIteration_keyDelete(int dbid, const_sds key) {
         bgIterator *it = listNodeValue(node);
         if (it->completed || it->terminated || !it->keyset_iter->isKeyInScope(it->keyset_iter, key)) continue;
 
-        if (it->iteration_flags & BGITERATOR_FLAG_CONSISTENT
-                && ((bgIterationEntryMetadata *)objectGetMetadata(de))->iterator_epoch <= it->consistent_modification_id) {
-            if (!it->keyset_iter->hasPassedItem(it->keyset_iter, key, dbid)
-                    && !(dictFind(it->early_iterate_entries, de) != NULL)) {
+        if (it->iteration_flags & BGITERATOR_FLAG_CONSISTENT &&
+            ((bgIterationEntryMetadata *)objectGetMetadata(de))->iterator_epoch <= it->consistent_modification_id) {
+            if (!it->keyset_iter->hasPassedItem(it->keyset_iter, key, dbid) &&
+                !hashtableFind(it->early_iterate_entries, de, NULL)) {
                 addEarlyIterationKey(it, de, dbid); // (may also add to inUseEntries)
             }
         }
@@ -2374,17 +2304,16 @@ void bgIteration_keyDelete(int dbid, const_sds key) {
 
     removePtrFromEarlyIterate(de);
 
-    // We might be within the context of a command execution.  This happens if the key is found to
-    //  be expired when attempting to execute the command.  In this case, we should treat the key as
-    //  missing.  If the key exists after the command executes, we can treat it like a new key.
-    // (If not in command execution, this is ok - it's reset at the beginning of command execution.)
+    /* We might be within the context of a command execution.  This happens if the key is found to
+     * be expired when attempting to execute the command.  In this case, we should treat the key as
+     * missing.  If the key exists after the command executes, we can treat it like a new key.
+     * (If not in command execution, this is ok - it's reset at the beginning of command execution.) */
     robj *oKey = createObject(OBJ_STRING, sdsdup(key));
     listAddNodeHead(curCmdMissingKeys, oKey);
 }
 
 
 // PUBLIC API
-// Notify bgIteration that a FLUSHALL is being performed outside of the normal client interface.
 void bgIteration_flushall(void) {
     handleFlushdb(-1);
 }
@@ -2398,12 +2327,12 @@ bool bgIteration_blockClientIfRequired(client *c) {
 
     if (BGITERATION_DEBUG) {
         debugBuffer = sdscatprintf(debugBuffer, "BLCK?: (%d)%s\n", c->db->id,
-                createSdsFromClientArgv(c->argc, c->argv));
+                                   createSdsFromClientArgv(c->argc, c->argv));
     }
 
-    // Before executing a command or atomic transaction, the replication flag is cleared for each
-    //  iterator.  If it's determined that the command should replicate, the flag will be set
-    //  as the command and keys are examined for expedite.
+    /* Before executing a command or atomic transaction, the replication flag is cleared for each
+     * iterator.  If it's determined that the command should replicate, the flag will be set
+     * as the command and keys are examined for expedite. */
     resetReplicationFlagForIterators(c);
 
     if (c->cmd->proc == flushdbCommand || c->cmd->proc == flushallCommand) {
@@ -2428,7 +2357,7 @@ bool bgIteration_blockClientIfRequired(client *c) {
         keyReference *keyrefs = result.keys;
         if (numkeys > 0) {
             mustBlock = expediteKeysForWriteOnAllIterators(
-                            c->db->id, c->cmd, c->argc, c->argv, keyrefs, numkeys, waitOnKeys);
+                c->db->id, c->cmd, c->argc, c->argv, keyrefs, numkeys, waitOnKeys);
             serverAssert(!(mustBlock && (c->flag.multi) && !(c->flag.script)));
 
             if (mustBlock && (c->flag.script)) {
@@ -2442,7 +2371,7 @@ bool bgIteration_blockClientIfRequired(client *c) {
                     receiveItemsBackFromIterators(true); // Blocking
                     hashtableEmpty(waitOnKeys, NULL);
                     mustBlock = expediteKeysForWriteOnAllIterators(
-                                    c->db->id, c->cmd, c->argc, c->argv, keyrefs, numkeys, waitOnKeys);
+                        c->db->id, c->cmd, c->argc, c->argv, keyrefs, numkeys, waitOnKeys);
                 }
             }
             getKeysFreeResult(&result);
@@ -2460,7 +2389,7 @@ bool bgIteration_blockClientIfRequired(client *c) {
 
     if (mustBlock) {
         serverAssert(hashtableSize(waitOnKeys) > 0);
-        robj **waitKeysArgv = zmalloc(sizeof(robj*) * hashtableSize(waitOnKeys));
+        robj **waitKeysArgv = zmalloc(sizeof(robj *) * hashtableSize(waitOnKeys));
 
         robj *key;
         hashtableIterator hi;
@@ -2488,29 +2417,28 @@ bool bgIteration_blockClientIfRequired(client *c) {
 
 
 // PUBLIC API
-void bgIteration_handleCommandReplication(
-        int dbid,
-        struct serverCommand *cmd,
-        int argc,
-        robj **argv) {
+void bgIteration_handleCommandReplication(int dbid,
+                                          struct serverCommand *cmd,
+                                          int argc,
+                                          robj **argv) {
     if (BGITERATION_DEBUG) {
         // DEBUG - enable this to capture replication not queued because iteration is inactive
         if (0 && !bgIteration_iterationActive() && (isWriteCmd(cmd) || cmd->proc == multiCommand)) {
             debugBuffer = sdscatprintf(debugBuffer, "REPL? INACT: (%d)%s\n", dbid,
-                    createSdsFromClientArgv(argc, argv));
+                                       createSdsFromClientArgv(argc, argv));
         }
     }
 
     if (!bgIteration_iterationActive()) return;
     serverAssert(onValkeyMainThread());
 
-    // Some commands are replicated which are not writes (like publish) these can be ignored.
-    //  Be careful with MULTI which is not a write command, but must be replicated.
+    /* Some commands are replicated which are not writes (like publish) these can be ignored.
+     *  Be careful with MULTI which is not a write command, but must be replicated. */
     if (!isWriteCmd(cmd) && cmd->proc != multiCommand) return;
 
     if (BGITERATION_DEBUG) {
         debugBuffer = sdscatprintf(debugBuffer, "REPL?: (%d)%s\n", dbid,
-                createSdsFromClientArgv(argc, argv));
+                                   createSdsFromClientArgv(argc, argv));
     }
 
     if (cmd->proc == swapdbCommand) {
@@ -2521,16 +2449,16 @@ void bgIteration_handleCommandReplication(
             handleSwapdb(id1, id2);
     }
 
-    // In the case that a key is touched in a different DB (COPY/MOVE) the key is recorded as
-    //  a "special" key and than handled below.
+    /* In the case that a key is touched in a different DB (COPY/MOVE) the key is recorded as
+     *  a "special" key and than handled below. */
     int special_dbid = 0;
     sds special_key = NULL;
     dbEntry *special_dbEntry = NULL;
     if (cmd->proc == moveCommand) {
-        // The MOVE command succeeded.  However MOVE requires special handling as it creates a new
-        //  key in a different database.  We need to make sure that we don't later try to iterate
-        //  on the key as it would be a duplicate key at that point.  So, instead, we will mark the
-        //  newly created key as "early iterated".
+        /* The MOVE command succeeded.  However MOVE requires special handling as it creates a new
+         * key in a different database.  We need to make sure that we don't later try to iterate
+         * on the key as it would be a duplicate key at that point.  So, instead, we will mark the
+         * newly created key as "early iterated". */
         bool success = getDbIdFromRobj(argv[MOVE_COMMAND_DBID_ARG_INDEX], &special_dbid);
         serverAssert(success); // the command already succeeded, so this should work!
 
@@ -2552,10 +2480,10 @@ void bgIteration_handleCommandReplication(
     }
 
     /* Implementation note regarding LUA and MULTI:  LUA scripts and MULTI-EXEC blocks must be
-     *  treated atomically.  We need to ensure that either ALL of the replication (or none of the
-     *  replication) for the atomic operation is processed by the iterator(s).  This is handled
-     *  naturally as we can only "complete" the iteration during the feeding process - and feeding
-     *  is only performed when handling timer events (after the LUA/MULTI has completed).  */
+     * treated atomically.  We need to ensure that either ALL of the replication (or none of the
+     * replication) for the atomic operation is processed by the iterator(s).  This is handled
+     * naturally as we can only "complete" the iteration during the feeding process - and feeding
+     * is only performed when handling timer events (after the LUA/MULTI has completed).  */
 
     listIter li;
     listNode *node;
@@ -2564,16 +2492,17 @@ void bgIteration_handleCommandReplication(
         bgIterator *it = listNodeValue(node);
         if (it->completed || it->terminated) continue;
 
-        // For consistent iteration, we only iterate values based on version.  But for
-        //  non-consistent iteration, we don't need to explicitly iterate any values newly created
-        //  during the iteration.  So we mark them as expedited.  We know we have a new key if it
-        //  was missing before the command, and exists now.
+        /* For consistent iteration, we only iterate values based on version.  But for
+         * non-consistent iteration, we don't need to explicitly iterate any values newly created
+         * during the iteration.  So we mark them as expedited.  We know we have a new key if it
+         * was missing before the command, and exists now. */
+
         if (!(it->iteration_flags & BGITERATOR_FLAG_CONSISTENT)) {
             // Handle the special case of a key moved to a different DB
             if (special_dbEntry != NULL) {
-                if (it->cur_cmd_may_replicate
-                        && !it->keyset_iter->hasPassedItem(it->keyset_iter, special_key, special_dbid)) {
-                    dictAdd(it->early_iterate_entries, special_dbEntry, NULL);
+                if (it->cur_cmd_may_replicate &&
+                    !it->keyset_iter->hasPassedItem(it->keyset_iter, special_key, special_dbid)) {
+                    hashtableAdd(it->early_iterate_entries, special_dbEntry);
                     if (BGITERATION_DEBUG) {
                         sds entryString = createEntryString(special_dbid, special_dbEntry);
                         debugBuffer = sdscatprintf(debugBuffer, "EARLY(special): %s\n", entryString);
@@ -2581,11 +2510,12 @@ void bgIteration_handleCommandReplication(
                     }
                 }
 
-                // Note: In the cases where there's a special command, we are copying or moving an
-                //       item to a different DB.  In these limited cases, we can only possibly be
-                //       creating a single key.  And if we've handled it here, we don't need to
-                //       handle it as a "missing key" below.  If we were to try to handle it as a
-                //       standard "missing key", we would get the DBID incorrect.
+                /* Note: In the cases where there's a special command, we are copying or moving an
+                 *       item to a different DB.  In these limited cases, we can only possibly be
+                 *       creating a single key.  And if we've handled it here, we don't need to
+                 *       handle it as a "missing key" below.  If we were to try to handle it as a
+                 *       standard "missing key", we would get the DBID incorrect. */
+
             } else if (listLength(curCmdMissingKeys) > 0) {
                 listIter missingIt;
                 listNode *missingNode;
@@ -2596,13 +2526,14 @@ void bgIteration_handleCommandReplication(
                     dbEntry *de = dbFind(server.db[dbid], (sds)key);
                     if (de != NULL) {
                         // It exists now!
-                        if (it->cur_cmd_may_replicate
-                                && !it->keyset_iter->hasPassedItem(it->keyset_iter, key, dbid)) {
-                            // If the current command is allowed to replicate, and there is a new
-                            //  key which we haven't yet reached in iteration, it needs to be added
-                            //  to the set of early iterate entries.  (We know that it's not already
-                            //  in that set because it's a newly created key!)
-                            dictAdd(it->early_iterate_entries, de, NULL);
+                        if (it->cur_cmd_may_replicate &&
+                            !it->keyset_iter->hasPassedItem(it->keyset_iter, key, dbid)) {
+                            /* If the current command is allowed to replicate, and there is a new
+                             * key which we haven't yet reached in iteration, it needs to be added
+                             * to the set of early iterate entries.  (We know that it's not already
+                             * in that set because it's a newly created key!) */
+                            bool wasAdded = hashtableAdd(it->early_iterate_entries, de);
+                            serverAssert(wasAdded);
                             if (BGITERATION_DEBUG) {
                                 sds entryString = createEntryString(dbid, de);
                                 debugBuffer = sdscatprintf(debugBuffer, "EARLY(NEW): %s\n", entryString);
@@ -2653,8 +2584,7 @@ void bgIteration_handleCommandReplication(
          * In the case of a client driven DEL command, the key will have already been deleted when
          *  we hit this routine.  In the case of EXPIRE/EVICT, they propagate happens before the key
          *  is deleted.  So if the key is missing, we can use the cached replication decision.  But
-         *  if the key still exists (indicating EXPIRE/EVICT) we evaluate it specially.
-         */
+         *  if the key still exists (indicating EXPIRE/EVICT) we evaluate it specially. */
         bool shouldReplicateDelCommand = false;
         bool isDelCommand = isDeleteCmd(cmd);
         if (isDelCommand) {
@@ -2665,8 +2595,8 @@ void bgIteration_handleCommandReplication(
                     // NOTE:  It's weird, but helpful, for both EXPIRE and EVICT the propagation happens
                     //        BEFORE the actual delete.  So if the dbEntry still exists, we are doing
                     //        an expire/evict which is not preceded by blockClientIfRequired().
-                    if (it->keyset_iter->hasPassedItem(it->keyset_iter, key, dbid)
-                            || (dictFind(it->early_iterate_entries, de) != NULL)) {
+                    if (it->keyset_iter->hasPassedItem(it->keyset_iter, key, dbid) ||
+                        hashtableFind(it->early_iterate_entries, de, NULL)) {
                         shouldReplicateDelCommand = true;
                     }
                 } else {
@@ -2678,17 +2608,14 @@ void bgIteration_handleCommandReplication(
         }
 
         bool replicate = (it->iteration_flags & BGITERATOR_FLAG_REPLICATION &&
-                ((!isDelCommand && it->cur_cmd_may_replicate)
-                        || shouldReplicateDelCommand));
+                          ((!isDelCommand && it->cur_cmd_may_replicate) || shouldReplicateDelCommand));
 
         if (replicate) {
             /* We will replicate the command in these cases:
              * 1) For consistent iteration - it->cur_cmd_may_replicate is always true
              * 2) For non-consistent, if any of the keys have been processed, expediteKeysForWrite
              *    will ensure that ALL of the keys have been expedited - and we should replicate
-             * 3) For non-consistent, if NONE of the keys have been processed, no need to replicate
-             */
-
+             * 3) For non-consistent, if NONE of the keys have been processed, no need to replicate */
             if (BGITERATION_DEBUG) {
                 debugBuffer = sdscat(debugBuffer, " (queued)\n");
             }
@@ -2716,13 +2643,26 @@ size_t bgIteration_memoryInuseForReplication(void) {
 // PUBLIC API
 bool bgIteration_isEntryInuse(dbEntry *de) {
     serverAssert(onValkeyMainThread());
+    if (!bgIteration_iterationActive()) return false;
     return isEntryInuseByAnyIterator(de);
 }
 
 
 // PUBLIC API
-uint32_t bgIteration_getEpoch(void) {
-    return bgIteration_epoch;
+void bgIteration_dbEntryModified(dbEntry *de) {
+    if (bgIteration_iterationActive()) {
+        bgIterationEntryMetadata *md = (bgIterationEntryMetadata *)objectGetMetadata(de);
+        if (md) md->iterator_epoch = bgIteration_epoch;
+    }
+}
+
+
+// PUBLIC API
+void bgIteration_keyModified(int dbid, const_sds key) {
+    if (bgIteration_iterationActive()) {
+        dbEntry *de = dbFind(server.db[dbid], (sds)key);
+        if (de) bgIteration_dbEntryModified(de);
+    }
 }
 
 
@@ -2737,11 +2677,12 @@ void bgIteration_updateDbEntryPtr(dbEntry *old, dbEntry *new) {
     listRewind(allIterators, &li);
     while ((node = listNext(&li)) != NULL) {
         bgIterator *it = listNodeValue(node);
-        if (dictDelete(it->early_iterate_entries, old) == DICT_OK) {
+        if (hashtableDelete(it->early_iterate_entries, old)) {
             if (BGITERATION_DEBUG) {
                 debugBuffer = sdscatprintf(debugBuffer, "EARLY LIST UPDATE %p -> %p\n", (void *)old, (void *)new);
             }
-            dictAdd(it->early_iterate_entries, new, NULL);
+            bool wasAdded = hashtableAdd(it->early_iterate_entries, new);
+            serverAssert(wasAdded);
         }
     }
 }
diff --git a/src/bgiteration.h b/src/bgiteration.h
index dd6da71608f..78643311fe0 100644
--- a/src/bgiteration.h
+++ b/src/bgiteration.h
@@ -21,8 +21,7 @@
  * implements the logic of the iteration client.
  *
  * Iteration clients are expected to read through the keyspace until the iteration is complete or
- * terminated.  An iteration client may not perform modifications on a key.
- */
+ * terminated.  An iteration client may not perform modifications on a key. */
 
 /* Avoids dependency on server.h */
 typedef struct serverObject dbEntry; // An object with key/value inserted into main dictionary
@@ -33,21 +32,30 @@ typedef struct client client;
 typedef struct bgIterator bgIterator;
 
 
-/* Flag indicates that a consistent iteration is required.  This is used to create a point-in-time
- * iteration.  The iteration client will see all keys AS THEY EXISTED at the time when the iterator
- * was created.
- * Note:  The DBID provided with the DICTENTRY events is the original DBID (at the time of iteration
- *        start).  SWAPDB events are NOT provided during a consistent iteration.  */
-#define BGITERATOR_FLAG_CONSISTENT (1 << 0)
-
-/* Flag indicating that the replication stream for keys which have already been processed should be
- * forwarded to the iteration client.  Most useful for non-consistent iteration to track changes
- * to keys already processed.  By tracking changes, this allows an non-consistent iteration client
- * to achieve a consistent view at the END of the iteration.
- * NOTE:  Replication events will be provided ordered and synchronized with any SWAPDB events.
- * LIMITATION:  Since SWAPDB events are not provided during CONSISTENT iteration, it is not
- *              permitted to use both CONSISTENT and REPLICATION on a non-clustermode instance.  */
-#define BGITERATOR_FLAG_REPLICATION (1 << 1)
+/* Consistency type for iteration. */
+typedef enum {
+    /* With no consistency requirements, dbEntries are provided to the iteration client as they
+     * appear at the time of iteration.  No replication is provided.  The only guarantee is that
+     * dbEntries which existed at the start of iteration, and remained through the duration of
+     * iteration, will be provided to the iteration client once (and only once).  If a dbEntry is
+     * modified during iteration, either the old or the new value may be provided. */
+    BGITERATOR_CONSISTENCY_NONE = 0,
+
+    /* With consistency at the start of iteration, a point-in-time iteration is performed.  The
+     * iteration client will see all keys AS THEY EXISTED at the time when the iterator was created.
+     * Note:  The DBID provided with the DICTENTRY events is the original DBID (at the time of iteration
+     *        start).  SWAPDB events will not be provided.  */
+    BGITERATOR_CONSISTENCY_START = 1,
+
+    /* With an eventually consistent iteration, dbEntries will be followed by relevant replication.
+     * This will allow a client to achieve a consistent state at the END of the iteration.  Once a
+     * dbEntry has been provided to the iteration client, any replication related to that entry will
+     * also be forwarded to the iteration client.  With eventual consistency, keys are provided as
+     * they are at the time of iteration.  This mode requires that the iteration client be aware of
+     * SWAPDB events.  If a SWAPDB is performed, the client will receive a SWAPDB event.
+     * Replication events will be provided ordered and synchronized with any SWAPDB events. */
+    BGITERATOR_CONSISTENCY_EVENTUAL = 2
+} bgIteratorConsistency;
 
 
 /* When running an iterator with replication, a replication-done function (callback) may be
@@ -60,8 +68,7 @@ typedef struct bgIterator bgIterator;
  * Returns true when an iterator stops accepting any replication item into the queue for the client.
  * If false is returned, replication will continue, and bgiteration will periodically call the callback
  * until true is returned. In this context, returning false indicates that the client is not ready to
- * stop receiving replication, it is requesting that replication be continued.
- */
+ * stop receiving replication, it is requesting that replication be continued. */
 typedef bool (*bgIteratorReplDoneFunc)(void *privdata);
 
 
@@ -71,8 +78,7 @@ typedef bool (*bgIteratorReplDoneFunc)(void *privdata);
  * TERMINATED:  will be passed as TRUE if the iteration process was terminated early (either by
  *              the main thread calling bgIteratorTerminate() or the iteration client calling
  *              bgIteratorClose()).
- * PRIVDATA:    this pointer is for data private to the iteration client.
- */
+ * PRIVDATA:    this pointer is for data private to the iteration client. */
 typedef void (*bgIteratorCleanupFunc)(bool terminated, void *privdata);
 
 
@@ -90,11 +96,10 @@ typedef void (*bgIteratorCleanupFunc)(bool terminated, void *privdata);
  * to implement the iteration client which will read from the returned bgIterator.
  *
  * There is no need to delete/destroy a bgIterator.  It will automatically be cleaned up after the
- * last item is read.
- */
+ * last item is read. */
 bgIterator *bgIteratorCreateFullScanIter(
     const char *name,
-    int flags,
+    bgIteratorConsistency consistency,
     bgIteratorReplDoneFunc repldone,
     bgIteratorCleanupFunc cleanup,
     void *privdata);
@@ -119,11 +124,10 @@ bgIterator *bgIteratorCreateFullScanIter(
  * just copy its data and leave the array untouched.
  *
  * There is no need to delete/destroy a bgIterator.  It will automatically be cleaned up after the
- * last item is read.
- */
+ * last item is read. */
 bgIterator *bgIteratorCreateSlotsIter(
     const char *name,
-    int flags,
+    bgIteratorConsistency consistency,
     const int *slots,
     int slots_count,
     bgIteratorReplDoneFunc repldone,
@@ -132,8 +136,7 @@ bgIterator *bgIteratorCreateSlotsIter(
 
 
 /* Find an existing bgIterator by name.
- * Returns NULL if the iterator does not exist (or has completed).
- */
+ * Returns NULL if the iterator does not exist (or has completed). */
 bgIterator *bgIteratorFind(const char *name);
 
 
@@ -162,8 +165,7 @@ typedef struct {
 
 /* Get the status of a background iteration.
  *
- * The caller-provided bgIteratorStatus will be populated.
- */
+ * The caller-provided bgIteratorStatus will be populated. */
 void bgIteratorGetStatus(bgIterator *iter, bgIteratorStatus *status);
 
 
@@ -172,8 +174,7 @@ void bgIteratorGetStatus(bgIterator *iter, bgIteratorStatus *status);
  * An iteration is terminated by the Valkey main thread.  It is expected that the iteration client
  * will continue to read, receiving BGITERATOR_ITEM_TERMINATED or BGITERATOR_ITEM_COMPLETE to
  * complete the iteration.  (This is necessary to ensure proper cleanup.)
- * NOTE:  If the iteration client wants to terminate iteration, it may call bgIteratorClose().
- */
+ * NOTE:  If the iteration client wants to terminate iteration, it may call bgIteratorClose(). */
 void bgIteratorTerminate(bgIterator *iter);
 
 
@@ -182,8 +183,7 @@ void bgIteratorTerminate(bgIterator *iter);
  * This checks if the iterator is in the process of terminating.  For the Valkey main thread, this
  * can be used to determine if a call has already been made to bgIteratorTerminate.  For an
  * iteration client, it normally learns about terminate by reading the next item, this allows
- * out-of-band detection of termination which can be useful when processing a large key.
- */
+ * out-of-band detection of termination which can be useful when processing a large key. */
 bool bgIteratorIsTerminating(bgIterator *iter);
 
 
@@ -254,8 +254,7 @@ typedef struct {
  * NOTE: Reading an item returns previously read items to Valkey.  It is unsafe to reference an item
  * previously read.
  *
- * (All memory management is the responsibility of the bgIterator - not the reader.)
- */
+ * (All memory management is the responsibility of the bgIterator - not the reader.) */
 bgIteratorItem *bgIteratorRead(bgIterator *iter);
 
 
@@ -267,8 +266,7 @@ bgIteratorItem *bgIteratorRead(bgIterator *iter);
  * BGITERATOR_ITEM_TERMINATED and signals that the background activity is complete.
  *
  * This may also be called by the iteration client to force terminate an iteration early.  The
- * bgIterator will be marked as terminated.
- */
+ * bgIterator will be marked as terminated. */
 void bgIteratorClose(bgIterator *iter);
 
 
@@ -276,10 +274,7 @@ void bgIteratorClose(bgIterator *iter);
  * BGITERATION HOOKS REQUIRED TO SUPPORT ITERATION - CALLS INSERTED INTO MAIN VALKEY CODE
  ********************************************************************************************/
 
-typedef struct {
-    uint32_t iterator_epoch; // iterator epoch of last modification
-} bgIterationEntryMetadata;
-
+#define BGITERATION_ENTRY_METADATA_SIZE 4
 
 /* Must be called once (and only once) at server startup.  */
 void bgIteration_init(void);
@@ -292,15 +287,13 @@ bool bgIteration_iterationActive(void);
 /* Notify bgIteration that a key is being deleted.  In Valkey, key deletion can occur in a READ
  * command if the key is expired.  Note that this notification is more about status than memory.
  * Since the dbEntry is a reference counted object, the dbEntry can't be physically deleted if
- * bgIteration is still actively using it.
- */
+ * bgIteration is still actively using it. */
 void bgIteration_keyDelete(int dbid, const_sds key);
 
 
 /* Iteration needs to know if a FLUSHALL is being performed.  For normal clients, this comes through
  * the standard "blockClientIfRequired" interface.  This interface is for cases where Valkey
- * performs the FLUSHALL operation independently of clients (e.g. when syncing with master).
- */
+ * performs the FLUSHALL operation independently of clients (e.g. when syncing with master). */
 void bgIteration_flushall(void);
 
 
@@ -311,8 +304,7 @@ void bgIteration_flushall(void);
  *
  * We can't update the dbEntry if the entry is actually in use (bgIteration_isEntryInuse)!
  *
- * To simplify calling code, this function does nothing if old_entry == new_entry.
- */
+ * To simplify calling code, this function does nothing if old_entry == new_entry. */
 void bgIteration_updateDbEntryPtr(dbEntry *old_entry, dbEntry *new_entry);
 
 
@@ -332,16 +324,14 @@ void bgIteration_updateDbEntryPtr(dbEntry *old_entry, dbEntry *new_entry);
  *                  performs SWAPDB, a synchronous block may be performed (returning false) on
  *                  individual commands within the script.
  *
- * Note: this function should be called for all commands (not just writes).
- */
+ * Note: this function should be called for all commands (not just writes). */
 bool bgIteration_blockClientIfRequired(client *c);
 
 
 /* After execution of a write command, the Valkey main thread must provide the command to iterators
  * which are interested in the replication feed.  It is required that all commands have been passed
  * through bgIteration_blockClientIfRequired(), however, it is permitted that the command can be
- * re-written for propagation.
- */
+ * re-written for propagation. */
 void bgIteration_handleCommandReplication(
     int dbid,
     struct serverCommand *cmd,
@@ -351,8 +341,7 @@ void bgIteration_handleCommandReplication(
 
 /* The memory that bgIteration uses while temporarily buffering replication data is not included in
  * the maxmemory computation used for eviction.  This function provides insight into the current
- * amount of memory used for buffered replication data.
- */
+ * amount of memory used for buffered replication data. */
 size_t bgIteration_memoryInuseForReplication(void);
 
 
@@ -360,7 +349,10 @@ size_t bgIteration_memoryInuseForReplication(void);
 bool bgIteration_isEntryInuse(dbEntry *de);
 
 
-/* Get the current iteration epoch, for tagging metadata on keys. */
-uint32_t bgIteration_getEpoch(void);
+/* Notify bgIteration that a dbEntry has been added/modified.
+ *  - If caller has a dbEntry*, dbEntryModified is more efficient
+ *  - If caller has a dbid/key, a lookup is performed to find the dbEntry */
+void bgIteration_dbEntryModified(dbEntry *de);
+void bgIteration_keyModified(int dbid, const_sds key);
 
 #endif
diff --git a/src/db.c b/src/db.c
index cdf41c6a8b7..4b6f3c11a39 100644
--- a/src/db.c
+++ b/src/db.c
@@ -432,8 +432,7 @@ void setKey(client *c, serverDb *db, robj *key, robj **valref, int flags) {
     } else {
         dbSetValue(db, key, valref, 1, NULL);
     }
-    bgIterationEntryMetadata *md = (bgIterationEntryMetadata *)objectGetMetadata(*valref);
-    if (md) md->iterator_epoch = bgIteration_getEpoch();
+    bgIteration_dbEntryModified(*valref);
     if (!(flags & SETKEY_KEEPTTL)) removeExpire(db, key);
     if (!(flags & SETKEY_NO_SIGNAL)) signalModifiedKey(c, db, key);
 }
@@ -762,15 +761,7 @@ long long dbTotalServerKeyCount(void) {
 void signalModifiedKey(client *c, serverDb *db, robj *key) {
     touchWatchedKey(db, key);
     trackingInvalidateKey(c, key, 1);
-
-    /* If bgIteration is running, need to maintain the iteration epoch. */
-    if (bgIteration_iterationActive()) {
-        dbEntry *o = dbFind(db, objectGetVal(key));
-        if (o) {
-            bgIterationEntryMetadata *md = (bgIterationEntryMetadata *)objectGetMetadata(o);
-            if (md) md->iterator_epoch = bgIteration_getEpoch();
-        }
-    }
+    bgIteration_keyModified(db->id, objectGetVal(key));
 }
 
 void signalFlushedDb(int dbid, int async) {
diff --git a/src/defrag.c b/src/defrag.c
index 670f83bee73..e6ecad0227e 100644
--- a/src/defrag.c
+++ b/src/defrag.c
@@ -43,6 +43,7 @@
 #include "eval.h"
 #include "script.h"
 #include "module.h"
+#include "bgiteration.h"
 #include <stdbool.h>
 #include <stddef.h>
 
@@ -708,6 +709,8 @@ static void defragKey(defragKeysCtx *ctx, robj **elemref) {
     unsigned char *newzl;
     ob = *elemref;
 
+    if (bgIteration_isEntryInuse(ob)) return;
+
     /* Try to defrag robj and/or string value. */
     if ((newob = activeDefragStringOb(ob))) {
         *elemref = newob;
@@ -815,6 +818,11 @@ static void defragPubsubScanCallback(void *privdata, void *elemref) {
  * and 1 if time is up and more work is needed. */
 static int defragLaterItem(robj *ob, unsigned long *cursor, monotime endtime, int dbid) {
     if (ob) {
+        if (bgIteration_isEntryInuse(ob)) {
+            *cursor = 0;
+            return 0;
+        }
+
         if (ob->type == OBJ_LIST && ob->encoding == OBJ_ENCODING_QUICKLIST) {
             return scanLaterList(ob, cursor, endtime);
         } else if (ob->type == OBJ_SET && ob->encoding == OBJ_ENCODING_HASHTABLE) {
diff --git a/src/expire.c b/src/expire.c
index b31f57465cc..40b1410f1b7 100644
--- a/src/expire.c
+++ b/src/expire.c
@@ -39,6 +39,7 @@
 #include "cluster.h"
 #include "cluster_migrateslots.h"
 #include "util.h"
+#include "bgiteration.h"
 
 /*-----------------------------------------------------------------------------
  * Incremental collection of expired keys.
@@ -167,6 +168,14 @@ void fieldExpireScanCallback(void *privdata, void *volaKey, int didx) {
     robj *o = volaKey;
     serverAssert(o);
     serverAssert(hashTypeHasVolatileFields(o));
+
+    data->has_more_expired_entries = false;
+
+    if (bgIteration_isEntryInuse(o)) {
+        data->sampled++;
+        return;
+    }
+
     mstime_t now = server.mstime;
     size_t expired_fields = dbReclaimExpiredFields(o, data->db, now, data->max_entries, didx);
     if (expired_fields) {
diff --git a/src/server.c b/src/server.c
index 6b2942c3714..14e32beb6a2 100644
--- a/src/server.c
+++ b/src/server.c
@@ -3023,11 +3023,11 @@ void initServer(void) {
 
     /* Set object metadata size before creating any database key objects */
     if (server.forkless_options_supported) {
-        /* NOTE: At this time, there is only one reason for dbEntry metadata.  bgIteration.  However,
+        /* NOTE: At this time, there is only one reason for dbEntry metadata: bgIteration.  However,
          * if/when new metadata options are added, we will need to compute the size of a variable
          * size metadata, and provide appropriate accessors to access the specific portion of the
          * metadata (each of which may/may not exist, based on immutable startup parameters).  */
-        objectSetMetadataSize(sizeof(bgIterationEntryMetadata));
+        objectSetMetadataSize(BGITERATION_ENTRY_METADATA_SIZE);
     }
 
     createDatabaseIfNeeded(0); /* The default database should always exist */
@@ -3711,10 +3711,57 @@ static void propagateNow(int dbid, robj **argv, int argc, int target, int slot)
     if (propagate_to_slot_migration) clusterFeedSlotExportJobs(dbid, argv, argc, slot);
 }
 
-// If true, a MULTI has been sent to bgIterator.
-//  Remember to send the matching EXEC in propagatePendingCommands().
-static bool sentMultiToBgIterator = false;
-static int lastDbidSentToBgIterator;
+/* BgIteration requires that replicaton is sent after each command, however the
+ * alsoPropagate mechanism queues replication until the end of the transaction
+ * (when propagatePendingCommands is invoked).  Also, the propagation mechanism
+ * strips out multi/exec, adding them back during propagatePendingCommands (if
+ * necessary).  This function ensures that replication, including multi/exec are
+ * sequenced with the commands for bgIteration.
+ *
+ * Called from alsoPropagate with regular params.
+ * Called from propagatePendingCommands with dbid = -1 (to close multi/exec). */
+static void propagateToBgIteration(int dbid, int argc, robj **argv, int target) {
+    /* STATIC indicates that we have sent the MULTI, and need to match it with
+     *  an EXEC during propagatePendingCommands. */
+    static bool sentMultiToBgIterator = false;
+    /* STATIC indicates that last DBID that was sent, so that we can use the
+     *  same DBID when sending a generated EXEC. */
+    static int lastDbidSentToBgIterator;
+
+    if (dbid >= 0) {
+        // Called from alsoPropagate() to replicate a command
+        if (target & PROPAGATE_REPL && bgIteration_iterationActive()) {
+            if (!sentMultiToBgIterator && (scriptIsRunning() || server.in_exec)) {
+                /* For a script or multi/exec, we should be sending the MULTI at
+                 * the beginning of the execution unit.  There shouldn't be any
+                 * commands in the propagation queue yet. */
+                serverAssert(server.also_propagate.numops == 0);
+                /* If this is the first propagated command of a script or multi,
+                 * make it a transaction.  It may turn out that there is only 1
+                 * command in the MULTI block, but we can't know that now.
+                 * Unlike regular replication, we can't defer all of the
+                 * replication until we know for sure.  We must call bgIteration
+                 * after each command. */
+                static struct serverCommand *cmd_multi = NULL; // STATIC
+                if (cmd_multi == NULL) cmd_multi = lookupCommandOrOriginal(&shared.multi, 1);
+                bgIteration_handleCommandReplication(dbid, cmd_multi, 1, &shared.multi);
+                sentMultiToBgIterator = true;
+            }
+            struct serverCommand *cmd = lookupCommandOrOriginal(argv, argc);
+            bgIteration_handleCommandReplication(dbid, cmd, argc, argv);
+            lastDbidSentToBgIterator = dbid;
+        }
+    } else {
+        // Called from propagatePendingCommands() to finalize a transaction
+        if (sentMultiToBgIterator) {
+            // If a MULTI was sent to bgIterator via alsoPropagate(), then send the matching EXEC.
+            static struct serverCommand *cmd_exec = NULL; // STATIC
+            if (cmd_exec == NULL) cmd_exec = lookupCommandOrOriginal(&shared.exec, 1);
+            bgIteration_handleCommandReplication(lastDbidSentToBgIterator, cmd_exec, 1, &shared.exec);
+            sentMultiToBgIterator = false;
+        }
+    }
+}
 
 /* Used inside commands to schedule the propagation of additional commands
  * after the current command is propagated to AOF / Replication.
@@ -3728,28 +3775,7 @@ static int lastDbidSentToBgIterator;
  * stack allocated).  The function automatically increments ref count of
  * passed objects, so the caller does not need to. */
 void alsoPropagate(int dbid, robj **argv, int argc, int target, int slot) {
-    if (target & PROPAGATE_REPL && bgIteration_iterationActive()) {
-        // Note that bgIterator must be invoked immediately after each command.  This is required
-        //  by the bgIterator state machine.  It's NOT ok to call bgIterator from propagateNow as
-        //  that handles all of the commands for a transaction at the end.
-        // THIS FUNCTION (alsoPropagate) is called after each command.
-        if (!sentMultiToBgIterator && (scriptIsRunning() || server.in_exec)) {
-            // For a script or multi/exec, we should be sending the MULTI at the beginning of the
-            //  execution unit.  There shouldn't be any commands in the propagation queue yet.
-            serverAssert(server.also_propagate.numops == 0);
-            // If this is the first propagated command of a script or multi, make it a transaction.
-            //  It may turn out that there is only 1 command in the MULTI block, but we can't know
-            //  that now.  Unlike regular replication, we can't defer all of the replication until
-            //  we know for sure.  We must call bgIterator after each command.
-            static struct serverCommand *cmd_multi = NULL; // STATIC to avoid repeated lookups
-            if (cmd_multi == NULL) cmd_multi = lookupCommandOrOriginal(&shared.multi, 1);
-            bgIteration_handleCommandReplication(dbid, cmd_multi, 1, &shared.multi);
-            sentMultiToBgIterator = true;
-        }
-        struct serverCommand *cmd = lookupCommandOrOriginal(argv, argc);
-        bgIteration_handleCommandReplication(dbid, cmd, argc, argv);
-        lastDbidSentToBgIterator = dbid;
-    }
+    propagateToBgIteration(dbid, argc, argv, target);
 
     robj **argvcopy;
     int j;
@@ -3817,16 +3843,11 @@ void updateCommandLatencyHistogram(struct hdr_histogram **latency_histogram, int
  * multiple separated commands. Note that alsoPropagate() is not affected
  * by CLIENT_PREVENT_PROP flag. */
 static void propagatePendingCommands(void) {
-    // Note: This is done before the check on server.also_propagate.numops.  Numops might be zero
-    //       if there is no replica but we might be running bgIteration for something other than
-    //       replication.  If we sent the multi (to bgIteration), we need to send the matching exec.
-    if (sentMultiToBgIterator) {
-        // If a MULTI was sent to bgIterator via alsoPropagate(), then send the matching EXEC.
-        static struct serverCommand *cmd_exec = NULL; // STATIC to avoid repeated lookups
-        if (cmd_exec == NULL) cmd_exec = lookupCommandOrOriginal(&shared.exec, 1);
-        bgIteration_handleCommandReplication(lastDbidSentToBgIterator, cmd_exec, 1, &shared.exec);
-        sentMultiToBgIterator = false;
-    }
+    /* This is done before the check on server.also_propagate.numops.  Numops
+     * might be zero if there is no replica but we might be running bgIteration
+     * for something other than replication.  If we sent the multi (to
+     * bgIteration), we need to send the matching exec. */
+    propagateToBgIteration(-1, 0, NULL, 0);
 
     if (server.also_propagate.numops == 0) return;
 
diff --git a/src/unit/test_bgiteration.cpp b/src/unit/test_bgiteration.cpp
new file mode 100644
index 00000000000..5c6b38ce115
--- /dev/null
+++ b/src/unit/test_bgiteration.cpp
@@ -0,0 +1,3076 @@
+/*
+ * Copyright Valkey Contributors.
+ * All rights reserved.
+ * SPDX-License-Identifier: BSD 3-Clause
+ */
+
+// Just for the moment, until https://github.com/valkey-io/valkey/issues/3450 is resolved
+// clang-format off
+#include "generated_wrappers.hpp"
+#include <vector>
+
+using namespace ::testing;
+
+extern "C" {
+    #include "stdlib.h"
+    #include "bgiteration.h"
+    #include "server.h"
+    #define using usingvar // compile hack
+    #include "module.h" // uses "using" keyword
+    #undef using
+    extern hashtableType commandSetType;
+    extern dictType keylistDictType;
+    void bgIteration_feedIterators(void);
+    void createSharedObjects(void);
+    void hashtableDump(hashtable *ht);
+    void bgIteration_unitTestDisableCloning(void);
+    void bgIteration_unitTestEnableCloning(int item_bytes, int pool_bytes);
+    static size_t mockHashtableScan(hashtable *ht, size_t cursor, hashtableScanFunction fn, void *privdata);
+    size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid);
+}
+
+
+// The private data is a pointer to arbitrary data.  This value is used just to
+//  test that the correct value is passed through.
+#define PRIVDATA reinterpret_cast<void*>(12345)
+
+typedef int32_t bgIterationEntryMetadata; // opaque 4 bytes
+static_assert(sizeof(bgIterationEntryMetadata) == BGITERATION_ENTRY_METADATA_SIZE);
+
+// A bgIteration cleanup function used for testing.
+static int cleanupCount;
+static bool cleanupTerminated;
+static void iteratorCleanupFn(bool terminated, void *privdata) {
+    EXPECT_EQ(privdata, PRIVDATA);
+    cleanupCount++;
+    cleanupTerminated = terminated;
+}
+
+// A bgIteration repldone function used for testing.
+static int replDoneConfirmed;
+static bool iteratorRepldoneFn(void *privdata) {
+    EXPECT_EQ(privdata, PRIVDATA);
+    replDoneConfirmed++;
+    return true;
+}
+
+// A more complicated repldone function that can delay the replcation done condition.
+static int replDoneRejected;
+static bool iteratorRepldoneFnNotBeingReadyInitially(void *privdata) {
+    EXPECT_EQ(privdata, PRIVDATA);
+    // This is to test the behavior when Repl Done function is not ready to be executed.
+    if (replDoneRejected == 0) {
+        replDoneRejected++;
+        return false;
+    }
+    replDoneConfirmed++;
+    return true;
+}
+
+
+/* This mock for hashtableScan will return the items in lexical order.  It assumes that the entries
+ * are robjs containing an sds string for the key.  The key is expected to begin with a capital
+ * letter [A-Z].  The caller passes 0 as the cursor to start the iteration.  The returned cursor
+ * value will indicate the prior letter returned (1=A, ...).  After entries starting with 'Z' have
+ * been returned, the cursor of 0 will indicate that the scan is complete.  Note that all entries
+ * starting with the same letter will be returned in a single call. */
+static size_t mockHashtableScan(hashtable *ht, size_t cursor, hashtableScanFunction fn, void *privdata) {
+    // Just in case, if it's not one of our hashtables, use the unmocked function
+    bool our_ht = (server.db[0]->keys && ht == kvstoreGetHashtable(server.db[0]->keys, 0))
+               || (server.db[1]->keys && ht == kvstoreGetHashtable(server.db[1]->keys, 0));
+    if (!our_ht) return __real_hashtableScan(ht, cursor, fn, privdata);
+
+    // Collect all entries from the hashtable
+    std::vector<dbEntry *> entries;
+    hashtableIterator *iter = hashtableCreateIterator(ht, 0);
+    dbEntry *entry;
+    while (hashtableNext(iter, (void **)&entry)) {
+        char first = objectGetKey(entry)[0];
+        assert(first >= 'A' && first <= 'Z');
+        entries.push_back(entry);
+    }
+    hashtableReleaseIterator(iter);
+
+    // Sort by key lexicographically
+    std::sort(entries.begin(), entries.end(), [](dbEntry *a, dbEntry *b) {
+        return strcmp(objectGetKey(a), objectGetKey(b)) < 0;
+    });
+
+    // cursor 0 means start at 'A', otherwise start after the cursor letter
+    char startLetter = (char)('A' + cursor);
+
+    // Find the first letter to emit
+    char emitLetter = 0;
+    for (dbEntry *e : entries) {
+        char first = objectGetKey(e)[0];
+        if (first >= startLetter) {
+            emitLetter = first;
+            break;
+        }
+    }
+
+    if (emitLetter == 0) return 0;
+
+    // Call fn for all entries starting with emitLetter
+    for (dbEntry *e : entries) {
+        char first = objectGetKey(e)[0];
+        if (first == emitLetter) fn(privdata, (void *)e);
+    }
+
+    size_t nextCursor = (size_t)(emitLetter - 'A' + 1);
+    return (nextCursor > 25) ? 0 : nextCursor;
+}
+
+
+static bool mockHashtableScanHasPassedKey(hashtable *ht, const void *key, size_t cursor) {
+    // Just in case, if it's not one of our hashtables, use the unmocked function
+    if (ht != kvstoreGetHashtable(server.db[0]->keys, 0)
+        && ht != kvstoreGetHashtable(server.db[1]->keys, 0))
+        return __real_hashtableScanHasPassedKey(ht, key, cursor);
+
+    return ((const char *)key)[0] < (char)('A' + cursor);
+}
+
+
+static const char *logfile = "";
+
+/* Most of the bgIteration unit tests are based on a CMD instance with 2 DBs.  There are 8 keys in
+ * each DB.  The hashtableScan function is mocked to return the keys in a predictable order.
+ *
+ * There are a number of helper functions to simulate certain key modification actions within our
+ * test configuration.  Note that this is isolated from the actual call to processCommand.
+ *
+ * Because most of bgIteration is based on an ordered processing of keys, it doesn't matter if we
+ * are simulating CMD or CME, full scan, or slot-based.  The majority of tests are independent of
+ * these concerns.
+ *
+ * However, there are some tests which are are unique to these configurations and use a specialized
+ * derived class to handle the differences.  We do not want to duplicate all of the tests for
+ * the different configurations, but we do want to ensure that each configuration works properly.
+ *   - bgIterationTestCluster - handles tests unique to full scan in cluster mode
+ *   - bgIterationTestClusterSlots - handles tests unique to cluster slot-based iteration
+ */
+class BgIterationTest : public ::testing::Test {
+    protected:
+        static const int DB_COUNT = 2;
+        static const int ITEMS_PER_DB = 8;
+
+    private:
+        /* With the mock hashtableScan, we get keys in a predictable order.  DB0 works with buckets
+         * containing groups of keys (which hashtableScan returns in a single call).  DB1 returns
+         * each key individually, as more separate buckets.  Convention (for test readability) is
+         * that keys beginning [A-M] would be in DB0 and keys beginning [N-Z] in DB1.  Letters are
+         * intentionally skipped to allow for possible insertions. */
+        const char *keys[DB_COUNT][ITEMS_PER_DB] = {{"B0", "B1", "B2", "E0", "E1", "H0", "H1", "H2"},
+                                                    {"N0", "O0", "Q0", "R0", "T0", "U0", "W0", "Y0"}};
+
+    protected:
+        static const int TOTAL_ITEMS = DB_COUNT * ITEMS_PER_DB;
+        static const int LAST_ITEM = TOTAL_ITEMS - 1;
+
+        MockValkey mock;
+        RealValkey real;
+        client *c = nullptr; // for general use in the tests (with common cleanup)
+        robj **orig_argv = nullptr; // Used when simulating multi
+        int orig_argc = 0; // Used when simulating multi
+
+
+        struct serverCommand dummy_cmd = {0};
+
+        // Helper functions for accessing the keys.  We can access by db(0..1) and seq(0..7)
+        //  or by item number (0..15).
+        // NOTE: These virtual functions can be overridden in subclasses which may have different item layout.
+        virtual const char * getKeyAtDbSeq(int db, int seq) {
+            assert(db < DB_COUNT);
+            assert(seq < ITEMS_PER_DB);
+            return keys[db][seq];
+        }
+
+        virtual int getDbFromItemNum(int itemNum) {
+            assert(itemNum < DB_COUNT * ITEMS_PER_DB);
+            return itemNum / ITEMS_PER_DB;
+        }
+
+        virtual int getSeqFromItemNum(int itemNum) {
+            assert(itemNum < DB_COUNT * ITEMS_PER_DB);
+            return itemNum % ITEMS_PER_DB;
+        }
+
+        const char * keyStr(int itemNum) {
+            return getKeyAtDbSeq(getDbFromItemNum(itemNum), getSeqFromItemNum(itemNum));
+        }
+
+        int itemNumFromKey(const char *key) {
+            for (int itemNum = 0;  itemNum < DB_COUNT * ITEMS_PER_DB;  itemNum++) {
+                if (strcmp(key, keyStr(itemNum)) == 0) return itemNum;
+            }
+            return -1;
+        }
+
+
+        // Do some general initialization before starting the suite.  Normally, the tests are run in
+        //  isolation - and this isn't much different than SetUp().  But if running the
+        //  entire test suite together (just manually running the test executable), this gets called
+        //  only once.
+        static void SetUpTestSuite() {
+            monotonicInit();
+
+            bzero(&server, sizeof(server));
+            server.hz = 100;
+            server.logfile = const_cast<char*>(logfile);
+            createSharedObjects();
+
+            moduleInitModulesSystem();
+
+            server.commands = hashtableCreate(&commandSetType);
+            server.orig_commands = hashtableCreate(&commandSetType);
+            populateCommandTable();
+        }
+
+
+        static void TearDownTestSuite() {
+            hashtableRelease(server.commands);
+            hashtableRelease(server.orig_commands);
+        }
+
+
+        void initializeServerDb(int dbid, int slot_count_bits = 0) {
+            server.db[dbid] = static_cast<serverDb *>(zcalloc(sizeof(serverDb)));
+            server.db[dbid]->id = dbid;
+            server.db[dbid]->keys = kvstoreCreate(&kvstoreKeysHashtableType, slot_count_bits, 0);
+            server.db[dbid]->expires = kvstoreCreate(&kvstoreExpiresHashtableType, slot_count_bits, 0);
+            server.db[dbid]->watched_keys = dictCreate(&keylistDictType);
+        }
+
+
+        robj *createStringObjectFromCString(const char *s) {
+            return createStringObject(s, strlen(s));
+        }
+
+
+        void addKeyToDb(int dbid, const char *key, const char *val) {
+            robj *key_obj = createStringObjectFromCString(key);
+            robj *val_obj = createStringObjectFromCString(val);
+            dbAdd(server.db[dbid], key_obj, &val_obj);
+            decrRefCount(key_obj);
+        }
+
+
+        virtual void setupDatabase() {
+            /* For these unit tests, a standard database is constructed.  But we will use our own
+             * mocked scan function to ensure a consistent iteration order */
+
+            server.dbnum = DB_COUNT;
+            server.cluster_enabled = false;
+            server.db = static_cast<serverDb **>(zcalloc(sizeof(serverDb *) * server.dbnum));
+
+            for (int dbid = 0;  dbid < server.dbnum;  dbid++) {
+                initializeServerDb(dbid);
+                for (int keynum = 0; keynum < ITEMS_PER_DB; keynum++) {
+                    addKeyToDb(dbid, keys[dbid][keynum], keys[dbid][keynum]);
+                }
+            }
+
+            EXPECT_CALL(mock, hashtableScan(_,_,_,_)).WillRepeatedly(Invoke(mockHashtableScan));
+            EXPECT_CALL(mock, hashtableScanHasPassedKey(_,_,_)).WillRepeatedly(Invoke(mockHashtableScanHasPassedKey));
+
+            if (0) debugPrintBucketInfo();
+        }
+
+
+        void SetUp() override {
+            server.main_thread_id = pthread_self();
+            server.forkless_options_supported = 1;
+            objectSetMetadataSize(BGITERATION_ENTRY_METADATA_SIZE);
+
+            bgIteration_unitTestDisableCloning();
+
+            setupDatabase();
+
+            EXPECT_CALL(mock, aeCreateTimeEvent(_,_,_,_,_)).WillRepeatedly(Return(0));
+            bgIteration_init();
+
+            cleanupCount = 0;
+            replDoneConfirmed = 0;
+            replDoneRejected = 0;
+
+            // By default, do nothing for these
+            EXPECT_CALL(mock, blockClientInUseOnKeys(_,_,_)).WillRepeatedly(Return());
+            EXPECT_CALL(mock, unblockClientsInUseOnKey(_)).WillRepeatedly(Return());
+
+            // By default, expect no permission issues
+            EXPECT_CALL(mock, ACLCheckAllUserCommandPerm(_,_,_,_,_,_)).WillRepeatedly(Return(ACL_OK));
+        }
+
+
+        void TearDown() override {
+            bgIteration_feedIterators();    // process returning stuff before deleting DB
+            bgIteration_feedIterators();    // in case an iterator was closed there might be more
+            for (int i = 0;  i < server.dbnum;  i++) {
+                if (server.db[i]->keys) kvstoreRelease(server.db[i]->keys);
+                if (server.db[i]->expires) kvstoreRelease(server.db[i]->expires);
+                dictRelease(server.db[i]->watched_keys);
+                zfree(server.db[i]);
+            }
+            zfree(server.db);
+
+            if (c != NULL) freeTestClient(c);
+        }
+
+
+        // Deletes an item from the DB (often at the start of a test) - but does NOT notify
+        //  bgIteration.  bgIteration_keyDelete() should be explicitly called where needed.
+        void simpleDelItem(int itemNum) {
+            int db = getDbFromItemNum(itemNum);
+
+            sds delKey = sdsnew(keyStr(itemNum));
+            int rc = kvstoreHashtableDelete(server.db[db]->keys, 0, delKey);
+            ASSERT_EQ(rc, 1);
+            sdsfree(delKey);
+        }
+
+
+        // Find the actual dbEntry object by itemNum
+        dbEntry * getItem(int itemNum) {
+            int db = getDbFromItemNum(itemNum);
+            sds key = sdsnew(keyStr(itemNum));
+            dbEntry *de = dbFind(server.db[db], key);
+            sdsfree(key);
+            return de;
+        }
+
+
+        // The test expects that the next item read will be BGITERATOR_ITEM_COMPLETE
+        void expectReadComplete(bgIterator *iter) {
+            bgIteration_feedIterators();
+            bgIteratorItem *item = bgIteratorRead(iter);
+            EXPECT_EQ(item->type, BGITERATOR_ITEM_COMPLETE);
+            bgIteratorClose(iter);
+
+            int oldCleanupCount = cleanupCount;
+            bgIteration_feedIterators();
+            EXPECT_EQ(cleanupCount, oldCleanupCount + 1);
+        }
+
+
+        // The test is cleaning up and isn't validating the remaining cleanup
+        void expectAnythingCleanup(bgIterator *iter) {
+            while (true) {
+                bgIteration_feedIterators();
+                bgIteratorItem *item = bgIteratorRead(iter);
+                if ((item->type == BGITERATOR_ITEM_COMPLETE
+                        || item->type == BGITERATOR_ITEM_TERMINATED)) {
+                    bgIteratorClose(iter);
+                    break;
+                }
+            }
+            bgIteration_feedIterators();    // Recognize the closed iterator
+            EXPECT_EQ(cleanupCount, 1);
+        }
+
+
+        void expectDictEntryMetadataMatch(dbEntry *de1, dbEntry *de2) {
+            bgIterationEntryMetadata *dm1 = static_cast<bgIterationEntryMetadata *>(objectGetMetadata(de1));
+            bgIterationEntryMetadata *dm2 = static_cast<bgIterationEntryMetadata *>(objectGetMetadata(de2));
+
+            EXPECT_NE(dm1, nullptr);
+            EXPECT_NE(dm2, nullptr);
+            EXPECT_EQ(*dm1, *dm2);
+        }
+
+
+        // Useful when debugging new tests.  It reads/prints all remaining items then crashes.
+        void cleanupIteratorDebugPrint(bgIterator *iter) {
+            bool done = false;
+            printf("[DEBUG] Printing bgIterator '%s' items:\n", bgIteratorName(iter));
+            while (!done) {
+                bgIteration_feedIterators();
+                bgIteratorItem *item = bgIteratorRead(iter);
+                switch (item->type) {
+                    case BGITERATOR_ITEM_DBENTRY:
+                        {
+                            auto obj = item->u.dbe.de;
+                            const char * keyStr = objectGetKey(obj);
+                            printf("Entry: %s -> %s [itemNum: %i]\n",
+                                   keyStr,
+                                   static_cast<char *>(objectGetVal(obj)),
+                                   itemNumFromKey(keyStr));
+                            break;
+                    }
+                    case BGITERATOR_ITEM_REPLICATION:
+                        printf("Repl: DB=%d : ", item->dbid);
+                        for (int i = 0;  i < item->u.repl.argc;  i++)
+                            printf("%s ", static_cast<char*>(objectGetVal(item->u.repl.argv[i])));
+                        printf("\n");
+                        break;
+                    case BGITERATOR_ITEM_COMPLETE:
+                    case BGITERATOR_ITEM_TERMINATED:
+                        bgIteratorClose(iter);
+                        done = true;
+                        break;
+                    default:
+                        printf("unhandled: %d\n", item->type);
+                }
+            }
+            bgIteration_feedIterators();    // Recognize the closed iterator
+            ASSERT_TRUE(false); // Halt the test here
+        }
+
+
+        // Make a copy of the metadata
+        void * cloneMetadata(dbEntry *de) {
+            int size = objectGetMetadataSize(de);
+            void *metadata = zmalloc(size);
+            memcpy(metadata, objectGetMetadata(de), size);
+            return metadata;
+        }
+
+
+        // Compare a previous metadata copy to an existing entry
+        void compareAndFreeClonedMetadata(dbEntry *de, void *metadata) {
+            EXPECT_EQ(memcmp(objectGetMetadata(de), metadata, objectGetMetadataSize(de)), 0);
+            zfree(metadata);
+        }
+
+
+        // The test expects the next item will be a specific key
+        //  The item value is verified against the default unless provided as a parameter.
+        void expectReadKey(bgIterator *iter, int itemNum, const char *value=nullptr) {
+            int db = getDbFromItemNum(itemNum);
+
+            bgIteration_feedIterators();
+            bgIteratorItem *item = bgIteratorRead(iter);
+            bgIteration_feedIterators();
+
+            ASSERT_EQ(item->type, BGITERATOR_ITEM_DBENTRY);
+            EXPECT_EQ(item->dbid, db);
+            EXPECT_FALSE(item->u.dbe.is_cloned);
+            EXPECT_STREQ(objectGetKey(item->u.dbe.de), keyStr(itemNum));
+            if (value) {
+                EXPECT_THAT(item->u.dbe.de, robjEqualsStr(value));
+            } else {
+                EXPECT_THAT(item->u.dbe.de, robjEqualsStr(keyStr(itemNum)));
+            }
+        }
+
+
+        // The test expects the next item will be a specific key amd that the item is cloned.
+        //  Metadata is tested (to make sure the clone includes the proper metadata).
+        //  The item value is verified against the default unless provided as a parameter.
+        void expectReadClonedKey(bgIterator *iter, int itemNum, void *metadata, const char *value=nullptr) {
+            int db = getDbFromItemNum(itemNum);
+
+            bgIteration_feedIterators();
+            bgIteratorItem *item = bgIteratorRead(iter);
+            bgIteration_feedIterators();
+
+            ASSERT_EQ(item->type, BGITERATOR_ITEM_DBENTRY);
+            EXPECT_EQ(item->dbid, db);
+            EXPECT_TRUE(item->u.dbe.is_cloned);
+            compareAndFreeClonedMetadata(item->u.dbe.de, metadata);
+            EXPECT_STREQ(objectGetKey(item->u.dbe.de), keyStr(itemNum));
+            if (value) {
+                EXPECT_THAT(item->u.dbe.de, robjEqualsStr(value));
+            } else {
+                EXPECT_THAT(item->u.dbe.de, robjEqualsStr(keyStr(itemNum)));
+            }
+        }
+
+
+        // Test expects the next key, but specified by key name, not itemNum.
+        void expectReadDbKeyValue(bgIterator *iter, int db, const char *key, const char *value) {
+            bgIteration_feedIterators();
+            bgIteratorItem *item = bgIteratorRead(iter);
+            bgIteration_feedIterators();
+
+            ASSERT_EQ(item->type, BGITERATOR_ITEM_DBENTRY);
+            EXPECT_EQ(item->dbid, db);
+            EXPECT_STREQ(objectGetKey(item->u.dbe.de), key);
+            EXPECT_THAT(item->u.dbe.de, robjEqualsStr(value));
+        }
+
+
+        // Test expect to read a sequence of key items
+        void expectReadKeySequence(bgIterator *iter, int startItem, int endItem) {
+            for (int i = startItem;  i <= endItem;  i++) expectReadKey(iter, i);
+        }
+
+
+        // Just like expectReadKey, but also tests that a previous item is becoming unblocked.
+        void expectReadKeyWithUnblock(bgIterator *iter, int itemNum, int unblockItem, const char *value=nullptr) {
+            bool blocked = true;
+            EXPECT_CALL(mock, unblockClientsInUseOnKey(robjEqualsStr(keyStr(unblockItem))))
+                    .WillOnce(Assign(&blocked, false));
+            expectReadKey(iter, itemNum, value);
+            EXPECT_FALSE(blocked);
+        }
+
+
+        // Test expects to read a replication item matching the command help by client 'c'
+        void expectReadReplication(bgIterator *iter, client *c) {
+            bgIteration_feedIterators();
+            bgIteratorItem *item = bgIteratorRead(iter);
+            bgIteration_feedIterators();
+
+            ASSERT_EQ(item->type, BGITERATOR_ITEM_REPLICATION);
+            EXPECT_EQ(item->dbid, c->db->id);
+            EXPECT_EQ(item->u.repl.cmd, c->cmd);
+            EXPECT_EQ(item->u.repl.argc, c->argc);
+            for (int i = 0;  i < c->argc;  i++) {
+                EXPECT_STREQ(static_cast<char*>(objectGetVal(item->u.repl.argv[i])),
+                             static_cast<char*>(objectGetVal(c->argv[i])));
+            }
+        }
+
+
+        // We expect to read a MULTI command which should have been inserted.
+        void expectReadMultiReplication(bgIterator *iter) {
+            bgIteration_feedIterators();
+            bgIteratorItem *item = bgIteratorRead(iter);
+            bgIteration_feedIterators();
+
+            ASSERT_EQ(item->type, BGITERATOR_ITEM_REPLICATION);
+            EXPECT_EQ(item->u.repl.cmd, lookupCommandByCString("multi"));
+        }
+
+
+        // We expect to read an EXEC command which should have been inserted.
+        void expectReadExecReplication(bgIterator *iter) {
+            bgIteration_feedIterators();
+            bgIteratorItem *item = bgIteratorRead(iter);
+            bgIteration_feedIterators();
+
+            ASSERT_EQ(item->type, BGITERATOR_ITEM_REPLICATION);
+            EXPECT_EQ(item->u.repl.cmd, lookupCommandByCString("exec"));
+        }
+
+
+        // Expecting that a DEL command should have been replicated.
+        void expectReadReplicationDel(bgIterator *iter, int itemNum) {
+            int db = getDbFromItemNum(itemNum);
+
+            bgIteration_feedIterators();
+            bgIteratorItem *item = bgIteratorRead(iter);
+            bgIteration_feedIterators();
+
+            ASSERT_EQ(item->type, BGITERATOR_ITEM_REPLICATION);
+            EXPECT_EQ(item->dbid, db);
+            EXPECT_EQ(item->u.repl.cmd, lookupCommandByCString("DEL"));
+            EXPECT_EQ(item->u.repl.argc, 2);
+            EXPECT_THAT(item->u.repl.argv[0], robjEqualsStr("DEL"));
+            EXPECT_THAT(item->u.repl.argv[1], robjEqualsStr(keyStr(itemNum)));
+        }
+
+
+        // Expecting that a special SWAPDB item has been inserted.
+        void expectReadSwapDB(bgIterator *iter, int db1, int db2) {
+            bgIteration_feedIterators();
+            bgIteratorItem *item = bgIteratorRead(iter);
+            bgIteration_feedIterators();
+
+            ASSERT_EQ(item->type, BGITERATOR_ITEM_SWAPDB);
+            EXPECT_EQ(item->dbid, db1);
+            EXPECT_EQ(item->u.dbid2, db2);
+        }
+
+
+        static void debugPrintBucketInfoCb(void *privdata, void *entry) {
+            UNUSED(privdata);
+            dbEntry *de = (dbEntry *)entry;
+            printf("--- %s\n", objectGetKey(de));
+        }
+
+        void debugPrintBucketInfo() {
+            printf("*******DEBUG*******\n");
+            for (int db = 0;  db < server.dbnum;  db++) {
+                int num_ht = kvstoreNumHashtables(server.db[db]->keys);
+                for (int slot = 0;  slot < num_ht;  slot++) {
+                    hashtable *ht = kvstoreGetHashtable(server.db[db]->keys, slot);
+                    if (!ht) continue;
+
+                    printf("DB: %d, slot: %d\n", db, slot);
+                    size_t cursor = 0;
+                    do {
+                        cursor = hashtableScan(ht, cursor, debugPrintBucketInfoCb, NULL);
+                        printf("-----------\n");
+                    } while (cursor != 0);
+                }
+            }
+            ASSERT_TRUE(false);
+        }
+
+
+        // Creates a client with a write command (SET) for the given itemNum
+        client *getWriteClient(int itemNum, const char *value) {
+            int db = getDbFromItemNum(itemNum);
+
+            client *c = static_cast<client*>(zcalloc(sizeof(client)));
+
+            c->cmd = lookupCommandByCString("set");
+            c->db = server.db[db];
+
+            c->argc = 3;
+            c->argv = static_cast<robj**>(zcalloc(sizeof(robj*) * c->argc));
+            c->argv[0] = createStringObjectFromCString(c->cmd->fullname);
+            c->argv[1] = createStringObjectFromCString(keyStr(itemNum));
+            c->argv[2] = createStringObjectFromCString(value);
+
+            return c;
+        }
+
+
+        // Create a client with a write command that touches multiple keys
+        client *getWriteMultiKeysClient(
+                const char * cmdName,
+                int dstItemNum,
+                const std::vector<int> & srcItemsNum) {
+
+            assert(!srcItemsNum.empty());
+
+            const int db = getDbFromItemNum(dstItemNum);
+            std::for_each(srcItemsNum.cbegin(), srcItemsNum.cend(), [&db, this](int srcItemNum) {
+                assert(db == getDbFromItemNum(srcItemNum));
+            });
+
+            client *c = static_cast<client*>(zcalloc(sizeof(client)));
+
+            c->cmd = lookupCommandByCString(cmdName);
+            assert(c->cmd != nullptr);
+            c->db = server.db[db];
+
+            c->argc = 2 + srcItemsNum.size();
+            c->argv = static_cast<robj**>(zcalloc(sizeof(robj*) * c->argc));
+            c->argv[0] = createStringObjectFromCString(c->cmd->fullname);
+            c->argv[1] = createStringObjectFromCString(keyStr(dstItemNum));
+            for (unsigned int i = 0;  i < srcItemsNum.size();  i++) {
+                c->argv[2 + i] = createStringObjectFromCString(keyStr(srcItemsNum[i]));
+            }
+
+            return c;
+        }
+
+
+        client *getWrite2KeysClient(const char * cmdName, int dstItemNum, int srcItemNum) {
+            return getWriteMultiKeysClient(cmdName, dstItemNum, {srcItemNum});
+        }
+
+
+        client *getWrite3KeysClient(
+                const char * cmdName, int dstItemNum, int src1ItemNum, int src2ItemNum) {
+            return getWriteMultiKeysClient(cmdName, dstItemNum, {src1ItemNum, src2ItemNum});
+        }
+
+
+        // Create a client with a MULTI/EXEC block.
+        //  This parses a series of commands separated by ';'
+        //  Example: getMultiClient("SET A0 xxx; SELECT 1; SET A1 xxx; SET B1 xxx")
+        client *getMultiClient(const char *commands, int dbid = 0) {
+            char *commandsCopy = zstrdup(commands);  // a mutable copy
+            char *commandStr, *commandStrSave;
+            char *token, *tokenSave;
+
+            client *c = static_cast<client*>(zcalloc(sizeof(client)));
+            c->db = server.db[dbid];
+            initClientMultiState(c);
+            c->flag.multi = 1;
+            c->mstate->cmd_flags |= CMD_WRITE;
+
+            commandStr = strtok_r(commandsCopy, ";", &commandStrSave);
+            while (commandStr != NULL) {
+
+                token = strtok_r(commandStr, " ", &tokenSave);
+                c->cmd = lookupCommandByCString(token);
+
+                c->argv = static_cast<robj**>(zcalloc(sizeof(robj*) * 5));   // command + 4 args
+
+                for (int i = 0;  token != NULL;  i++) {
+                    c->argv[i] = createStringObjectFromCString(token);
+                    c->argc = i+1;
+                    token = strtok_r(NULL, " ", &tokenSave);
+                }
+
+                queueMultiCommand(c, 0);
+                freeClientArgv(c);
+
+                commandStr = strtok_r(NULL, ";", &commandStrSave);
+            }
+
+            c->cmd = lookupCommandByCString("exec");
+            c->argc = 1;
+            c->argv = static_cast<robj**>(zcalloc(sizeof(robj*) * c->argc));
+            c->argv[0] = createStringObjectFromCString("EXEC");
+
+            zfree(commandsCopy);
+            return c;
+        }
+
+
+        // Initially, a MULTI client is set up to execute the EXEC command (which examines the
+        //  contents of the multi/exec block).  This function advances the client to begin executing
+        //  the individual commands within the multi/exec block.
+        void advanceMultiClientToCommand(client *c, int cmdNum) {
+            assert(cmdNum >= 0 && cmdNum < c->mstate->count);
+            if (cmdNum == 0) {
+                // Save off the EXEC
+                orig_argc = c->argc;
+                orig_argv = c->argv;
+            }
+            c->argc = c->mstate->commands[cmdNum].argc;
+            c->argv = c->mstate->commands[cmdNum].argv;
+            c->argv_len = c->mstate->commands[cmdNum].argv_len;
+            c->cmd = c->realcmd = c->mstate->commands[cmdNum].cmd;
+        }
+
+
+        // A client with a fictional command:
+        //  SETGET <write_key> <value> <read_key>
+        //  - writes a value to the first key (making this CMD_WRITE | CMD_WRITE_FIRSTKEY_ONLY)
+        //  - reads a second key
+        client *getSetGetClient(int itemNum1, const char *value1, int itemNum2) {
+            // Fictional command which writes to 1st key and reads the 2nd
+            int db = getDbFromItemNum(itemNum1);
+            assert(db == getDbFromItemNum(itemNum2));   // (this would be a testcase error)
+
+            client *c = static_cast<client*>(zcalloc(sizeof(client)));
+            struct serverCommand *cmd
+                    = static_cast<struct serverCommand*>(zcalloc(sizeof(struct serverCommand)));
+
+            cmd->fullname = const_cast<char*>("SETGET");
+            cmd->arity = 4;
+            cmd->flags = CMD_WRITE | CMD_WRITE_FIRSTKEY_ONLY;
+
+            cmd->legacy_range_key_spec.begin_search_type = KSPEC_BS_INDEX;
+            cmd->legacy_range_key_spec.bs.index.pos = 1;    // firstkey
+            cmd->legacy_range_key_spec.fk.range.lastkey = -1;
+            cmd->legacy_range_key_spec.fk.range.keystep = 2;
+
+            c->cmd = cmd;
+            c->db = server.db[db];
+
+            c->argc = 4;
+            c->argv = static_cast<robj**>(zcalloc(sizeof(robj*) * c->argc));
+            c->argv[0] = createStringObjectFromCString(cmd->fullname);
+            c->argv[1] = createStringObjectFromCString(keyStr(itemNum1));
+            c->argv[2] = createStringObjectFromCString(value1);
+            c->argv[3] = createStringObjectFromCString(keyStr(itemNum2));
+
+            return c;
+        }
+
+
+        // Client with a fictional write command with no keys specified
+        client *getNoKeysWriteClient() {
+            // Fictional command which is marked WRITE, but has no keys.
+            client *c = static_cast<client*>(zcalloc(sizeof(client)));
+            struct serverCommand *cmd
+                    = static_cast<struct serverCommand*>(zcalloc(sizeof(struct serverCommand)));
+
+            cmd->fullname = const_cast<char*>("NOKEYSWRITE");
+            cmd->arity = 1;
+            cmd->flags = CMD_WRITE;
+
+            cmd->legacy_range_key_spec.begin_search_type = KSPEC_BS_INVALID;    // No keys
+
+            c->cmd = cmd;
+            c->db = server.db[0];
+
+            c->argc = 1;
+            c->argv = static_cast<robj**>(zcalloc(sizeof(robj*) * c->argc));
+            c->argv[0] = createStringObjectFromCString(cmd->fullname);
+
+            return c;
+        }
+
+
+        void freeClientArgv(client *c) {
+            for (int i = 0; i < c->argc; i++) decrRefCount(c->argv[i]);
+            zfree(c->argv);
+            c->argv = NULL;
+            c->argc = 0;
+        }
+
+
+        // During testing, we create some fake commands.  This checks if the command is real or fake.
+        //  A fake command is dynamically allocated and can be freed.  Real commands are static.
+        bool isRealValkeyCommand(struct serverCommand *cmd) {
+            return lookupCommandByCString(cmd->declared_name);
+        }
+
+
+        void freeTestClient(client *c) {
+            // If the current command references one of the multi commands, set it back to the EXEC
+            if (c->mstate != NULL) {
+                for (int i = 0; i < c->mstate->count; i++) {
+                    if (c->argv == c->mstate->commands[i].argv) {
+                        c->argc = orig_argc;
+                        c->argv = orig_argv;
+                        orig_argc = 0;
+                        orig_argv = nullptr;
+                        break;
+                    }
+                }
+            }
+            freeClientMultiState(c);
+            freeClientArgv(c);
+
+            if (!isRealValkeyCommand(c->cmd)) zfree(c->cmd);
+
+            zfree(c);
+        }
+
+
+        // Simulate what happens when a write command is blocked
+        void simulateBlockedWrite(client *c, int expectedNumberBlockedKeys = 1) {
+            EXPECT_CALL(mock, blockClientInUseOnKeys(c,expectedNumberBlockedKeys,_)).Times(1);
+            bool blocked = bgIteration_blockClientIfRequired(c);
+            EXPECT_TRUE(blocked);
+        }
+
+
+        // Simulate what happens when a write command isn't blocked
+        void simulateUnblockedWrite(client *c) {
+            EXPECT_CALL(mock, blockClientInUseOnKeys(c,_,_)).Times(0);
+            bool blocked = bgIteration_blockClientIfRequired(c);
+            EXPECT_FALSE(blocked);
+        }
+
+
+        // Simulate what happens when a write command is NOT blocked, because the key can be cloned
+        //  and expedited.  This requires a scenario where we would normally need to block the
+        //  client so that bgIteration can process the item.
+        void simulateClonedWrite(bgIterator *it, client *c) {
+            bgIteratorStatus status;
+            bgIteratorGetStatus(it, &status);
+            unsigned long initialClones = status.dbentry_clones_queued;
+
+            // Client should not get blocked
+            EXPECT_CALL(mock, blockClientInUseOnKeys(c,_,_)).Times(0);
+            bool blocked = bgIteration_blockClientIfRequired(c);
+            EXPECT_FALSE(blocked);
+
+            // Ensure that cloning took place
+            bgIteratorGetStatus(it, &status);
+            EXPECT_EQ(status.dbentry_clones_queued, (initialClones + 1));
+
+            // Ensure that the real item isn't inuse (because we cloned it instead)
+            dbEntry *de = dbFind(c->db, static_cast<sds>(objectGetVal(c->argv[1])));
+            ASSERT_FALSE(bgIteration_isEntryInuse(de));
+        }
+
+
+        // Simulates what happens when a write command (SET) actually executes.  This requires a
+        //  scenario where we would NOT be blocked on the write.  It actually alters the value of
+        //  the key and updates the metadata.
+        void simulateUnblockedWriteWithModification(client *c) {
+            EXPECT_CALL(mock, blockClientInUseOnKeys(c,_,_)).Times(0);
+            bool blocked = bgIteration_blockClientIfRequired(c);
+            EXPECT_FALSE(blocked);
+
+            // Fake execution of the command - touch the iterator_epoch counter and swap the value
+            // We need to duplicate the value because setKey() can reallocate it.
+            robj *value = dupStringObject(c->argv[2]);
+            setKey(c, c->db, c->argv[1], &value, SETKEY_ADD_OR_UPDATE);
+
+            // Let's make sure that setKey updated the iteration epoch (as it should have)
+            dbEntry *de = dbFind(c->db, static_cast<sds>(objectGetVal(c->argv[1])));
+            bgIterationEntryMetadata *md = static_cast<bgIterationEntryMetadata*>(objectGetMetadata(de));
+            bgIterationEntryMetadata md_after_setkey = *md;
+            // Now update the md again, and it should still match
+            bgIteration_dbEntryModified(de);
+            EXPECT_EQ(md, objectGetMetadata(de)); // the md location shouldn't have changed
+            EXPECT_EQ(md_after_setkey, *md); // the md value should still be the same
+
+            bgIteration_handleCommandReplication(c->db->id, c->cmd, c->argc, c->argv);
+        }
+
+
+        // Simulate the expiration (active expiration) of a key.  This is independent of command execution.
+        void simulateExpiration(int itemNum) {
+            ASSERT_NE(getItem(itemNum), nullptr); // Should be there before expire
+
+            // NOTE: This seems weird, but Valkey propagates the delete before actually expiring the
+            //       key.  BgIterator expects this behavior and expects the key to exist when the
+            //       DEL is received for propagation.
+
+            // Send bgIteration the DEL
+            int db = getDbFromItemNum(itemNum);
+            robj *argv[2];
+            argv[0] = createStringObjectFromCString("DEL");
+            argv[1] = createStringObjectFromCString(keyStr(itemNum));
+            serverCommand *cmd = lookupCommandByCString("DEL");
+            bgIteration_handleCommandReplication(db, cmd, 2, argv);
+            bgIteration_keyDelete(db, static_cast<sds>(objectGetVal(argv[1])));
+            decrRefCount(argv[0]);
+            decrRefCount(argv[1]);
+
+            simpleDelItem(itemNum);     // Simulate the actual del
+
+            EXPECT_EQ(getItem(itemNum), nullptr);
+        }
+
+
+        // Simulates an expiration, but validates behavior for an item inuse by bgIteration.
+        void simulateExpirationOfInuse(int itemNum) {
+            // An inuse item will have a refcount > 1.  BgIteration should have incremented the
+            //  refcount while it is inuse.
+            dbEntry *de = getItem(itemNum);
+            ASSERT_NE(de, nullptr); // Should be there before expire
+            EXPECT_TRUE(bgIteration_isEntryInuse(de));
+            EXPECT_EQ(de->refcount, 2u);
+
+            simulateExpiration(itemNum);
+
+            // At this point, the item is removed from the DB, but still exists, and the refcount
+            //  has been reduced to 1.  This allows a background thread to continue using the item.
+            EXPECT_EQ(de->refcount, 1u);
+        }
+
+
+        // Simulates an expiration, but the item is a future item which will be expedited.
+        void simulateExpirationWithExpedite(int itemNum) {
+            // An inuse item will have a refcount > 1.  BgIteration should have incremented the
+            //  refcount while it is inuse.
+            dbEntry *de = getItem(itemNum);
+            ASSERT_NE(de, nullptr); // Should be there before expire
+            EXPECT_FALSE(bgIteration_isEntryInuse(de)); // Not yet inuse
+            EXPECT_EQ(de->refcount, 1u);
+
+            simulateExpiration(itemNum);
+
+            // At this point, the item is removed from the DB, but still exists, and the refcount
+            //  has been reduced to 1.  This allows a background thread to continue using the item.
+            EXPECT_TRUE(bgIteration_isEntryInuse(de)); // It's inuse now
+            EXPECT_EQ(getItem(itemNum), nullptr);      // but it's not in the DB anymore
+            EXPECT_EQ(de->refcount, 1u);
+        }
+
+
+        // Simulate execution of a SWAPDB command
+        void simulateSwapDB(int dbid0, int dbid1) {
+            char dbStr[2] = {0};
+
+            client *c = static_cast<client*>(zcalloc(sizeof(client)));
+
+            c->cmd = lookupCommandByCString("swapdb");
+            c->db = server.db[0];
+
+            c->argc = 3;
+            c->argv = static_cast<robj**>(zcalloc(sizeof(robj*) * c->argc));
+            c->argv[0] = createStringObjectFromCString(c->cmd->fullname);
+            dbStr[0] = '0' + dbid0;
+            c->argv[1] = createStringObjectFromCString(dbStr);
+            dbStr[0] = '0' + dbid1;
+            c->argv[2] = createStringObjectFromCString(dbStr);
+
+            bool blocked = bgIteration_blockClientIfRequired(c);
+            EXPECT_FALSE(blocked);  // SWAPDB should never block
+
+            // The real SWAP does more than this, but this is enough for unit tests
+            serverDb *aux = server.db[dbid0];
+            server.db[dbid0] = server.db[dbid1];
+            server.db[dbid1] = aux;
+
+            bgIteration_handleCommandReplication(0, c->cmd, c->argc, c->argv);
+
+            freeTestClient(c);
+        }
+
+
+        // Simulate execution of a FLUSHDB or FLUSHALL command
+        void simulateFlushDB(int db, int anInUseItem) {
+            client *c = static_cast<client*>(zcalloc(sizeof(client)));
+
+            if (db == -1) {
+                c->cmd = lookupCommandByCString("flushall");
+                c->db = server.db[0];
+            } else {
+                c->cmd = lookupCommandByCString("flushdb");
+                c->db = server.db[db];
+            }
+
+            c->argc = 1;
+            c->argv = static_cast<robj**>(zcalloc(sizeof(robj*) * c->argc));
+            c->argv[0] = createStringObjectFromCString(c->cmd->fullname);
+
+            dbEntry *de_in_use = getItem(anInUseItem);
+            EXPECT_EQ(de_in_use->refcount, 2u);
+
+            bool blocked = bgIteration_blockClientIfRequired(c);
+            EXPECT_FALSE(blocked);  // FLUSHDB should never block
+
+            // The real FLUSH does more than this, but this is enough for unit tests
+
+            // Now flush the items
+            for (int d = 0;  d < server.dbnum;  d++) {
+                if (db == -1 || db == d) {
+                    kvstoreRelease(server.db[d]->keys);
+                    server.db[d]->keys = NULL;
+                }
+            }
+
+            EXPECT_EQ(de_in_use->refcount, 1u);
+
+            // and replicate
+
+            bgIteration_handleCommandReplication(0, c->cmd, c->argc, c->argv);
+
+            freeTestClient(c);
+        }
+};
+
+
+TEST_F(BgIterationTest, dbIsOK) {
+    // Just run the setup/teardown code to make sure the DB is OK.
+}
+
+
+/////////////////////////////////////////////////////
+// Simple Full-scan iterator tests
+/////////////////////////////////////////////////////
+
+// A simple full scan that just checks basic flow.
+TEST_F(BgIterationTest, createAndCleanup) {
+    bgIterator *it = bgIteratorCreateFullScanIter("simple",
+            BGITERATOR_CONSISTENCY_NONE, NULL, iteratorCleanupFn, PRIVDATA);
+    EXPECT_EQ(bgIteratorFind("simple"), it);
+    EXPECT_STREQ(bgIteratorName(it), "simple");
+
+    bgIteratorStatus status;
+    bgIteratorGetStatus(it, &status);
+
+    EXPECT_EQ(status.dbentries_queued, 0u);
+    EXPECT_EQ(status.dbentries_processed, 0u);
+    EXPECT_EQ(status.replication_queued, 0u);
+    EXPECT_EQ(status.replication_processed, 0u);
+    EXPECT_EQ(status.swapdb_queued, 0u);
+    EXPECT_EQ(status.swapdb_processed, 0u);
+    EXPECT_EQ(status.flushdb_queued, 0u);
+    EXPECT_EQ(status.flushdb_processed, 0u);
+
+    EXPECT_EQ(status.queue_length, 0u);
+    EXPECT_GT(status.queue_length_target, 0u);
+
+    EXPECT_LT(status.runtime_ms, 5u);
+    EXPECT_EQ(status.current_item_ms, 0u);
+
+    expectAnythingCleanup(it);
+
+    EXPECT_EQ(bgIteratorFind("simple"), nullptr);
+}
+
+
+// Close client before reading anything
+TEST_F(BgIterationTest, testClientCloseBeforeRead) {
+    bgIterator *it = bgIteratorCreateFullScanIter("simple",
+            BGITERATOR_CONSISTENCY_NONE, NULL, iteratorCleanupFn, PRIVDATA);
+    bgIteration_feedIterators();
+
+    bgIteratorClose(it); // Immediately close before reading
+
+    bgIteration_feedIterators(); // Recognize the closed iterator
+
+    // Check that the cleanup callback was executed properly
+    EXPECT_EQ(cleanupCount, 1);
+    EXPECT_TRUE(cleanupTerminated);
+}
+
+
+// Test that the full scan hits each item in the expected sequence.
+TEST_F(BgIterationTest, orderedIteration) {
+    bgIterator *it = bgIteratorCreateFullScanIter("simple",
+            BGITERATOR_CONSISTENCY_NONE, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKeySequence(it, 0, LAST_ITEM);
+
+    // Quick status check.  At this point, the final item hasn't been returned yet.
+    bgIteratorStatus status;
+    bgIteratorGetStatus(it, &status);
+    EXPECT_EQ(status.dbentries_queued, static_cast<unsigned int>(TOTAL_ITEMS));
+    EXPECT_EQ(status.dbentries_processed, static_cast<unsigned int>(TOTAL_ITEMS) - 1);
+
+    expectReadComplete(it); // Returns the final item, and reads the completion item
+
+    // Check that the cleanup callback was executed properly
+    EXPECT_EQ(cleanupCount, 1);
+    EXPECT_FALSE(cleanupTerminated);
+}
+
+
+// Test that two simultaneous iterations work properly.
+TEST_F(BgIterationTest, twoOrderedIterations) {
+    bgIterator *it1 = bgIteratorCreateFullScanIter("simple1",
+            BGITERATOR_CONSISTENCY_NONE, NULL, iteratorCleanupFn, PRIVDATA);
+    bgIterator *it2 = bgIteratorCreateFullScanIter("simple2",
+            BGITERATOR_CONSISTENCY_NONE, NULL, iteratorCleanupFn, PRIVDATA);
+    EXPECT_EQ(bgIteratorFind("simple1"), it1);
+    EXPECT_EQ(bgIteratorFind("simple2"), it2);
+
+    int it1Count = 0;
+    int it2Count = 0;
+    while (it1Count < TOTAL_ITEMS || it2Count < TOTAL_ITEMS) {
+        // Randomly read from either iterator
+        if ((rand() % 2) == 0) {
+            if (it1Count < TOTAL_ITEMS) expectReadKey(it1, it1Count++);
+        } else {
+            if (it2Count < TOTAL_ITEMS) expectReadKey(it2, it2Count++);
+        }
+    }
+
+    // Nothing left but to read the final completions
+    expectReadComplete(it1);
+    EXPECT_EQ(cleanupCount, 1);
+    EXPECT_FALSE(cleanupTerminated);
+    expectReadComplete(it2);
+    EXPECT_EQ(cleanupCount, 2);
+    EXPECT_FALSE(cleanupTerminated);
+}
+
+
+/////////////////////////////////////////////////////
+// MODIFY A FUTURE ITEM
+// The next tests validate the basic pattern when a key, not yet iterated, is modified.
+// Each variation of iteration flags is tested.
+// Note that these tests execute without cloning (cloning is tested elsewhere).
+/////////////////////////////////////////////////////
+
+// Modify a future item, without replication or consistency.
+// Our expectation for this case is that the modification should proceed without blocking, the item
+//  shouldn't be expedited, and we will see the modified item once the iterator reaches it.
+TEST_F(BgIterationTest, modFutureItem) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_NONE, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // Read the 1st key - let's get the party started
+    expectReadKey(it, 0);
+
+    // At this point, key 0 is read.  Keys 1 & 2 are queued (they are all in the same bucket).
+    // Fake a modification to a later key so that we can see if it gets processed out of order.
+    c = getWriteClient(6, "xxx");
+
+    // We DONT expect the client to be blocked - not consistent
+    simulateUnblockedWriteWithModification(c);
+
+    // Now continue reading, 1, 2, 3, 4, 5
+    expectReadKeySequence(it, 1, 5);
+
+    // Let's validate that key 6 shows the new value
+    expectReadKey(it, 6, "xxx");
+
+    // Continue...
+    expectReadKeySequence(it, 7, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// Modify a future item, without replication but with consistency.  (Like a SAVE operation)
+// Our expectation for this case is that the modification SHOULD be blocked, as we have to save the
+//  the item in it's state before the modification.  To reduce blocking time, the item should be
+//  moved to the head of the queue - there's no replication in this case, so out-of-order processing
+//  isn't a concern.
+TEST_F(BgIterationTest, modFutureItem_start) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_START, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // Read the 1st key - let's get the party started
+    expectReadKey(it, 0);
+
+    // At this point, key 0 is read.  Keys 1 & 2 are queued (they are all in the same bucket).
+    // Fake a modification to a later key so that we can see if it gets processed out of order.
+    c = getWriteClient(6, "xxx");
+    // Since this is consistent, we will block the client, disallowing the write.
+    simulateBlockedWrite(c);
+
+    // On a consistent iterator, the event is expedited in-front of items already in queue!
+    //  Read key 6 out of order.
+    expectReadKey(it, 6);
+
+    // Now, when we read key 1, key 6 is released back to Valkey, and the client will be unblocked.
+    expectReadKeyWithUnblock(it, 1, 6);
+    simulateUnblockedWriteWithModification(c); // Now the write can proceed
+
+    // Continue...
+    expectReadKeySequence(it, 2, 5);
+    // 6 has already been processed
+    expectReadKeySequence(it, 7, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// Modify a future item, with replication but without consistency.  (Like a Threadsave Full Sync operation)
+// Our expectation for this case is that the modification should proceed without blocking, as the
+//  mode is inconsistent.  We don't expect replication, as we haven't reached the item yet.  We'll
+//  see the modified item later.
+TEST_F(BgIterationTest, modFutureItem_eventual) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // Read the 1st key - let's get the party started
+    expectReadKey(it, 0);
+
+    // At this point, key 0 is read.  Keys 1 & 2 are queued (they are all in the same bucket).
+    // Fake a modification to a later key so that we can see if it gets processed out of order.
+    c = getWriteClient(6, "xxx");
+
+    // We DONT expect the client to be blocked - not consistent
+    simulateUnblockedWriteWithModification(c);
+
+    // NOTE:  Since we haven't reached this item yet, and consistency is not required, there's no
+    //        need to replicate this command.  So everything should wrap up just fine - we will see
+    //        the new value when we get to it.
+
+    // Now continue reading, 1, 2, 3, 4, 5
+    expectReadKeySequence(it, 1, 5);
+
+    // Let's validate that key 6 shows the new value
+    expectReadKey(it, 6, "xxx");
+
+    // Continue...
+    expectReadKeySequence(it, 7, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+/////////////////////////////////////////////////////
+// MODIFY A CURRENT ITEM
+// The next tests validate the basic pattern when a key, currently in use, is modified.
+// Each variation of iteration flags is tested.
+// Note that these tests execute without cloning (cloning is tested elsewhere).
+/////////////////////////////////////////////////////
+
+// Modify a current item, without replication or consistency.
+// Our expectation for this case is that the modification SHOULD be blocked, the item shouldn't
+//  be expedited (it's already in use).
+TEST_F(BgIterationTest, modCurrentItem) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_NONE, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // Read the 1st key - let's get the party started
+    expectReadKey(it, 0);
+
+    // At this point, key 0 is read.  Keys 1 & 2 are queued (they are all in the same bucket).
+    c = getWriteClient(2, "xxx");
+
+    // Must be blocked since key is queued
+    simulateBlockedWrite(c);
+
+    // Now continue reading
+    expectReadKey(it, 1);
+    expectReadKey(it, 2);
+    expectReadKeyWithUnblock(it, 3, 2);
+    simulateUnblockedWriteWithModification(c);     // the actual write won't affect anything (past key, no replication)
+
+    // Continue...
+    expectReadKeySequence(it, 4, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// Modify a current item, without replication but with consistency.  (Like a SAVE operation)
+// Our expectation for this case is that the modification SHOULD be blocked, the item shouldn't
+//  be expedited (it's already in use).
+TEST_F(BgIterationTest, modCurrentItem_start) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_START, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // Read the 1st key - let's get the party started
+    expectReadKey(it, 0);
+
+    // At this point, key 0 is read.  Keys 1 & 2 are queued (they are all in the same bucket).
+    c = getWriteClient(2, "xxx");
+
+    // Must be blocked since key is queued
+    simulateBlockedWrite(c);
+
+    // Now continue reading
+    expectReadKey(it, 1);
+    expectReadKey(it, 2);
+    expectReadKeyWithUnblock(it, 3, 2);
+    simulateUnblockedWriteWithModification(c);     // the actual write won't affect anything (past key, no replication)
+
+    // Continue...
+    expectReadKeySequence(it, 4, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// Modify a current item, with replication but without consistency.  (Like a Threadsave Full Sync operation)
+// Our expectation for this case is that the modification SHOULD be blocked.  After the key is processed,
+//  the write will proceed, and the replication will be sent.
+TEST_F(BgIterationTest, modCurrentItem_eventual) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // Read the 1st key - let's get the party started
+    expectReadKey(it, 0);
+
+    // At this point, key 0 is read.  Keys 1 & 2 are queued (they are all in the same bucket).
+    c = getWriteClient(2, "xxx");
+
+    // Must be blocked since key is queued
+    simulateBlockedWrite(c);
+
+    // Now continue reading
+    expectReadKey(it, 1);
+    expectReadKey(it, 2);
+    expectReadKeyWithUnblock(it, 3, 2);
+    simulateUnblockedWriteWithModification(c);     // the actual write will cause replication
+
+    expectReadKey(it, 4);  // 4 got put in queue when 3 was read
+
+    expectReadReplication(it, c);
+
+    // Continue...
+    expectReadKeySequence(it, 5, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+/////////////////////////////////////////////////////
+// MODIFY A PAST ITEM
+// The next tests validate the basic pattern when a key, not yet iterated on, is modified.
+// Each variation of iteration flags is tested.
+// Note that these tests execute without cloning (cloning is tested elsewhere).
+/////////////////////////////////////////////////////
+
+// Modify a past item, without replication or consistency.
+// Our expectation for this case is that the modification should proceed without blocking.
+//  No replication is generated and keys are processed similar to no modification.
+TEST_F(BgIterationTest, modPastItem) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_NONE, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // Read the 1st key - let's get the party started
+    expectReadKey(it, 0);
+
+    // This read returns key 0 (making it a past item)
+    expectReadKey(it, 1);
+
+    // At this point, key 0 is returned.
+    c = getWriteClient(0, "xxx");
+    simulateUnblockedWriteWithModification(c);
+
+    // Continue...
+    expectReadKeySequence(it, 2, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// Modify a past item, without replication but with consistency.  (Like a SAVE operation)
+// Our expectation for this case is that the modification should proceed without blocking.
+//  No replication is generated and keys are processed similar to no modification.
+TEST_F(BgIterationTest, modPastItem_start) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_START, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // Read the 1st key - let's get the party started
+    expectReadKey(it, 0);
+
+    // This read returns key 0 (making it a past item)
+    expectReadKey(it, 1);
+
+    // At this point, key 0 is returned.
+    c = getWriteClient(0, "xxx");
+    simulateUnblockedWriteWithModification(c);
+
+    // Continue...
+    expectReadKeySequence(it, 2, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// Modify a past item, with replication but without consistency.  (Like a Threadsave Full Sync operation)
+// Our expectation for this case is that the modification should proceed without blocking.
+//  Replication will be sent.
+TEST_F(BgIterationTest, modPastItem_eventual) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // Read the 1st key - let's get the party started
+    expectReadKey(it, 0);
+
+    // This read returns key 0 (making it a past item)
+    expectReadKey(it, 1);
+
+    // At this point, key 0 is returned.
+    c = getWriteClient(0, "xxx");
+    simulateUnblockedWriteWithModification(c);
+
+    // Key 2 was already in queue (same bucket as key 1).  The replication will follow.
+    expectReadKey(it, 2);
+    expectReadReplication(it, c);
+
+    // Continue...
+    expectReadKeySequence(it, 3, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+/////////////////////////////////////////////////////
+// TESTS FOR ITEM CLONING
+/////////////////////////////////////////////////////
+
+// In a consistent iteration, verify that a simple string is properly cloned, and that a write can
+//  occur without blocking.  Validate the cloned item and metadata.
+TEST_F(BgIterationTest, modFutureItem_start_CloneExpeditedItem) {
+    // Initialize cloning configurations.
+    bgIteration_unitTestEnableCloning(50, 100);
+
+    bgIteratorStatus status;
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_START, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // Read the 1st key - let's get the party started
+    expectReadKey(it, 0);
+
+    // At this point, key 0 is read.  Keys 1 & 2 are queued (they are all in the same bucket).
+    // Fake a modification to a later key so that we can see if it gets processed out of order.
+    c = getWriteClient(6, "xxx");
+
+    // Quick status check.  At this point, no clones exist yet.
+    bgIteratorGetStatus(it, &status);
+    EXPECT_EQ(status.dbentry_clones_queued, 0u);
+
+    // Since item 6 should be cloned, it will not block the client, allowing the write.
+    void *de6_md = cloneMetadata(getItem(6));
+    simulateClonedWrite(it, c); // This wouldn't block, and queues the cloned value
+    simulateUnblockedWriteWithModification(c); // This modifies the real entry in the de (touching metadata)
+
+    // At this point, one clone is in the queue.
+    bgIteratorGetStatus(it, &status);
+    EXPECT_EQ(status.dbentry_clones_queued, 1u);
+
+    // On a consistent iterator, the event is expedited in-front of items already in queue!
+    //  Read key 6 (which is cloned) out of order.  The value will still match the key.
+    expectReadClonedKey(it, 6, de6_md); // Also validates and frees the metadata
+
+    // Quick status check.  At this point, cloned items have not been marked as processed yet.
+    bgIteratorGetStatus(it, &status);
+    EXPECT_EQ(status.dbentry_clones_processed, 0u);
+
+    // Reading key 1 will release key 6, and the clone will finish processing.
+    expectReadKey(it, 1);
+    bgIteratorGetStatus(it, &status);
+    EXPECT_EQ(status.dbentry_clones_processed, 1u);
+
+    // Now, when we read key 2 should not have an impact on number of processed clones.
+    expectReadKey(it, 2);
+    bgIteratorGetStatus(it, &status);
+    EXPECT_EQ(status.dbentry_clones_processed, 1u);
+
+    // Continue...
+    expectReadKeySequence(it, 3, 5);
+    // 6 has already been processed
+    expectReadKeySequence(it, 7, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// Check that cloning for simple strings is respecting the size limits and pool size.  On a
+//  consistent iteration, we expect to block or clone on all future keys.  We validate that we can
+//  clone if the item is small enough and the cloning pool has more space left.
+TEST_F(BgIterationTest, modFutureItem_start_LargeItemOrClonePoolFull) {
+    // Initialize cloning configurations to test the clone pool functionality first.
+    bgIteration_unitTestEnableCloning(50, 50);
+
+    bgIteratorStatus status;
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_START, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // Read the 1st key - let's get the party started
+    expectReadKey(it, 0);
+
+    // At this point, key 0 is read.  Keys 1 & 2 are queued (they are all in the same bucket).
+    // Fake a modification to a later key so that we can see if it gets processed out of order.
+    client *c6 = getWriteClient(6, "xxx");
+    client *c7 = getWriteClient(7, "xxx");
+    client *c8 = getWriteClient(8, "xxx");
+
+    // Quick status check.  At this point, no clones exist yet.
+    bgIteratorGetStatus(it, &status);
+    EXPECT_EQ(status.dbentry_clones_queued, 0u);
+
+    // Since item 6 should be cloned, it will not block the client, allowing the write.
+    void *de6_md = cloneMetadata(getItem(6));
+    simulateClonedWrite(it, c6);
+    simulateUnblockedWriteWithModification(c6);
+
+    // At this point, one clone is in the queue.
+    bgIteratorGetStatus(it, &status);
+    EXPECT_EQ(status.dbentry_clones_queued, 1u);
+
+    // Now that cloning pool is full, item 7 will not be cloned and the client will be blocked.
+    simulateBlockedWrite(c7);
+    ASSERT_TRUE(bgIteration_isEntryInuse(getItem(7)));
+
+    // There is still only one cloned item in the queue.
+    bgIteratorGetStatus(it, &status);
+    EXPECT_EQ(status.dbentry_clones_queued, 1u);
+
+    // Now change cloning configurations to test that large items will not be cloned. We adjust
+    //  the clone pool size to allow two items, but set the maximum item size to be smaller than
+    //  the size of item 8. The clone pool size must be larger than the total size of the existing
+    //  clones plus the maximum item clone size.
+    bgIteration_unitTestEnableCloning(1, 101);
+
+    // This write will pass the clone pool check but fail the item size check, blocking the client.
+    simulateBlockedWrite(c8);
+    ASSERT_TRUE(bgIteration_isEntryInuse(getItem(8)));
+
+    // On a consistent iterator, the expedited item in-front of items already in queue!
+    //  Read key 6 out of order.
+    expectReadClonedKey(it, 6, de6_md);
+
+    // Now, when we expect to read key 7, which was expedited, key 6 will be released back to Valkey
+    //  and the clone will be deallocated here.
+    expectReadKey(it, 7);
+
+    // Now, when we read key 8, which was expedited, key 7 is released back to Valkey, and the client
+    // will be unblocked.
+    // (actually, unblock is called after every key [just in case] - but functionally we only care
+    //  about this one)
+    expectReadKeyWithUnblock(it, 8, 7);
+    simulateUnblockedWriteWithModification(c7);
+
+    // Now, when we read key 1, key 8 is released back to Valkey, and the client will be unblocked.
+    expectReadKeyWithUnblock(it, 1, 8);
+    simulateUnblockedWriteWithModification(c8);
+
+    // Since only one item was cloned, there should be one clone processed
+    bgIteratorGetStatus(it, &status);
+    EXPECT_EQ(status.dbentry_clones_processed, 1u);
+
+    // Continue...
+    expectReadKeySequence(it, 2, 5);
+    // 6, 7, and 8 have already been processed
+    expectReadKeySequence(it, 9, LAST_ITEM);
+    expectReadComplete(it);
+    freeTestClient(c6);
+    freeTestClient(c7);
+    freeTestClient(c8);
+}
+
+
+/////////////////////////////////////////////////////
+// TESTS RELATED TO MODIFICATION OF TWO ITEMS
+// When 2 keys are modified, we need to ensure that both keys have been sent before we can send
+//  replication.  This means that if replication is present, we may have to block/expedite for
+//  future keys, even in the inconsistent scenario.
+/////////////////////////////////////////////////////
+
+// Replication enabled, but NOT consistent.  In this case, if ANY of the keys have been iterated,
+//  ALL of the keys must be replicated so that the command can be processed properly on the replica.
+TEST_F(BgIterationTest, modPastFutureItem_eventual) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // In this test, we need a past and future key IN THE SAME DB (they're used in the same command).
+    //  DB1 has lots of buckets.  After reading item 9,
+    //    8 will be past, 10 will be in queue, 11-15 will be future.
+    expectReadKeySequence(it, 0, 9);
+
+    // We're going to write to key 8 (past) and read from key 12 (future)
+    // Even though key 12 is for READ in this command, it must be expedited so that it exists before
+    //  the associated replication is sent.
+    c = getSetGetClient(8, "xxx", 12);
+    simulateBlockedWrite(c);
+
+    // Key 12 will be expedited, but not in front of existing items in queue (can only do that for
+    //  consistent iterators)
+
+    expectReadKey(it, 10);
+    expectReadKey(it, 12); // expedited
+    expectReadKeyWithUnblock(it, 11, 12); // 13 is now in queue
+
+    simulateUnblockedWriteWithModification(c);
+
+    // Continue...
+    expectReadKey(it, 13);
+    expectReadReplication(it, c);
+
+    expectReadKeySequence(it, 14, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// Replication NOT enabled.  A read-only key doesn't need to be expedited, even if other keys have
+//  been processed already.  (This should work identically for both consistent/non-consistent.
+TEST_F(BgIterationTest, modPastFutureItem_start) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter1",
+            BGITERATOR_CONSISTENCY_START, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // In this test, we need a past and future key IN THE SAME DB (they're used in the same command).
+    //  DB1 has lots of buckets.  After reading item 9,
+    //    8 will be past, 10 will be in queue, 11-15 will be future.
+    expectReadKeySequence(it, 0, 9);
+
+    // We're going to write to key 8 (past) and read from key 12 (future)
+    // Since there's no replication, we don't have to worry about expediting 12.  The write will
+    //  proceed without blocking.
+    c = getSetGetClient(8, "xxx", 12);
+    simulateUnblockedWriteWithModification(c);
+
+    // Key 12 will not be expedited.  Remaining keys should be received in normal order.
+    expectReadKeySequence(it, 10, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+TEST_F(BgIterationTest, modPastFutureItem) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter2",
+            BGITERATOR_CONSISTENCY_NONE, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // In this test, we need a past and future key IN THE SAME DB (they're used in the same command).
+    //  DB1 has lots of buckets.  After reading item 9,
+    //    8 will be past, 10 will be in queue, 11-15 will be future.
+    expectReadKeySequence(it, 0, 9);
+
+    // We're going to write to key 8 (past) and read from key 12 (future)
+    // Since there's no replication, we don't have to worry about expediting 12.  The write will
+    //  proceed without blocking.
+    c = getSetGetClient(8, "xxx", 12);
+    simulateUnblockedWriteWithModification(c);
+
+    // Key 9 will not be expedited.  Remaining keys should be received in normal order.
+    expectReadKeySequence(it, 10, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+/////////////////////////////////////////////////////
+// TESTS RELATED TO MISSING ITEMS
+// Missing items are tricky.  A missing item might be logically located in the past or future, in
+//  relation to the current iteration position.  The command may (or may not) create the "missing"
+//  key.  Some general considerations:
+//    * In a consistent iteration, a missing key didn't exist at the time of consistency, or it was
+//      already processed (saved) at the time of the deletion.  If the missing key gets created, we
+//      must be sure to skip it if we later iterate over it.
+//    * In a non-consistent iteration with replication:
+//        * If the key location is already passed, the replication is sent, allowing the key to be
+//          created (or not) based on the replication.
+//        * If the key location is in the future, we can allow the command to proceed, without
+//          replication.  If the key is created, we will process it when the iterator gets to it.
+//
+// We expect:
+//  no-repl, no-consist:  past items are ignored - future items are processed when iterated
+//  no-repl, yes-consist:  past items are ignored - future items are ignored
+//  yes-repl, no-consist:  past item skipped, but replicated - future items are created by replication and skipped later
+//  yes-repl, yes-consist:  past item skipped, but replicated - future items are processed when iterated
+/////////////////////////////////////////////////////
+
+// no-repl, no-consist: creation of PAST item has no impact
+TEST_F(BgIterationTest, missingPastItem) {
+    simpleDelItem(0); // Delete the item before iterator creation
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_NONE, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 1);
+    expectReadKey(it, 2);
+
+    c = getWriteClient(0, "xxx");
+    simulateUnblockedWriteWithModification(c);
+
+    expectReadKeySequence(it, 3, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// no-repl, yes-consist: creation of PAST item has no impact
+TEST_F(BgIterationTest, missingPastItem_start) {
+    simpleDelItem(0); // Delete the item before iterator creation
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_START, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 1);
+    expectReadKey(it, 2);
+
+    c = getWriteClient(0, "xxx");
+    simulateUnblockedWriteWithModification(c);
+
+    expectReadKeySequence(it, 3, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// yes-repl, no-consist: creation of a PAST item will be replicated
+TEST_F(BgIterationTest, missingPastItem_eventual) {
+    simpleDelItem(0); // Delete the item before iterator creation
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 1);
+    expectReadKey(it, 2);
+    expectReadKey(it, 3);
+
+    c = getWriteClient(0, "xxx");
+    simulateUnblockedWriteWithModification(c); // replication will be added after item 4 (3,4 in same bucket)
+
+    expectReadKey(it, 4);
+
+    expectReadReplication(it, c);
+
+    expectReadKeySequence(it, 5, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// no-repl, no-consist: creation of FUTURE item is seen when reached by the iteration.
+TEST_F(BgIterationTest, missingFutureItem) {
+    // Using DB1 so we have lots of buckets
+    simpleDelItem(14); // Delete the item before iterator creation
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_NONE, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0);
+
+    const char * newValue = "xxx";
+    c = getWriteClient(14, newValue);
+    simulateUnblockedWriteWithModification(c);
+
+    expectReadKeySequence(it, 1, 13);
+
+    // We expect to see item 14.
+    //  Note that for an inconsistent DB view, it is logically undefined if this value is seen (or not).
+    //  But as implemented, we should see it and the test is helpful to understand if/when the
+    //  functionality changes.
+    expectReadKey(it, 14, newValue);
+
+    expectReadKey(it, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// no-repl, yes-consist: creation of FUTURE item is ignored by consistent iteration.
+TEST_F(BgIterationTest, missingFutureItem_start) {
+    // Using DB1 so we have lots of buckets
+    simpleDelItem(14); // Delete the item before iterator creation
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_START, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0);
+
+    c = getWriteClient(14, "xxx");
+    simulateUnblockedWriteWithModification(c);
+
+    expectReadKeySequence(it, 1, 13);
+    // Key 14 is missing - it didn't exist at start of consistent iteration
+    expectReadKey(it, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// yes-repl, no-consist: creation of FUTURE item is handled by the replication, and then the key is
+//  later skipped (treated like an early iteration case).
+TEST_F(BgIterationTest, missingFutureItem_eventual) {
+    // Using DB1 so we have lots of buckets
+    simpleDelItem(14); // Delete the item before iterator creation
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0); // Items 1 & 2 are in queue (same bucket)
+
+    c = getWriteClient(14, "xxx");
+    simulateUnblockedWriteWithModification(c);
+
+    expectReadKeySequence(it, 1, 2);
+
+    expectReadReplication(it, c); // Here's the replication creating item 14
+
+    expectReadKeySequence(it, 3, 13);
+    // We expect item 14 to be skipped, because it was created by the earlier replication
+    expectReadKey(it, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+/////////////////////////////////////////////////////
+// TESTS RELATED TO EXPIRATION
+// Expiration can be tricky.  When pre-evaluating a command with bgIteration_blockClientIfRequired,
+//  a key might exist, but be ready for expiration.  Then, as the command executes, the key expires
+//  and gets deleted before the write operation.  Consider SET K V.
+//  In the unexpired case, this appears to bgIteration as a single SET command (which replaces the value).
+//  In the expired case, bgIteration will receive a DEL followed by a SET.
+//
+// Another case is a READ command.  A read command won't cause the client to be blocked.  However,
+//  if the key is expired, this will cause a DEL.  For consistent processing, this key might need to
+//  be expedited so that it can be processed before it gets deleted.  In this case, the key is
+//  unlinked from the main Valkey dictionary, but the actual deletion is deferred.
+/////////////////////////////////////////////////////
+
+TEST_F(BgIterationTest, expireKeys) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_NONE, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0);
+    expectReadKey(it, 1);
+
+    // At this point, key 1 is active, key 2 is in queue.
+
+    simulateExpiration(0);        // Past - we no longer care
+    simulateExpirationOfInuse(2); // Current - it's inuse
+    simulateExpiration(5);        // Future - we don't care (non-consistent)
+
+    expectReadKeySequence(it, 2, 4);
+    // key 5 has been deleted
+    expectReadKeySequence(it, 6, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+TEST_F(BgIterationTest, expireKeys_eventual) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0);
+    expectReadKey(it, 1);
+
+    // At this point, key 1 is active, key 2 is in queue.
+
+    simulateExpiration(0);        // Past - we expect replication
+    simulateExpirationOfInuse(2); // Current - it's inuse, but we expect replication
+    simulateExpiration(5);        // Future - we don't care (non-consistent)
+
+    expectReadKey(it, 2); // this was already queued
+    expectReadReplicationDel(it, 0); // Past item should replicate
+    expectReadReplicationDel(it, 2); // Current item should replicate
+    // Item 5 is a future item and doesn't need to replicate
+
+    expectReadKeySequence(it, 3, 4);
+    // Item 5 has been deleted
+    expectReadKeySequence(it, 6, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+TEST_F(BgIterationTest, expireKeys_start) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_START, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0);
+    expectReadKey(it, 1);
+
+    // At this point, key 1 is active, key 2 is in queue.
+
+    simulateExpiration(0);             // Past - we no longer care
+    simulateExpirationOfInuse(2);      // Current - we must defer
+    simulateExpirationWithExpedite(5); // Future - will become inuse and expedited for consistency
+
+    expectReadKey(it, 5);  // Expedited to front
+
+    expectReadKeySequence(it, 2, 4);
+    // Item 5 has been deleted
+    expectReadKeySequence(it, 6, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// Special case during a non-consistent iteration with replication and expiration.
+//  1. A future key is created (and processed by its replication) - considered early iterated
+//  2. Later the key is expired and deleted during command processing (causes DEL to be sent) - no longer early iterated
+//  3. The key is recreated as part of the command processing (and this command was replicated) - again early iterated
+//  4. Finally, when we iterate to the key, it shouldn't be sent, because it was replicated in step 3.
+TEST_F(BgIterationTest, expireKeys_eventual_FutureKeyCreatedThenExpiredDuringSet) {
+    simpleDelItem(8);   // Start with a missing future item
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0);  // Get the iterator started
+
+    c = getWriteClient(8, "xxx");
+    simulateUnblockedWriteWithModification(c); // Not blocked because this is a future key (but we expect repl)
+
+    // Now do it again, but break out the steps so that we can simulate an expiration
+    bool blocked = bgIteration_blockClientIfRequired(c);
+    EXPECT_FALSE(blocked); // Shouldn't be blocked because this is a future key
+
+    // Now, as the SET command tries to execute, simulate that the key is expired.  Expiration
+    //  processing sends the replication FIRST!
+    robj *argv[2];
+    argv[0] = createStringObjectFromCString("DEL");
+    argv[1] = c->argv[1];
+    serverCommand *cmd = lookupCommandByCString("DEL");
+    bgIteration_handleCommandReplication(getDbFromItemNum(8), cmd, 2, argv);
+    decrRefCount(argv[0]);
+
+    // Now the call to keyDelete happens (after the replication).
+    bgIteration_keyDelete(getDbFromItemNum(8), static_cast<sds>(objectGetVal(c->argv[1])));
+    simpleDelItem(8);     // Simulate the actual del
+
+    // Now the SET will run, re-creating the item (which is still a future item)
+    // We need to duplicate the value because setKey() can reallocate it.
+    robj *value = dupStringObject(c->argv[2]);
+    setKey(c, c->db, c->argv[1], &(value), SETKEY_ADD_OR_UPDATE);
+
+    // Finally, replication will be sent because this is creating a new key
+    bgIteration_handleCommandReplication(getDbFromItemNum(8), c->cmd, c->argc, c->argv);
+
+    // Test that everything comes as expected
+    expectReadKeySequence(it, 1, 2);    // All one bucket - queued after key 0 read
+
+    expectReadReplication(it, c);       // Repl from the first SET command
+    expectReadReplicationDel(it, 8);    // This is the expected replication of the DEL from expire
+    expectReadReplication(it, c);       // Repl from the second SET command (recreating deleted key)
+
+    expectReadKeySequence(it, 3, 7);    // continue with normal iteration
+    // KEY 8 SHOULD BE OMITTED - This was already replicated
+    expectReadKeySequence(it, 9, LAST_ITEM);
+
+    expectReadComplete(it);
+}
+
+
+/////////////////////////////////////////////////////
+// THE REMAINING TESTS ARE GENERAL / UNCATEGORIZED
+/////////////////////////////////////////////////////
+
+// Iteration can be terminated from the main thread or from the child client.
+//  This tests termination driven from the main thread.
+TEST_F(BgIterationTest, earlyTerminationFromMain) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_NONE, NULL, iteratorCleanupFn, PRIVDATA);
+    expectReadKey(it, 0);
+
+    // At this point, keys 1 & 2 are in queue.  A termination should release those keys.
+    bool blocked1 = true;
+    bool blocked2 = true;
+    // We expect no general unblocks, we account for each specific unblock below.
+    EXPECT_CALL(mock, unblockClientsInUseOnKey(_)).Times(0);
+    // We should expect to see unblock called for items 1 & 2, as they are released from the queue.
+    EXPECT_CALL(mock, unblockClientsInUseOnKey(robjEqualsStr(keyStr(1))))
+            .WillOnce(Assign(&blocked1, false));
+    EXPECT_CALL(mock, unblockClientsInUseOnKey(robjEqualsStr(keyStr(2))))
+            .WillOnce(Assign(&blocked2, false));
+    bgIteratorTerminate(it);        // queues the items for release
+    EXPECT_TRUE(bgIteratorIsTerminating(it));
+    bgIteration_feedIterators();    // actually performs the release
+    EXPECT_FALSE(blocked1);
+    EXPECT_FALSE(blocked2);
+
+    bool blocked0 = true;
+    EXPECT_CALL(mock, unblockClientsInUseOnKey(robjEqualsStr(keyStr(0))))
+            .WillOnce(Assign(&blocked0, false));
+    bgIteratorItem *item = bgIteratorRead(it);
+    EXPECT_FALSE(blocked0);
+    EXPECT_EQ(item->type, BGITERATOR_ITEM_TERMINATED);
+
+    bgIteratorClose(it);            // background thread completes the termination
+
+    EXPECT_EQ(cleanupCount, 0);
+    bgIteration_feedIterators();    // main thread, cleans up iterator and calls cleanup function
+    EXPECT_EQ(cleanupCount, 1);
+    EXPECT_TRUE(cleanupTerminated);
+}
+
+
+// Iteration can be terminated from the main thread or from the child client.
+//  This tests termination driven from the child client (the background thread).
+TEST_F(BgIterationTest, earlyTerminationFromChild) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_NONE, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0);
+
+    // At this point, keys 1 & 2 are in queue.  A termination should release those keys.
+    bgIteratorClose(it);            // background thread initiates the termination
+    EXPECT_TRUE(bgIteratorIsTerminating(it));
+
+    bool blocked0 = true;
+    bool blocked1 = true;
+    bool blocked2 = true;
+    // Expecting no extra unblocks
+    EXPECT_CALL(mock, unblockClientsInUseOnKey(_)).Times(0);
+    // We expect item 0 (the in progress item) to be released
+    EXPECT_CALL(mock, unblockClientsInUseOnKey(robjEqualsStr(keyStr(0))))
+            .WillOnce(Assign(&blocked0, false));
+    // We expect items 1-4 (the queued items) to be released
+    EXPECT_CALL(mock, unblockClientsInUseOnKey(robjEqualsStr(keyStr(1))))
+            .WillOnce(Assign(&blocked1, false));
+    EXPECT_CALL(mock, unblockClientsInUseOnKey(robjEqualsStr(keyStr(2))))
+            .WillOnce(Assign(&blocked2, false));
+    bgIteration_feedIterators();
+    EXPECT_FALSE(blocked0);
+    EXPECT_FALSE(blocked1);
+    EXPECT_FALSE(blocked2);
+    EXPECT_EQ(cleanupCount, 1);
+    EXPECT_TRUE(cleanupTerminated);
+}
+
+
+// Edge case.  Executing a command (like SUNIONSTORE) which REPLACES the first key and reads the
+//  second key.  In this case, bgIteration will get notified of the key deletion during execution of
+//  SETUNIONSTORE.  Given that both keys are in the future (not iterated yet), we'll allow the
+//  command to execute, unblocked.  We won't replicate as we'll pick up the key when we get to it.
+TEST_F(BgIterationTest, writeWith2Keys_eventual_keyDeletedDuringSetReplace) {
+    // Using DB1 so we have lots of buckets
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKeySequence(it, 0, 8); // 9 is in queue
+
+    // Write command that has 2 keys. 1 existing key that we write to and 1 dependant future key.
+    c = getWrite2KeysClient("sunionstore", 12, 13);
+
+    simulateUnblockedWrite(c);
+
+    // Now the call to keyDelete happens
+    sds sdskey = sdsnew(keyStr(12));
+    bgIteration_keyDelete(getDbFromItemNum(12), sdskey);
+    sdsfree(sdskey);
+    simpleDelItem(12);     // So simulate the actual del
+
+    // Now the write will run, re-creating the item (which is still a future item)
+    const char * const newValueStr = "new value";
+    robj *newValueRobj = createStringObjectFromCString(newValueStr);
+    setKey(c, c->db, c->argv[1], &newValueRobj, SETKEY_ADD_OR_UPDATE);
+
+    // Finally, we are letting bgIteration know that the write command was executed
+    bgIteration_handleCommandReplication(getDbFromItemNum(12), c->cmd, c->argc, c->argv);
+
+    // Since the write command was not replicated, we expect all the keys to be read in the normal
+    //  order from the dictionary.
+    expectReadKeySequence(it, 9, 11);
+    expectReadKey(it, 12, newValueStr);
+    expectReadKeySequence(it, 13, LAST_ITEM);
+
+    expectReadComplete(it);
+}
+
+
+// Edge case.  When we have a new key which is created by a command, AND replication is enabled, we
+//  expect that we will replicate the command rather than serializing the key/value later.  As an
+//  example, consider SUNIONSTORE A B.  We want to create A by replicating the command.  We don't
+//  want to have to process A as a key later on.  But in this case, we can't run the command until
+//  B has been sent.  We expect the command to be blocked while we send B.
+TEST_F(BgIterationTest, writeWith2Keys_eventual_setNewKey_DependantFuture) {
+    // Using DB1 so we have lots of buckets
+    simpleDelItem(12);   // Deleting key 12 to then create it with a write command
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKeySequence(it, 0, 8); // 9 is in queue
+
+    // Write command that has 2 keys. 1 new key and 1 dependant future key.
+    c = getWrite2KeysClient("sunionstore", 12, 13);
+
+    // We are simulating a new key in the dict. This command should block on the dependant key.
+    // This adds key 13 in the queue since the command depends on it.
+    simulateBlockedWrite(c);
+
+    // Key 9 was already in the queue
+    expectReadKey(it, 9);
+
+    // Key 13 is processed out of order since the write depends on it
+    expectReadKey(it, 13);
+
+    // Reading key 10 will unblock key 13, allowing us to write.
+    expectReadKey(it, 10);
+
+    // Now that key 13 was processed and released by the iterator, the write command can be executed.
+    simulateUnblockedWriteWithModification(c);
+
+    // Key 11 was queued when we read key 10
+    expectReadKey(it, 11);
+
+    // The replication of the write command was enqueued after key 11
+    expectReadReplication(it, c);
+
+    // We shouldn't see key 12 - as that was processed via replication.
+    // We shouldn't see key 13 - as that was expedited earlier
+
+    // Now resuming processing of dict entries
+    expectReadKeySequence(it, 14, LAST_ITEM);
+
+    expectReadComplete(it);
+}
+
+
+// A new key is being created, but is dependent on another key which has already been processed.
+//  In this case, the command shouldn't be blocked.
+TEST_F(BgIterationTest, writeWith2Keys_eventual_setNewKey_DependantPast) {
+    // Using DB1 so we have lots of buckets
+    simpleDelItem(12);   // Deleting key 12 to then create it with a write command
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKeySequence(it, 0, 9); // 10 is in queue, done with 8
+
+    // Write command that has 2 keys. 1 new key and 1 dependant past key.
+    c = getWrite2KeysClient("sunionstore", 12, 8);
+
+    // We are simulating a new key in the dict.
+    // This command should not block since the dependant key has already been processed.
+    simulateUnblockedWriteWithModification(c);
+
+    // Key 10 was put in the queue before the write
+    expectReadKey(it, 10);
+
+    expectReadReplication(it, c);
+
+    expectReadKey(it, 11);
+
+    // Key 12 should be missing - it was processed by replication
+
+    expectReadKeySequence(it, 13, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// A new key is being created, and has dependencies on 2 other keys - one already processed, one not.
+//  In this case, the command should be blocked so that the future key can be sent first.
+TEST_F(BgIterationTest, writeWith3Keys_eventual_setNewKey_1DependantPast1DependantFuture) {
+    // Using DB1 so we have lots of buckets
+    simpleDelItem(12);   // Deleting key 12 to then create it with a write command
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKeySequence(it, 0, 9); // 8 has been returned, 9 is active, 10 is in queue
+
+    // Write command that has 1 new key and 2 dependencies (past/future)
+    c = getWrite3KeysClient("sunionstore", 12, 8, 13);
+
+    // The write should be blocked, so that item 13 can be processed.
+    simulateBlockedWrite(c);
+
+    expectReadKey(it, 10); // 10 was already in queue
+    expectReadKey(it, 13); // 13 was expedited since the write depends on it
+    EXPECT_CALL(mock, unblockClientsInUseOnKey(robjEqualsStr(keyStr(13)))).Times(1);
+    expectReadKey(it, 11); // Releases 13 so the command can execute
+
+    simulateUnblockedWriteWithModification(c);
+
+    expectReadKey(it, 14); // was queued when reading 11 (12 is missing, 13 was expedited)
+
+    expectReadReplication(it, c);
+
+    expectReadKey(it, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// Test an edge case with the same (future) key being repeated in the command, like:
+//  SUNIONSTORE A B B
+// In this test, A is a previously handled key, and B is a future key.  We expect the future key B to
+//  be expedited (once).
+TEST_F(BgIterationTest, writeWith3Keys_eventual_repeatedKey_1DependantPast1RepeatedFuture) {
+    // Using DB1 so we have lots of buckets
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKeySequence(it, 0, 9); // We're done with 8, and 10 is in queue
+
+    // Write command that has 3 keys. 1 past key and 1 repeated key in the future.
+    c = getWrite3KeysClient("sunionstore", 8, 12, 12);
+
+    // This command should block because 12 needs to be expedited.
+    simulateBlockedWrite(c);
+
+    expectReadKey(it, 10); // was already in queue
+    expectReadKey(it, 12); // expedited
+    expectReadKey(it, 11); // releases 12 (unblocking the command)
+
+    // Now that key 12 was processed and released by the iterator, the write command can be executed.
+    simulateUnblockedWriteWithModification(c);
+
+    expectReadKey(it, 13); // queued when we read 11
+
+    expectReadReplication(it, c);
+
+    // Now resuming processing of dict entries.
+    expectReadKeySequence(it, 14, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+/* Tests the replication of a write command that creates a new key and depends on a
+ * future key which is duplicated in the command. */
+TEST_F(BgIterationTest, writeWith3Keys_eventual_repeatedKey_1newKey1RepeatedFuture) {
+    simpleDelItem(3);   // Deleting key 3 to then create it with a write command
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0);
+    // At this point, keys 1 & 2 are in queue.
+
+    // Write command that has 3 keys. 1 new key and 1 repeated key in the future.
+    c = getWrite3KeysClient("sunionstore", 3, 5, 5);
+
+    // This command should block on key 5.
+    // This adds key 5 in the queue because:
+    // - the command depends on key 5 which hasn't been processed yet
+    // - the command creates a new key (key 3).
+    simulateBlockedWrite(c);
+
+    expectReadKeySequence(it, 1, 2); // These were already in queue
+
+    // Key 5 is processed out of order since the write depends on it
+    expectReadKey(it, 5);
+
+    // Keys 4 is the next in queue, and releases the expedited key 5
+    expectReadKey(it, 4);
+
+    // Now that key 4 was processed and released by the iterator, the write command can be executed.
+    simulateUnblockedWriteWithModification(c);
+
+    // Key 6 & 7 are next, having been queued after reading key 4.
+    expectReadKeySequence(it, 6, 7);
+
+    // The replication of the write command was enqueued after 5 was released (unblocking the command)
+    expectReadReplication(it, c);
+
+    // Now resuming processing of dict entries.
+    expectReadKeySequence(it, 8, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+/* A command modifying an in-progress key, but dependent on a future (repeated) key. */
+TEST_F(BgIterationTest, writeWith3Keys_start_repeatedKey_1DependantPast1RepeatedFuture) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_START, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0);
+    // At this point, keys 1 & 2 are in queue.
+
+    // Write command that has 3 keys. 0 is in progress.  4 is still future.
+    // How BLPOP works exactly is not relevant to bgIterator, we just chose BLPOP because it's a
+    //  multi-key command that (potentially) modifies all of its keys (ie is not CMD_WRITE_FIRSTKEY_ONLY).
+    c = getWriteMultiKeysClient("blpop", 0, {4, 4, 0});
+
+    // This command should block on 2 keys (0 and 4), since:
+    //  - key 0 is in use by the iterator (still in the queue since it has not been processed by the consumer yet)
+    //  - key 4 is in the future
+    // This adds key 4 in the queue since the command depends on it and it hasn't been processed yet.
+    simulateBlockedWrite(c, 2);
+
+    // Key 4 is processed out of order since the write depends on it.
+    // Key 4 is processed before key 1 even though key 1 was already in the queue
+    //  because key 4 was enqueued as a priority item with a no-replication iterator.
+    // Reading key 4 will release key 0 - releasing that lock on the command
+    EXPECT_CALL(mock, unblockClientsInUseOnKey(robjEqualsStr(keyStr(0)))).Times(1);
+    expectReadKey(it, 4); // This unblocks key 0
+
+    EXPECT_CALL(mock, unblockClientsInUseOnKey(robjEqualsStr(keyStr(4)))).Times(1);
+    expectReadKey(it, 1); // this was already in queue (releases key 4)
+
+    // Now that keys 4 and 0 were processed and released by the iterator, the write command can be executed.
+    simulateUnblockedWriteWithModification(c);
+
+    expectReadKeySequence(it, 2, 3);
+
+    // 4 is skipped because it was already expedited
+
+    expectReadKeySequence(it, 5, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+/* Test that creates a new key, repeating the future key in the command. */
+TEST_F(BgIterationTest, writeWith3Keys_repeatedKey_1repeatedNewKey) {
+    simpleDelItem(6);   // Deleting key 6 to then create it with a write command
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_NONE, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // Getting started
+    expectReadKeySequence(it, 0, 3);
+    // Now, 0,1,2 are in the past.  3 is being processed, and 4 is in queue.
+
+    // Write command that has 3 keys. 1 new repeated key and 1 key in the past.
+    // How BLPOP works exactly is not relevant to bgIterator, we just chose BLPOP because it's a
+    //  multi-key command that (potentially) modifies all of its keys (ie is not CMD_WRITE_FIRSTKEY_ONLY).
+    c = getWriteMultiKeysClient("blpop", 6, {0, 6, 0});
+
+    // The write command is not blocked since key 0 & 6 are not in use, and no consistency requirements
+    simulateUnblockedWriteWithModification(c);
+
+    // Keys 2, 3 are next in the queue (it was put in the queue at the same time as key 1).
+    expectReadKeySequence(it, 4, 5);
+
+    // There are no consistency requirements - so the new key should just be iterated.
+    // Key 6 is now in the dict with the value of key 0.
+    expectReadKey(it, 6, keyStr(0));
+
+    // Processing the rest of the dict entries.
+    expectReadKeySequence(it, 7, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+/* In this test, the COPY command is copying from one DB to another.  We will create the
+ *  same key in both DBs.  We make sure that the proper key is created via replication, and
+ *  the proper key is created by iteration. */
+TEST_F(BgIterationTest, copyHandlesProperDb_eventual) {
+
+    // NOTE:  Adding E0 to dict 1.  Now there is a E0 in both dict 0 and dict 1.
+    addKeyToDb(1, "H0", "H0");
+
+    // The test:
+    //  We will simulate (with DB0 selected): COPY B1 H0 DB 1 REPLACE
+    //  This will overwrite DB1:H0 that was created above.
+    //  Since DB0:B1 is already in queue, we need to expedite the target (DB1:H0) as well
+    //  After DB1:H0 is "overwritten", it should be marked early iterate.
+    //  We expect DB0:H0 to NOT be marked early iterate, and should get processed normally.
+
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0); // B0
+    // At this point, keys 1(B1) & 2(B2) are in queue.
+
+    // COPY B1 H0 DB 1 REPLACE
+    c = static_cast<client*>(zcalloc(sizeof(client)));
+    c->cmd = lookupCommandByCString("copy");
+    c->db = server.db[0];
+    c->argc = 6;
+    c->argv = static_cast<robj**>(zcalloc(sizeof(robj*) * c->argc));
+    c->argv[0] = createStringObjectFromCString(c->cmd->fullname);
+    c->argv[1] = createStringObjectFromCString("B1");
+    c->argv[2] = createStringObjectFromCString("H0");
+    c->argv[3] = createStringObjectFromCString("DB");
+    c->argv[4] = createStringObjectFromCString("1");
+    c->argv[5] = createStringObjectFromCString("REPLACE");
+
+    // This should block on 2 keys.  DB0:B1 is in queue.  DB1:H0 needs to be expedited.
+    simulateBlockedWrite(c, 2);
+
+    // These 2 keys were already in queue
+    expectReadKey(it, 1); // DB0:B1
+    expectReadKey(it, 2); // DB0:B2
+
+    // And now we expect to see the expedited DB1:H0
+    expectReadDbKeyValue(it, 1, "H0", "H0");
+
+    expectReadKey(it, 3); // releases DB1:E0
+
+    // Now key 4 is still in the queue
+
+    simulateUnblockedWrite(c);  // We shouldn't be blocked this time
+
+    // Now, we'll simulate the actual activity of the COPY.  DB1:H0 will be deleted in order to
+    //  be overwritten.
+    sds sdskey = sdsnew("H0");
+    bgIteration_keyDelete(1, sdskey); // bgIteration would be signaled about the deletion
+    sdsfree(sdskey);
+    // At this point the key would actually be deleted and recreated by COPY (no need to actually do this)
+
+    // And finally the replication (this should queue replication)
+    bgIteration_handleCommandReplication(c->db->id, c->cmd, c->argc, c->argv);
+
+    // Now let's read everything...
+    expectReadKey(it, 4); // (this was previously in queue)
+    expectReadReplication(it, c);       // This is the new replication (creating DB1:H0)
+
+    // The rest should be normal.  We shouldn't see DB1:E0 as it was recreated by replication
+    expectReadKeySequence(it, 5, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// Check that termination with replication in queue works OK.
+TEST_F(BgIterationTest, terminateWithReplication) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0);
+    expectReadKey(it, 1);  // makes sure we are done with key 0 (don't want to block)
+
+    c = getWriteClient(0, "xxx");
+    simulateUnblockedWriteWithModification(c);     // Should replicate
+
+    bgIteratorTerminate(it);
+
+    bgIteratorItem *item = bgIteratorRead(it);
+    ASSERT_EQ(item->type, BGITERATOR_ITEM_TERMINATED);
+
+    bgIteratorClose(it);            // background thread completes the termination
+
+    bgIteration_feedIterators();    // main thread, cleans up iterator and calls cleanup function
+    EXPECT_EQ(cleanupCount, 1);
+    EXPECT_TRUE(cleanupTerminated);
+}
+
+
+// SWAPDB tests - Get ready for the mind-bend...
+
+/* In the non-consistent iterator (without replication), items are identified with the DBID at
+ *  the time they are placed into the queue.  The SWAPDB event signals the change to the
+ *  iterating process - and this is properly sequenced with the DB info for each item. */
+TEST_F(BgIterationTest, swapDB) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_NONE, NULL, iteratorCleanupFn, PRIVDATA);
+    bgIteratorStatus status;
+
+    expectReadKey(it, 0);
+    // Keys 1 & 2 are in queue
+
+    simulateSwapDB(0, 1);       // The swap event will be queued after item 2
+    bgIteratorGetStatus(it, &status);
+    EXPECT_EQ(status.swapdb_queued, 1u);
+    EXPECT_EQ(status.swapdb_processed, 0u);
+
+    expectReadKey(it, 1); // These were already in queue,
+    expectReadKey(it, 2); //  ... and the iteration client hasn't seen the swap yet
+
+    expectReadSwapDB(it, 0, 1);
+    bgIteratorGetStatus(it, &status);
+    EXPECT_EQ(status.swapdb_queued, 1u);
+    EXPECT_EQ(status.swapdb_processed, 0u);     // still processing it...
+
+    // Since we've seen the swap event, items now have the new DBID
+
+    expectReadDbKeyValue(it, 1, keyStr(3), keyStr(3)); // item 3 should show in DB1
+    bgIteratorGetStatus(it, &status);
+    EXPECT_EQ(status.swapdb_queued, 1u);
+    EXPECT_EQ(status.swapdb_processed, 1u);     // done processing the swapdb
+
+    // Keys 4 is in the queue - let's swap back!
+    simulateSwapDB(1, 0);       // The swap event will be queued after item 4
+    bgIteratorGetStatus(it, &status);
+    EXPECT_EQ(status.swapdb_queued, 2u);        // 2nd one queued
+    EXPECT_EQ(status.swapdb_processed, 1u);
+
+    expectReadDbKeyValue(it, 1, keyStr(4), keyStr(4)); // item 4 should still show in DB1
+
+    expectReadSwapDB(it, 1, 0); // Now the iterator knows about the 2nd swap
+    bgIteratorGetStatus(it, &status);
+    EXPECT_EQ(status.swapdb_queued, 2u);
+    EXPECT_EQ(status.swapdb_processed, 1u);     // still processing it...
+
+    // Since we've seen the second swap, items should now show with their original DB
+
+    expectReadKey(it, 5);
+    bgIteratorGetStatus(it, &status);
+    EXPECT_EQ(status.swapdb_queued, 2u);
+    EXPECT_EQ(status.swapdb_processed, 2u);     // done processing all swaps
+
+    expectReadKeySequence(it, 6, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+/* In the consistent iterator (without replication) all items are presented to the iterating
+ * process using the DBID at the time of the iterator creation.  No changes are evident.
+ * Swap events are not presented to the iteration client. */
+TEST_F(BgIterationTest, swapDB_start) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_START, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0);
+    // Keys 1 & 2 are in queue
+
+    simulateSwapDB(0, 1);       // The swap occurs, but the iterator sees no change
+
+    expectReadKey(it, 1);
+    expectReadKey(it, 2);
+    expectReadKey(it, 3);
+
+    // Heck, let's go crazy with those swaps...
+    for (int itemNum = 4;  itemNum <= LAST_ITEM;  itemNum++) {
+        simulateSwapDB(0, 1);
+        expectReadKey(it, itemNum);
+    }
+
+    expectReadComplete(it);
+}
+
+
+/* In the non-consistent iterator WITH replication, items are identified with the DBID at the
+ *  time they are placed into the queue.  The SWAPDB event signals the change to the iterating
+ *  process - and this is properly sequenced with the DB info for each item. */
+TEST_F(BgIterationTest, swapDB_eventual) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0);
+    // Keys 1 & 2 are in queue
+
+    simulateSwapDB(0, 1);       // The swap event will be queued after item 2
+
+    expectReadKey(it, 1); // These were already in queue,
+    expectReadKey(it, 2); //  ... and the iteration client hasn't seen the swap yet
+
+    expectReadSwapDB(it, 0, 1);                         // We should see a SWAPDB event
+    bgIteratorItem *item = bgIteratorRead(it);          // followed by the associated replication
+    ASSERT_EQ(item->type, BGITERATOR_ITEM_REPLICATION);
+    bgIteration_feedIterators();
+
+    // Since we've seen the swap event, items now have the new DBID
+    expectReadDbKeyValue(it, 1, keyStr(3), keyStr(3));  // item 3 is now in DB1
+
+    // Key 4 is in the queue - let's swap back!
+    simulateSwapDB(1, 0);       // The swap event will be queued after item 4
+
+    expectReadDbKeyValue(it, 1, keyStr(4), keyStr(4));  // Still appears as DB1
+
+    expectReadSwapDB(it, 1, 0); // Now the iterator knows about the 2nd swap
+    item = bgIteratorRead(it);
+    ASSERT_EQ(item->type, BGITERATOR_ITEM_REPLICATION);
+    bgIteration_feedIterators();
+
+    expectReadKeySequence(it, 5, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+// There is no test for swapDB_YesReplication_YesConsistent because this configuration is not
+//  permitted with multiple DBs (not permitted with swaps).
+
+
+// FLUSHDB & FLUSHALL Tests
+
+TEST_F(BgIterationTest, flushDB_flushAll) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_NONE, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0);
+    expectReadKey(it, 1);
+
+    // key 1 is active in the iterator - this key won't be deallocated because of the refcount.
+    // keys 2 is in queue - but will be returned to Valkey before the flush.  It is yanked
+    //  back by Valkey and will not be seen by iterator.
+    simulateFlushDB(-1, 1);
+
+    bgIteratorItem *item = bgIteratorRead(it);
+    ASSERT_EQ(item->type, BGITERATOR_ITEM_TERMINATED);
+
+    bgIteratorClose(it);            // background thread completes the termination
+
+    bgIteration_feedIterators();    // main thread, cleans up iterator and calls cleanup function
+    EXPECT_EQ(cleanupCount, 1);
+    EXPECT_TRUE(cleanupTerminated);
+}
+
+TEST_F(BgIterationTest, flushDB_flushOne) {
+    bgIterator *it1 = bgIteratorCreateFullScanIter("iter1",
+            BGITERATOR_CONSISTENCY_NONE, NULL, iteratorCleanupFn, PRIVDATA);
+    bgIterator *it2 = bgIteratorCreateFullScanIter("iter2",
+            BGITERATOR_CONSISTENCY_START, NULL, iteratorCleanupFn, PRIVDATA);
+    bgIteratorStatus status;
+
+    // The test flushes DB0.  This is half the data.  Since <= half, a non-consistent iterator is
+    //  allowed to proceed.  But the consistent iterator will be terminated.
+
+    expectReadKey(it1, 0);
+    expectReadKey(it2, 0);
+    expectReadKey(it1, 1);
+    expectReadKey(it2, 1);
+
+    // key 1 is active in the iterator - this key won't be deallocated because of the refcount.
+    // keys 2 is in queue - but will be returned to Valkey before the flush.  These are yanked
+    //  back by Valkey and will not be seen by iterator.
+    simulateFlushDB(0, 1);
+    bgIteratorGetStatus(it1, &status);
+    EXPECT_EQ(status.flushdb_queued, 1u);
+    EXPECT_EQ(status.flushdb_processed, 0u);
+
+    // Testing the non-consistent one continues...
+    // Everything already on the iterator queue should be preserved (deleted from the DB).
+    //  Keys 2 is already queued (and preserved).
+    expectReadKey(it1, 2);
+
+    // Read the flushdb item on iterator 1.
+    bgIteratorItem *item = bgIteratorRead(it1);
+    ASSERT_EQ(item->type, BGITERATOR_ITEM_FLUSHDB);
+    ASSERT_EQ(item->dbid, 0);
+    bgIteratorGetStatus(it1, &status);
+    EXPECT_EQ(status.flushdb_queued, 1u);
+    EXPECT_EQ(status.flushdb_processed, 0u);    // still processing it
+
+    // And iterator 1 keeps processing with the 2nd DB
+    expectReadKey(it1, ITEMS_PER_DB);
+    bgIteratorGetStatus(it1, &status);
+    EXPECT_EQ(status.flushdb_queued, 1u);
+    EXPECT_EQ(status.flushdb_processed, 1u);    // done with all flushdb's
+
+    expectReadKeySequence(it1, ITEMS_PER_DB + 1, LAST_ITEM);
+    expectReadComplete(it1);
+    EXPECT_EQ(cleanupCount, 1);
+    EXPECT_FALSE(cleanupTerminated);
+
+    // But the consistent iterator should be terminated
+    item = bgIteratorRead(it2);
+    ASSERT_EQ(item->type, BGITERATOR_ITEM_TERMINATED);
+    bgIteratorClose(it2);           // background thread completes the termination
+    bgIteration_feedIterators();    // main thread, cleans up iterator and calls cleanup function
+    EXPECT_EQ(cleanupCount, 2);
+    EXPECT_TRUE(cleanupTerminated);
+}
+
+
+/* A multi with one future and one past key must expedite and replicate. */
+TEST_F(BgIterationTest, multiTwoKeysFirstFuture) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL,
+            NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0);  // Causes keys 1 & 2 to be queued (same bucket)
+    expectReadKey(it, 1);  // Causes key 0 to be released
+
+    // Now, B0(0) is in the past.  H0(5) is in the future.  R0(11) [in DB1] is also future.
+
+    /* For a non-consistent iteration, with replication...
+     * Normally, H0 (future) wouldn't need to expedite - we'd just modify it in place (without
+     * replication and iterate on it later.  But, in this case, since it's wrapped in a multi, with
+     * B0 (past) - we need to expedite H0 so that the multi can all be handled in the same way.
+     * Key R0(11) [DB1] just makes thing a little trickier. */
+    c = getMultiClient("SET B0 xxx; SET H0 xxx; SELECT 1; SET R0 xxx");
+
+    // The EXEC should block on 2 keys, because H0(5) & R0(11) should be expedited
+    simulateBlockedWrite(c, 2);
+
+    expectReadKey(it, 2);  // (was already in queue)
+
+    // Note - it would be logically OK if these 2 were reversed, but this is how the current algorithm works.
+    expectReadKey(it, 5); // Key 5 (H0) was expedited
+    expectReadKey(it, 11); // Key 11 (R0) was expedited
+
+    // We don't need to actually simulate the multi.  Just checking that the keys were expedited.
+
+    // and clean up the rest...
+    expectReadKeySequence(it, 3, 4);
+    // Key 5 was already read above (expedited)
+    expectReadKeySequence(it, 6, 10);
+    // Key 11 was already read above (expedited)
+    expectReadKeySequence(it, 12, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+// Multi blocking on future items.  Consistent.
+TEST_F(BgIterationTest, multiBlocksOnFutureKey) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_START, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0);
+    // Keys 1 & 2 are in queue
+
+    // Since there's no replication, an expedited key will be moved to the front of the queue.
+    // Let's fake a modification to key 6 (H1)
+    // Dummy up a MULTI...
+    c = getMultiClient("SET H1 xxx");
+
+    // Since this is consistent, we will block the client, disallowing the write.
+    simulateBlockedWrite(c);
+
+    // H1 (key 6) will be expedited to the front of the queue (because no replication)
+    expectReadKey(it, 6);
+
+    // Now that we've read key 6, key 0 (B0) is passed and should not block
+    freeTestClient(c);
+    c = getMultiClient("SET B0 xxx");
+    simulateUnblockedWrite(c);
+
+    // and clean up the rest...
+    expectReadKeySequence(it, 1, 5);
+    expectReadKeySequence(it, 7, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// Scenario.  We have a multi that doesn't need to be replicated because all of the keys exist
+//  but are all future keys.  Note that missing keys are considered already-iterated, so all
+//  must exist for this test.  Then:
+//   - we delete a key
+//   - we re-create the deleted (future) key - normally this would be replicated
+//   - we access another (future) key - we don't expect to get blocked!
+TEST_F(BgIterationTest, multiNotReplicatedButDelRecreateAccess) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0);
+    // Keys 1 & 2 are in queue
+
+    c = getMultiClient("DEL H1; SET H1 xxx; SET H2 yyy");
+    // Now let's process the multi.  Since H1 & H2 are both future (existing) items, we shouldn't
+    //  block or replicate.
+    simulateUnblockedWrite(c);  // the EXEC
+
+    // Simulate the DEL H1
+    server.in_exec = 1;    // Simulate actual execution of the MULTI/EXEC
+    advanceMultiClientToCommand(c, 0);  // DEL H1
+    EXPECT_CALL(mock, blockClientInUseOnKeys(c,_,_)).Times(0);
+    bool blocked = bgIteration_blockClientIfRequired(c);
+    EXPECT_FALSE(blocked);
+    simpleDelItem(6); // H1
+    sds delKey = sdsnew(keyStr(6));
+    bgIteration_keyDelete(0, delKey);
+    sdsfree(delKey);
+    bgIteration_handleCommandReplication(c->db->id, c->cmd, c->argc, c->argv); // shouldn't replicate
+
+    // Simulate SET H1 - the key doesn't exist, and would normally replicate and mark early iterate,
+    //  but this is in a transaction, and we are not replicating this transaction.
+    advanceMultiClientToCommand(c, 1);  // SET H1 xxx
+    simulateUnblockedWriteWithModification(c);
+
+    // Now write to another existing future key - this should work if we weren't confused by the DEL
+    advanceMultiClientToCommand(c, 2);  // SET H2 yyy
+    simulateUnblockedWriteWithModification(c);
+    server.in_exec = 0;
+
+    // Now we can continue iterating, and we should pick up keys 1...  (and no replication!)
+    expectReadKeySequence(it, 1, 5);
+    expectReadKey(it, 6, "xxx");
+    expectReadKey(it, 7, "yyy");
+    expectReadKeySequence(it, 8, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// For this test, B0 is added into DB1 - so it exists in both DB 0 and 1.  We will process it
+//  in DB0, but it will be unprocessed in DB1.  See if we track SELECT properly.
+TEST_F(BgIterationTest, multiHandlesSelectProperly) {
+    addKeyToDb(1, "B0", "B0");
+
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_START, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // Read the 1st key - B0 in DB0.
+    expectReadKey(it, 0);
+    // Now, we are done with B0 in DB0, but not in DB1
+    expectReadKey(it, 1); // Reads B1, and releases B0 in DB0
+
+    // These cases should NOT block...  (they access B0 in DB0)
+    c = getMultiClient("SET B0 xxx");
+    simulateUnblockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SELECT 0; SET B0 xxx");
+    simulateUnblockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SET B0 xxx; SELECT 1");
+    simulateUnblockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SELECT 1; SELECT 0; SET B0 xxx; SELECT 1");
+    simulateUnblockedWrite(c);
+    freeTestClient(c);
+
+    // These cases SHOULD block...  (they access B0 in DB1)
+    c = getMultiClient("SET B0 xxx");
+    c->db = server.db[1];
+    simulateBlockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SELECT 1; SET B0 xxx");
+    simulateBlockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SELECT 1; SET B0 xxx; SELECT 0");
+    simulateBlockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SELECT 0; SELECT 1; SET B0 xxx; SELECT 1");
+    simulateBlockedWrite(c);
+
+    expectAnythingCleanup(it);
+}
+
+// For this test, B0 is added into DB1 - so it exists in both DB0 and DB1.  We will process it
+//  in DB0, but it will be unprocessed in DB1.  See if we track select properly - WHEN WE HAVE NO
+//  PERMISSION TO EXECUTE SELECT!
+TEST_F(BgIterationTest, multiHandlesSelectNoPermissionProperly) {
+    addKeyToDb(1, "B0", "B0");
+
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_START, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // Read the 1st key - B0 in DB0.
+    expectReadKey(it, 0);
+    // Now, we are done with B0 in DB0, but not in DB1
+    expectReadKey(it, 1); // Reads B1, and releases B0 in DB0
+
+    // No permission for any commands (specifically select/swapdb)
+    EXPECT_CALL(mock, ACLCheckAllUserCommandPerm(_,_,_,_,_,_))
+        .Times(AtLeast(1)).WillRepeatedly(Return(ACL_DENIED_CMD));
+
+    // These cases should NOT block...  (they access B0 in DB0)
+    //  The SELECTs below are inconsequential - with/without select, same result.
+    c = getMultiClient("SET B0 xxx");
+    simulateUnblockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SELECT 0; SET B0 xxx");
+    simulateUnblockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SET B0 xxx; SELECT 1");
+    simulateUnblockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SELECT 1; SELECT 0; SET B0 xxx; SELECT 1");
+    simulateUnblockedWrite(c);
+    freeTestClient(c);
+
+    // These cases SHOULD block IF SELECT IS WORKING...  (they access B0 in DB1)
+    c = getMultiClient("SET B0 xxx");
+    c->db = server.db[1];      // already starting on DB1
+    simulateBlockedWrite(c);    // will block, no select
+    freeTestClient(c);
+    c = getMultiClient("SELECT 1; SET B0 xxx");
+    simulateUnblockedWrite(c);  // will not block because accessing DB0 (select fails)
+    freeTestClient(c);
+    c = getMultiClient("SELECT 1; SET B0 xxx; SELECT 0");
+    simulateUnblockedWrite(c);  // will not block because accessing DB0 (select fails)
+    freeTestClient(c);
+    c = getMultiClient("SELECT 0; SELECT 1; SET B0 xxx; SELECT 1");
+    simulateUnblockedWrite(c);  // will not block because accessing DB0 (select fails)
+
+    expectAnythingCleanup(it);
+}
+
+// For this test, B0 is added into DB1 - so it exists in both DB0 and DB1.  We will process it
+//  in DB0, but it will be unprocessed in DB1.  See if we track SWAPDB properly.
+TEST_F(BgIterationTest, multiHandlesSwapdbProperly) {
+    addKeyToDb(1, "B0", "B0");
+
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_START, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // Read the 1st key - B0 in DB0.
+    expectReadKey(it, 0);
+    // Now, we are done with B0 in DB0, but not in DB1
+    expectReadKey(it, 1); // Reads B1, and releases B0 in DB0
+
+    // These cases should NOT block...  (they access B0 in DB0)
+    c = getMultiClient("SET B0 xxx");
+    simulateUnblockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SET B0 xxx; SWAPDB 0 1");
+    simulateUnblockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SET B0 xxx; SWAPDB 0 1; SWAPDB 0 1; SET B0 xxx");
+    simulateUnblockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SWAPDB 0 1; SELECT 1; SET B0 xxx");
+    simulateUnblockedWrite(c);
+    freeTestClient(c);
+
+    // These cases SHOULD block...  (they access B0 in DB1)
+    c = getMultiClient("SET B0 xxx");
+    c->db = server.db[1];
+    simulateBlockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SWAPDB 1 0; SET B0 xxx; SWAPDB 0 1");
+    simulateBlockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SWAPDB 1 0; SELECT 0; SET B0 xxx; SWAPDB 0 1");
+    simulateBlockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SWAPDB 1 0; SWAPDB 1 0; SELECT 1; SET B0 xxx; SELECT 1");
+    simulateBlockedWrite(c);
+
+    expectAnythingCleanup(it);
+}
+
+// For this test, B0 is added into DB1 - so it exists in both DB0 and DB1.  We will process it
+//  in DB0, but it will be unprocessed in DB1.  See if we track select properly - WHEN WE HAVE NO
+//  PERMISSION TO EXECUTE SWAPDB!
+TEST_F(BgIterationTest, multiHandlesSwapdbNoPermissionProperly) {
+    addKeyToDb(1, "B0", "B0");
+
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_START, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // Read the 1st key - B0 in DB0.
+    expectReadKey(it, 0);
+    // Now, we are done with B0 in DB0, but not in DB1
+    expectReadKey(it, 1); // Reads B1, and releases B0 in DB0
+
+    // No permission for any commands (specifically select/swapdb)
+    EXPECT_CALL(mock, ACLCheckAllUserCommandPerm(_,_,_,_,_,_))
+        .Times(AtLeast(1)).WillRepeatedly(Return(ACL_DENIED_CMD));
+
+    // These cases should NOT block...  (they access B0 in DB0)
+    //  The SELECTs & SWAPDBs below are inconsequential - with/without select/swapdb, same result.
+    c = getMultiClient("SET B0 xxx");
+    simulateUnblockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SET B0 xxx; SWAPDB 0 1");
+    simulateUnblockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SET B0 xxx; SWAPDB 0 1; SWAPDB 0 1; SET B0 xxx");
+    simulateUnblockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SWAPDB 0 1; SELECT 1; SET B0 xxx");
+    simulateUnblockedWrite(c);
+    freeTestClient(c);
+
+    // These cases SHOULD block IF SELECT/SWAPDB IS WORKING...  (they access B0 in DB1)
+    c = getMultiClient("SET B0 xxx");
+    c->db = server.db[1];
+    simulateBlockedWrite(c);
+    freeTestClient(c);
+    c = getMultiClient("SWAPDB 1 0; SET B0 xxx; SWAPDB 0 1");
+    simulateUnblockedWrite(c);  // will not block because accessing DB0 (swapdb fails)
+    freeTestClient(c);
+    c = getMultiClient("SWAPDB 1 0; SELECT 0; SET B0 xxx; SWAPDB 0 1");
+    simulateUnblockedWrite(c);  // will not block because accessing DB0 (swapdb/select fails)
+    freeTestClient(c);
+    c = getMultiClient("SWAPDB 1 0; SWAPDB 1 0; SELECT 1; SET B0 xxx; SELECT 1");
+    simulateUnblockedWrite(c);  // will not block because accessing DB0 (swapdb/select fails)
+
+    expectAnythingCleanup(it);
+}
+
+
+static void *pthreadWait200msAndReadTwoKeys(void *arg) {
+    bgIterator *it = static_cast<bgIterator*>(arg);
+
+    usleep(200000);
+    bgIteratorRead(it);
+    bgIteratorRead(it);
+    return nullptr;
+}
+
+static void asyncWait200msAndReadTwoKeys(bgIterator *it) {
+    int rc;
+    pthread_attr_t attr;
+    pthread_t thread;
+
+    rc = pthread_attr_init(&attr);
+    assert(rc == 0);
+    rc = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+    assert(rc == 0);
+
+    rc = pthread_create(&thread, &attr, pthreadWait200msAndReadTwoKeys, it);
+    assert(rc == 0);
+
+    rc = pthread_attr_destroy(&attr);
+    assert(rc == 0);
+}
+
+TEST_F(BgIterationTest, testLuaWithUndeclaredKey) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_START, NULL, iteratorCleanupFn, PRIVDATA);
+
+    // Read the 1st key - let's get the party started
+    expectReadKey(it, 0);
+
+    // At this point, key 0 is read.  Keys 1 & 2 are queued (they are all in the same bucket).
+    // If we fake a modification to key 3, we won't know if it's handled out of order.
+    // So we fake a modification to key 4
+    c = getWriteClient(4, "xxx");
+    c->flag.script = 1;
+
+    // Now for a LUA script, we have already blocked (on the eval/evalsha) for any declared keys
+    //  But here, we're about to modify an undeclared key.  We can't actually block in the middle
+    //  of the LUA script.  So this will behave as unblocked, but incur a synchronous wait.
+
+    // Key 4 will get expedited when we simulate the write.  After reading key 4, key 1 will need
+    //  to be read to return key 4 to Valkey, unblocking the synchronous wait.
+    asyncWait200msAndReadTwoKeys(it);
+
+    monotime blockTimer;
+    elapsedStart(&blockTimer);
+    simulateUnblockedWrite(c); // Not blocked, but delays internally
+    // Must have delayed at least 150ms (some time may have passed before timer start)
+    EXPECT_GT(elapsedMs(blockTimer), 150u);
+
+    // Continue...
+    expectReadKeySequence(it, 2, 3);
+    // 4 has already been processed
+    expectReadKeySequence(it, 5, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+
+// Make sure that replication received while processing the last key is sent
+TEST_F(BgIterationTest, replicationReceivedWhileProcessingLastKey) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKeySequence(it, 0, LAST_ITEM);
+
+    c = getWriteClient(0, "xxx");
+    simulateUnblockedWriteWithModification(c);         // Wouldn't be blocked because done with key 0
+    expectReadReplication(it, c);   // Replication happened while processing the last item, should be here.
+
+    simulateUnblockedWriteWithModification(c);         // This won't replicate because we are done processing
+    expectReadComplete(it);         // We expect to see the completion instead
+}
+
+TEST_F(BgIterationTest, repldoneFunctionCalled) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, iteratorRepldoneFn, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKeySequence(it, 0, LAST_ITEM);
+    c = getWriteClient(0, "xxx");
+    simulateUnblockedWriteWithModification(c);         // Wouldn't be blocked because done with key 0
+
+    // Since in testing, we are only feeding one item at a time, and synchronously, we won't call
+    //  the repldone function until after we release the last item.
+    EXPECT_EQ(replDoneConfirmed, 0);
+    expectReadReplication(it, c);   // Replication happened while processing the last item, should be here.
+    EXPECT_EQ(replDoneConfirmed, 1);    // Last key released, now done feeding replication
+
+    simulateUnblockedWriteWithModification(c);         // This won't replicate because we are done processing
+    expectReadComplete(it);         // We expect to see the completion instead
+}
+
+TEST_F(BgIterationTest, repldoneFunctionCalledTwice) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, iteratorRepldoneFnNotBeingReadyInitially, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKeySequence(it, 0, LAST_ITEM);
+    c = getWriteClient(0, "xxx");
+    simulateUnblockedWriteWithModification(c);         // Wouldn't be blocked because done with key 0
+
+    // Won't signal replDone until we've released the final item (which happens when reading the replication)
+    EXPECT_EQ(replDoneRejected, 0);
+    EXPECT_EQ(replDoneConfirmed, 0);
+    expectReadReplication(it, c);   // Releases the final item
+    EXPECT_EQ(replDoneRejected, 1); // replDone called once (and rejected by client)
+    EXPECT_EQ(replDoneConfirmed, 0);
+    simulateUnblockedWriteWithModification(c); // This will replicate (because replDone returned false)
+    expectReadReplication(it, c);   // ReplDone gets called again (and accepted this time)
+    EXPECT_EQ(replDoneConfirmed, 1);
+
+    simulateUnblockedWriteWithModification(c);         // This won't replicate because replication is done
+    expectReadComplete(it);         // We expect to see the completion instead
+}
+
+// Check that the memory reported for replication is correct
+TEST_F(BgIterationTest, checkReplicationByteCount) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, iteratorRepldoneFn, iteratorCleanupFn, PRIVDATA);
+
+    c = getWriteClient(0, "xxx");
+    size_t expectedReplicationSize = sizeof(bgIteratorItem);
+    for (int i = 0;  i < c->argc;  i++) {
+        expectedReplicationSize += objectComputeSize(NULL, c->argv[i], 0, 0);
+    }
+
+    expectReadKey(it, 0);
+    expectReadKey(it, 1);  // Releases and unblocks 0
+    EXPECT_EQ(bgIteration_memoryInuseForReplication(), 0u);
+
+    simulateUnblockedWriteWithModification(c);         // Wouldn't be blocked because done with key 0
+    EXPECT_EQ(bgIteration_memoryInuseForReplication(), expectedReplicationSize);
+    simulateUnblockedWriteWithModification(c);         // and write again (2nd replication)
+    EXPECT_EQ(bgIteration_memoryInuseForReplication(), 2 * expectedReplicationSize);
+
+    expectReadKey(it, 2);  // Keys 0..2 all in same bucket
+
+    expectReadReplication(it, c);
+    // After reading the 1st replication, it hasn't been returned yet (it's the active item)
+    EXPECT_EQ(bgIteration_memoryInuseForReplication(), 2 * expectedReplicationSize);
+    expectReadReplication(it, c);
+    // After reading the 2nd replication, the 1st has been returned
+    EXPECT_EQ(bgIteration_memoryInuseForReplication(), expectedReplicationSize);
+
+    expectReadKey(it, 3);
+    // Now all replication has been returned/freed
+    EXPECT_EQ(bgIteration_memoryInuseForReplication(), 0u);
+
+    expectReadKeySequence(it, 4, LAST_ITEM);
+    expectReadComplete(it);
+}
+
+// Test that for an arbitrary write command having no keys, replication should occur.
+TEST_F(BgIterationTest, checkNoKeysWriteIsReplicated) {
+    bgIterator *it = bgIteratorCreateFullScanIter("iter",
+            BGITERATOR_CONSISTENCY_EVENTUAL, NULL, iteratorCleanupFn, PRIVDATA);
+
+    expectReadKey(it, 0);
+
+    c = getNoKeysWriteClient();
+    EXPECT_CALL(mock, blockClientInUseOnKeys(c,_,_)).Times(0);
+    bool blocked = bgIteration_blockClientIfRequired(c);
+    EXPECT_FALSE(blocked);
+    bgIteration_handleCommandReplication(c->db->id, c->cmd, c->argc, c->argv);
+
+    expectReadKeySequence(it, 1, 2);    // These were already in queue
+
+    expectReadReplication(it, c);
+
+    expectReadKeySequence(it, 3, LAST_ITEM);
+    expectReadComplete(it);
+}
diff --git a/src/unit/wrappers.h b/src/unit/wrappers.h
index 0f80919d6f7..5bfc117fab2 100644
--- a/src/unit/wrappers.h
+++ b/src/unit/wrappers.h
@@ -67,6 +67,9 @@ void __wrap_unblockClientsInUseOnKey(robj *key);
 
 int __wrap_ACLCheckAllUserCommandPerm(user *u, struct serverCommand *cmd, robj **argv, int argc, int dbid, int *idxptr);
 
+size_t __wrap_hashtableScan(hashtable *ht, size_t cursor, hashtableScanFunction fn, void *privdata);
+bool __wrap_hashtableScanHasPassedKey(hashtable *ht, const void *key, size_t cursor);
+
 #undef protected
 #undef _Bool
 #undef typename