From dfc66cd65eed16a2a5866321cfef8c9f1d7b08fc Mon Sep 17 00:00:00 2001
From: Thomas Waldmann <tw@waldmann-edv.de>
Date: Sun, 5 Apr 2026 05:42:22 +0200
Subject: [PATCH 1/6] HashTable: use kv array for keys and values, see #45

To prepare for keeping the kv array in a single mmapped file, keys and values need to live in one array, which can easily be extended at the end.
---
 src/borghash/HashTable.pxd |  3 +--
 src/borghash/HashTable.pyx | 54 ++++++++++++++++++--------------------
 2 files changed, 26 insertions(+), 31 deletions(-)

diff --git a/src/borghash/HashTable.pxd b/src/borghash/HashTable.pxd
index d00222c..5efdbf1 100644
--- a/src/borghash/HashTable.pxd
+++ b/src/borghash/HashTable.pxd
@@ -8,8 +8,7 @@ cdef class HashTable:
     cdef uint32_t* table
     cdef uint32_t kv_capacity, kv_used
     cdef float kv_grow_factor
-    cdef uint8_t* keys
-    cdef uint8_t* values
+    cdef uint8_t* kv
     cdef int stats_get, stats_set, stats_del, stats_iter, stats_lookup, stats_linear
     cdef int stats_resize_table, stats_resize_kv
 
diff --git a/src/borghash/HashTable.pyx b/src/borghash/HashTable.pyx
index 67284e9..907d30e 100644
--- a/src/borghash/HashTable.pyx
+++ b/src/borghash/HashTable.pyx
@@ -1,8 +1,8 @@
 """
 HashTable: low-level hash table mapping fully random bytes keys to bytes values.
            Key and value lengths can be chosen, but are fixed thereafter.
-           The keys and values are stored in arrays separate from the hashtable.
-           The hashtable only stores the 32-bit indices into the key/value arrays.
+           The keys and values are stored together in an array separate from the hashtable.
+           The hashtable only stores the 32-bit indices into the key/value array.
 """
 from __future__ import annotations
 from typing import BinaryIO, Iterator, Any
@@ -50,8 +50,8 @@ cdef class HashTable:
                  kv_grow_factor: float = 1.3) -> None:
         # the load of the ht (.table) shall be between 0.25 and 0.5, so it is fast and has few collisions.
         # it is cheap to have a low hash table load, because .table only stores uint32_t indices into the
-        # .keys and .values array.
-        # the keys/values arrays have bigger elements and are not hash tables, thus collisions and load
+        # .kv array.
+        # the .kv array has bigger elements and is not a hash table, thus collisions and load
         # factor are no concern there. the kv_grow_factor can be relatively small.
         if key_size < 4:
             raise ValueError("key_size must be specified and must be >= 4.")
@@ -71,13 +71,12 @@ cdef class HashTable:
         self.table = NULL
         self._resize_table(self.initial_capacity)
         # ^^^ hash table ^^^
-        # vvv kv arrays vvv
+        # vvv kv array vvv
         self.kv_grow_factor = kv_grow_factor
         self.kv_used = 0
-        self.keys = NULL
-        self.values = NULL
+        self.kv = NULL
         self._resize_kv(int(self.initial_capacity * self.max_load_factor))
-        # ^^^ kv arrays ^^^
+        # ^^^ kv array ^^^
         # vvv stats vvv
         self.stats_get = 0
         self.stats_set = 0
@@ -92,8 +91,7 @@ cdef class HashTable:
 
     def __del__(self) -> None:
         free(self.table)
-        free(self.keys)
-        free(self.values)
+        free(self.kv)
 
     def clear(self) -> None:
         """Empty the HashTable and start from scratch."""
@@ -122,7 +120,7 @@ cdef class HashTable:
         self.stats_lookup += 1
         while (kv_index := self.table[index]) != FREE_BUCKET:
             self.stats_linear += 1
-            if kv_index != TOMBSTONE_BUCKET and memcmp(self.keys + kv_index * self.ksize, key_ptr, self.ksize) == 0:
+            if kv_index != TOMBSTONE_BUCKET and memcmp(self.kv + kv_index * (self.ksize + self.vsize), key_ptr, self.ksize) == 0:
                 if index_ptr:
                     index_ptr[0] = index
                 return 1  # found
@@ -142,7 +140,7 @@ cdef class HashTable:
         self.stats_set += 1
         if self._lookup_index(key_ptr, &index):
             kv_index = self.table[index]
-            memcpy(self.values + kv_index * self.vsize, value_ptr, self.vsize)
+            memcpy(self.kv + kv_index * (self.ksize + self.vsize) + self.ksize, value_ptr, self.vsize)
             return
 
         if self.kv_used >= self.kv_capacity:
@@ -154,8 +152,8 @@ cdef class HashTable:
             raise RuntimeError("KV array is full")
 
         kv_index = self.kv_used
-        memcpy(self.keys + kv_index * self.ksize, key_ptr, self.ksize)
-        memcpy(self.values + kv_index * self.vsize, value_ptr, self.vsize)
+        memcpy(self.kv + kv_index * (self.ksize + self.vsize), key_ptr, self.ksize)
+        memcpy(self.kv + kv_index * (self.ksize + self.vsize) + self.ksize, value_ptr, self.vsize)
         self.kv_used += 1
 
         self.used += 1
@@ -177,7 +175,7 @@ cdef class HashTable:
         self.stats_get += 1
         if self._lookup_index(<uint8_t*> key, &index):
             kv_index = self.table[index]
-            return self.values[kv_index * self.vsize:(kv_index + 1) * self.vsize]
+            return self.kv[kv_index * (self.ksize + self.vsize) + self.ksize : kv_index * (self.ksize + self.vsize) + self.ksize + self.vsize]
         else:
             raise KeyError("Key not found")
 
@@ -191,8 +189,7 @@ cdef class HashTable:
         self.stats_del += 1
         if self._lookup_index(key_ptr, &index):
             kv_index = self.table[index]
-            memset(self.keys + kv_index * self.ksize, 0, self.ksize)
-            memset(self.values + kv_index * self.vsize, 0, self.vsize)
+            memset(self.kv + kv_index * (self.ksize + self.vsize), 0, self.ksize + self.vsize)
             self.table[index] = TOMBSTONE_BUCKET
             self.used -= 1
             self.tombstones += 1
@@ -233,8 +230,8 @@ cdef class HashTable:
         for i in range(self.capacity):
             kv_index = self.table[i]
             if kv_index not in (FREE_BUCKET, TOMBSTONE_BUCKET):
-                key = self.keys[kv_index * self.ksize:(kv_index + 1) * self.ksize]
-                value = self.values[kv_index * self.vsize:(kv_index + 1) * self.vsize]
+                key = self.kv[kv_index * (self.ksize + self.vsize) : kv_index * (self.ksize + self.vsize) + self.ksize]
+                value = self.kv[kv_index * (self.ksize + self.vsize) + self.ksize : kv_index * (self.ksize + self.vsize) + self.ksize + self.vsize]
                 yield key, value
 
     cdef void _resize_table(self, size_t new_capacity):
@@ -250,7 +247,7 @@ cdef class HashTable:
         for i in range(current_capacity):
             kv_index = self.table[i]
             if kv_index not in (FREE_BUCKET, TOMBSTONE_BUCKET):
-                index = self._get_index(self.keys + kv_index * self.ksize)
+                index = self._get_index(self.kv + kv_index * (self.ksize + self.vsize))
                 while new_table[index] != FREE_BUCKET:
                     index = (index + 1) % new_capacity
                 new_table[index] = kv_index
@@ -264,8 +261,7 @@ cdef class HashTable:
         cdef size_t capacity = min(new_capacity, <size_t> RESERVED - 1)
         self.stats_resize_kv += 1
         # realloc is already highly optimized (in Linux). By using mremap internally only the peak address space usage is "old size" + "new size", while the peak memory usage is only "new size".
-        self.keys = <uint8_t*> realloc(self.keys, capacity * self.ksize * sizeof(uint8_t))
-        self.values = <uint8_t*> realloc(self.values, capacity * self.vsize * sizeof(uint8_t))
+        self.kv = <uint8_t*> realloc(self.kv, capacity * (self.ksize + self.vsize) * sizeof(uint8_t))
         self.kv_capacity = <uint32_t> capacity
 
     def k_to_idx(self, key: bytes) -> int:
@@ -283,15 +279,15 @@ cdef class HashTable:
 
     def idx_to_k(self, idx: int) -> bytes:
         """
-        For a given index, return the key stored at that index in the keys array.
+        For a given index, return the key stored at that index in the kv array.
         This is the reverse of k_to_idx (e.g., 32-bit index -> 256-bit key).
         """
         cdef uint32_t kv_index = <uint32_t> idx
-        return self.keys[kv_index * self.ksize:(kv_index + 1) * self.ksize]
+        return self.kv[kv_index * (self.ksize + self.vsize) : kv_index * (self.ksize + self.vsize) + self.ksize]
 
     def kv_to_idx(self, key: bytes, value: bytes) -> int:
         """
-        Return the key's/value's index in the keys/values array (index is stable while in memory).
+        Return the key's/value's index in the kv array (index is stable while in memory).
         This can be used to "abbreviate" a known key/value pair (e.g., 256-bit key + 32-bit value -> 32-bit index).
         """
         if len(key) != self.ksize:
@@ -302,19 +298,19 @@ cdef class HashTable:
         cdef uint32_t kv_index
         if self._lookup_index(<uint8_t*> key, &index):
             kv_index = self.table[index]
-            value_found = self.values[kv_index * self.vsize:(kv_index + 1) * self.vsize]
+            value_found = self.kv[kv_index * (self.ksize + self.vsize) + self.ksize : kv_index * (self.ksize + self.vsize) + self.ksize + self.vsize]
             if value == value_found:
                 return kv_index
         raise KeyError("Key/Value not found")
 
     def idx_to_kv(self, idx: int) -> tuple[bytes, bytes]:
         """
-        For a given index, return the key/value stored at that index in the keys/values array.
+        For a given index, return the key/value stored at that index in the kv array.
         This is the reverse of kv_to_idx (e.g., 32-bit index -> 256-bit key + 32-bit value).
         """
         cdef uint32_t kv_index = <uint32_t> idx
-        key = self.keys[kv_index * self.ksize:(kv_index + 1) * self.ksize]
-        value = self.values[kv_index * self.vsize:(kv_index + 1) * self.vsize]
+        key = self.kv[kv_index * (self.ksize + self.vsize) : kv_index * (self.ksize + self.vsize) + self.ksize]
+        value = self.kv[kv_index * (self.ksize + self.vsize) + self.ksize : kv_index * (self.ksize + self.vsize) + self.ksize + self.vsize]
         return key, value
 
     @property

From 21432deda37e9827734149fa2087cf5cb11cb0c6 Mon Sep 17 00:00:00 2001
From: Thomas Waldmann <tw@waldmann-edv.de>
Date: Sun, 5 Apr 2026 08:32:02 +0200
Subject: [PATCH 2/6] implement mmap support for HashTableNT, fixes #45

---
 src/borghash/HashTable.pxd     |   8 +-
 src/borghash/HashTable.pyx     |  89 ++++++++++++++++++--
 src/borghash/HashTableNT.pxd   |  16 ++--
 src/borghash/HashTableNT.pyx   | 147 ++++++++++++++++++++++++++++-----
 tests/hashtablent_mmap_test.py | 123 +++++++++++++++++++++++++++
 tests/hashtablent_test.py      |   2 +-
 6 files changed, 350 insertions(+), 35 deletions(-)
 create mode 100644 tests/hashtablent_mmap_test.py

diff --git a/src/borghash/HashTable.pxd b/src/borghash/HashTable.pxd
index 5efdbf1..d5d1841 100644
--- a/src/borghash/HashTable.pxd
+++ b/src/borghash/HashTable.pxd
@@ -1,18 +1,22 @@
 from libc.stdint cimport uint8_t, uint32_t
 
 cdef class HashTable:
-    cdef int ksize, vsize
+    cdef public int ksize, vsize
     cdef readonly size_t capacity, used
     cdef size_t initial_capacity, tombstones
     cdef float max_load_factor, min_load_factor, shrink_factor, grow_factor
     cdef uint32_t* table
-    cdef uint32_t kv_capacity, kv_used
+    cdef public uint32_t kv_capacity, kv_used
     cdef float kv_grow_factor
     cdef uint8_t* kv
+    cdef int fd
+    cdef size_t mmap_size
+    cdef uint32_t kv_offset
     cdef int stats_get, stats_set, stats_del, stats_iter, stats_lookup, stats_linear
     cdef int stats_resize_table, stats_resize_kv
 
     cdef size_t _get_index(self, uint8_t* key)
     cdef int _lookup_index(self, uint8_t* key_ptr, size_t* index_ptr)
+    cpdef void update_table_only(self, bytes key, uint32_t kv_index)
     cdef void _resize_table(self, size_t new_capacity)
     cdef void _resize_kv(self, size_t new_capacity)
diff --git a/src/borghash/HashTable.pyx b/src/borghash/HashTable.pyx
index 907d30e..0113708 100644
--- a/src/borghash/HashTable.pyx
+++ b/src/borghash/HashTable.pyx
@@ -10,6 +10,10 @@ from typing import BinaryIO, Iterator, Any
 from libc.stdlib cimport malloc, free, realloc
 from libc.string cimport memcpy, memset, memcmp
 from libc.stdint cimport uint8_t, uint32_t
+from libc.errno cimport errno
+from posix.unistd cimport close, ftruncate, lseek, SEEK_END
+from posix.fcntl cimport open as c_open, O_RDWR, O_CREAT
+from posix.mman cimport mmap, munmap, MAP_SHARED, PROT_READ, PROT_WRITE
 
 from collections.abc import Mapping
 
@@ -47,7 +51,8 @@ cdef class HashTable:
                  key_size: int = 0, value_size: int = 0, capacity: int = MIN_CAPACITY,
                  max_load_factor: float = 0.5, min_load_factor: float = 0.10,
                  shrink_factor: float = 0.4, grow_factor: float = 2.0,
-                 kv_grow_factor: float = 1.3) -> None:
+                 kv_grow_factor: float = 1.3,
+                 path: str = None, kv_offset: int = 0) -> None:
         # the load of the ht (.table) shall be between 0.25 and 0.5, so it is fast and has few collisions.
         # it is cheap to have a low hash table load, because .table only stores uint32_t indices into the
         # .kv array.
@@ -59,6 +64,15 @@ cdef class HashTable:
             raise ValueError("value_size must be specified and must be > 0.")
         self.ksize = key_size
         self.vsize = value_size
+        # vvv mmap vvv
+        self.fd = -1
+        self.mmap_size = 0
+        self.kv_offset = kv_offset
+        if path:
+            self.fd = c_open(path.encode('utf-8'), O_RDWR | O_CREAT, 0o644)
+            if self.fd == -1:
+                raise OSError(errno, f"Could not open {path}")
+        # ^^^ mmap ^^^
         # vvv hash table vvv
         self.max_load_factor = max_load_factor
         self.min_load_factor = min_load_factor
@@ -75,7 +89,21 @@ cdef class HashTable:
         self.kv_grow_factor = kv_grow_factor
         self.kv_used = 0
         self.kv = NULL
-        self._resize_kv(int(self.initial_capacity * self.max_load_factor))
+        if self.fd != -1:
+            # For mmap, we determine current size and capacity from file size.
+            file_size = lseek(self.fd, 0, SEEK_END)
+            if file_size > self.kv_offset:  # kv array is not empty
+                self.mmap_size = file_size
+                # map the full file, starting from offset 0
+                new_kv = mmap(NULL, self.mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, self.fd, 0)
+                if new_kv == <void*> -1:
+                    raise OSError(errno, "mmap failed")
+                self.kv = <uint8_t*> new_kv + self.kv_offset
+                self.kv_capacity = <uint32_t>((self.mmap_size - self.kv_offset) // (self.ksize + self.vsize))
+            else:
+                self._resize_kv(int(self.initial_capacity * self.max_load_factor))
+        else:
+            self._resize_kv(int(self.initial_capacity * self.max_load_factor))
         # ^^^ kv array ^^^
         # vvv stats vvv
         self.stats_get = 0
@@ -91,7 +119,12 @@ cdef class HashTable:
 
     def __del__(self) -> None:
         free(self.table)
-        free(self.kv)
+        if self.fd != -1:
+            if self.kv != NULL:
+                munmap(self.kv - self.kv_offset, self.mmap_size)
+            close(self.fd)
+        else:
+            free(self.kv)
 
     def clear(self) -> None:
         """Empty the HashTable and start from scratch."""
@@ -144,7 +177,8 @@ cdef class HashTable:
             return
 
         if self.kv_used >= self.kv_capacity:
-            self._resize_kv(int(self.kv_capacity * self.kv_grow_factor))
+            # "+ 1" ensures growth even for very small or 0 capacity.
+            self._resize_kv(int(self.kv_capacity * self.kv_grow_factor + 1))
         if self.kv_used >= self.kv_capacity:
             # Should never happen. See "RESERVED" constant - we allow almost 4Gi kv entries.
             # For a typical 256-bit key and a small 32-bit value that would already consume 176GiB+
@@ -234,6 +268,17 @@ cdef class HashTable:
                 value = self.kv[kv_index * (self.ksize + self.vsize) + self.ksize : kv_index * (self.ksize + self.vsize) + self.ksize + self.vsize]
                 yield key, value
 
+    cpdef void update_table_only(self, bytes key, uint32_t kv_index):
+        """Make the hash table entry for key refer to kv_index, without modifying the kv array."""
+        cdef size_t index
+        self._lookup_index(<uint8_t*> key, &index)
+        # index: the key's bucket if found, else presumably the free bucket where probing ended - TODO confirm.
+        if self.table[index] == FREE_BUCKET or self.table[index] == TOMBSTONE_BUCKET:
+            self.used += 1
+        self.table[index] = kv_index
+        if self.used + self.tombstones > self.capacity * self.max_load_factor:
+            self._resize_table(int(self.capacity * self.grow_factor))
+
     cdef void _resize_table(self, size_t new_capacity):
         cdef size_t i, index
         cdef uint32_t kv_index
@@ -259,11 +304,41 @@ cdef class HashTable:
     cdef void _resize_kv(self, size_t new_capacity):
         # We must never use kv indices >= RESERVED; thus, we'll never need more capacity either.
         cdef size_t capacity = min(new_capacity, <size_t> RESERVED - 1)
+        cdef size_t new_mmap_size
+        cdef void* new_kv
         self.stats_resize_kv += 1
-        # realloc is already highly optimized (in Linux). By using mremap internally only the peak address space usage is "old size" + "new size", while the peak memory usage is only "new size".
-        self.kv = <uint8_t*> realloc(self.kv, capacity * (self.ksize + self.vsize) * sizeof(uint8_t))
+        if self.fd != -1:
+            new_mmap_size = self.kv_offset + capacity * (self.ksize + self.vsize) * sizeof(uint8_t)
+            if self.kv != NULL:
+                # Don't shrink automatically during resize if we already have space.
+                # This prevents truncating an existing file's data when it's opened
+                # with a smaller initial_capacity than the file already contains.
+                # HOWEVER, if capacity is kv_used, we might be in shrink_to_fit.
+                # Let's allow shrinking if capacity < self.kv_capacity.
+                if new_mmap_size <= self.mmap_size and capacity >= self.kv_capacity:
+                    return
+                munmap(self.kv - self.kv_offset, self.mmap_size)
+            if ftruncate(self.fd, new_mmap_size) == -1:
+                raise OSError(errno, "ftruncate failed")
+            new_kv = mmap(NULL, new_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, self.fd, 0)
+            if new_kv == <void*> -1:
+                raise OSError(errno, "mmap failed")
+            self.kv = <uint8_t*> new_kv + self.kv_offset
+            self.mmap_size = new_mmap_size
+        else:
+            # realloc is already highly optimized (in Linux). By using mremap internally, only the peak address space usage is "old size" + "new size", while the peak memory usage is only "new size".
+            self.kv = <uint8_t*> realloc(self.kv, capacity * (self.ksize + self.vsize) * sizeof(uint8_t))
         self.kv_capacity = <uint32_t> capacity
 
+    def shrink_to_fit(self) -> None:
+        """
+        Shrink the KV array (and, if file-backed, the file) to the actually used size.
+
+        No extra work is needed for the file-backed case: _resize_kv itself
+        calls ftruncate with kv_offset + capacity * entry_size, capacity being kv_used here.
+        """
+        self._resize_kv(self.kv_used)
+
     def k_to_idx(self, key: bytes) -> int:
         """
         Return the key's index in the keys array (index is stable while in memory).
@@ -283,6 +358,8 @@ cdef class HashTable:
         This is the reverse of k_to_idx (e.g., 32-bit index -> 256-bit key).
         """
         cdef uint32_t kv_index = <uint32_t> idx
+        if kv_index >= self.kv_used:
+             raise KeyError(f"Index {kv_index} out of range (kv_used={self.kv_used})")
         return self.kv[kv_index * (self.ksize + self.vsize) : kv_index * (self.ksize + self.vsize) + self.ksize]
 
     def kv_to_idx(self, key: bytes, value: bytes) -> int:
diff --git a/src/borghash/HashTableNT.pxd b/src/borghash/HashTableNT.pxd
index d3b15f7..fdc2a2f 100644
--- a/src/borghash/HashTableNT.pxd
+++ b/src/borghash/HashTableNT.pxd
@@ -1,8 +1,10 @@
+from .HashTable cimport HashTable
+
 cdef class HashTableNT:
-    cdef int key_size
-    cdef object byte_order
-    cdef object value_type
-    cdef object value_format
-    cdef object value_struct
-    cdef int value_size
-    cdef object inner
+    cdef public int key_size
+    cdef public object byte_order
+    cdef public object value_type
+    cdef public object value_format
+    cdef public object value_struct
+    cdef public int value_size
+    cdef public HashTable inner
diff --git a/src/borghash/HashTableNT.pyx b/src/borghash/HashTableNT.pyx
index 02b0e92..f9d8310 100644
--- a/src/borghash/HashTableNT.pyx
+++ b/src/borghash/HashTableNT.pyx
@@ -11,11 +11,14 @@ import json
 import struct
 
 from .HashTable import HashTable, MIN_CAPACITY, _fill
+from posix.types cimport off_t
+from posix.unistd cimport lseek, SEEK_SET, SEEK_CUR, write as c_write
 
 MAGIC = b"BORGHASH"
 assert len(MAGIC) == 8
 VERSION = 1  # version of the on-disk (serialized) format produced by .write().
 HEADER_FMT = "<8sII"  # magic, version, meta length
+ALIGNMENT = 64  # usual length of cache line
 
 BYTE_ORDER = dict(big=">", little="<", network="!", native="=")  # struct format chars
 
@@ -23,8 +26,9 @@ _NoDefault = object()
 
 cdef class HashTableNT:
     def __init__(self, items=None, *,
-                 key_size: int, value_type: Any, value_format: Any,
-                 capacity: int = MIN_CAPACITY, byte_order="little") -> None:
+                 int key_size=0, value_type=None, value_format=None,
+                 int capacity = MIN_CAPACITY, str byte_order="little",
+                 str path = None, int kv_offset = 4096) -> None:
         if not isinstance(key_size, int) or not key_size >= 4:
             raise ValueError("key_size must be an integer and >= 4.")
         if type(value_type) is not type:
@@ -43,7 +47,8 @@ cdef class HashTableNT:
         self.byte_order = byte_order
         self.value_struct = struct.Struct(BYTE_ORDER[byte_order] + "".join(value_format))
         self.value_size = self.value_struct.size
-        self.inner = HashTable(key_size=self.key_size, value_size=self.value_size, capacity=capacity)
+        self.inner = HashTable(key_size=self.key_size, value_size=self.value_size, capacity=capacity,
+                               path=path, kv_offset=kv_offset)
         _fill(self, items)
 
     def clear(self) -> None:
@@ -123,7 +128,7 @@ cdef class HashTableNT:
         else:
             return self._to_namedtuple_value(binary_value)
 
-    def update(self, other=(), /, **kwds):
+    def update(self, other=(), **kwds):
         """Like dict.update(), but 'other' can also be a HashTableNT instance."""
         if isinstance(other, HashTableNT):
             for key, value in other.items():
@@ -176,18 +181,28 @@ cdef class HashTableNT:
             'value_format_fields': self.value_format._fields,
             'value_format': self.value_format,
             'capacity': self.inner.capacity,
-            'used': self.inner.used,  # count of keys / values
+            'used': self.inner.used,  # count of valid keys / values
+            'kv_used': self.inner.kv_used, # count of slots in the kv array
         }
         meta_bytes = json.dumps(meta).encode("utf-8")
-        meta_size = len(meta_bytes)
+        header_size = struct.calcsize(HEADER_FMT)
+        # Calculate kv_offset based on current meta_bytes length, then meta_size is everything in between.
+        kv_offset = (header_size + len(meta_bytes) + ALIGNMENT - 1) // ALIGNMENT * ALIGNMENT
+        meta_size = kv_offset - header_size
         header_bytes = struct.pack(HEADER_FMT, MAGIC, VERSION, meta_size)
         fd.write(header_bytes)
         fd.write(meta_bytes)
+        # Pad with zeros until kv_offset
+        fd.write(b'\x00' * (meta_size - len(meta_bytes)))
         count = 0
-        for key, value in self.inner.items():
-            fd.write(key)
-            fd.write(value)
-            count += 1
+        for i in range(self.inner.kv_used):
+            # Write every kv slot, including zeroed (deleted) ones: read() and
+            # open_mmap() expect exactly kv_used slots at stable positions.
+            key, value = self.inner.idx_to_kv(i)
+            fd.write(key)
+            fd.write(value)
+            if any(key):  # only live entries count towards .used
+                count += 1
         assert count == self.inner.used
 
     @classmethod
@@ -203,27 +218,74 @@ cdef class HashTableNT:
         header_size = struct.calcsize(HEADER_FMT)
         header_bytes = fd.read(header_size)
         if len(header_bytes) < header_size:
-            raise ValueError(f"Invalid file, file is too short.")
+            raise ValueError("Invalid file, file is too short.")
         magic, version, meta_size = struct.unpack(HEADER_FMT, header_bytes)
         if magic != MAGIC:
-            raise ValueError(f"Invalid file, magic {MAGIC.decode()} not found.")
+            # Try old header format? No, we broke compatibility on purpose.
+            raise ValueError("Invalid file, magic %s not found." % MAGIC.decode())
         if version != VERSION:
-            raise ValueError(f"Unsupported file version {version}.")
+            raise ValueError("Unsupported file version %d." % version)
         meta_bytes = fd.read(meta_size)
         if len(meta_bytes) < meta_size:
-            raise ValueError(f"Invalid file, file is too short.")
-        meta = json.loads(meta_bytes.decode("utf-8"))
+            raise ValueError("Invalid file, file is too short.")
+        meta = json.loads(meta_bytes.decode("utf-8").rstrip('\x00'))
         value_type = namedtuple(meta['value_type_name'], meta['value_type_fields'])
         value_format_t = namedtuple(meta['value_format_name'], meta['value_format_fields'])
         value_format = value_format_t(*meta['value_format'])
         ht = cls(key_size=meta['key_size'], value_format=value_format, value_type=value_type,
                  capacity=meta['capacity'], byte_order=meta['byte_order'])
-        count = 0
         ksize, vsize = meta['key_size'], meta['value_size']
-        for i in range(meta['used']):
+        kv_used = meta.get('kv_used', meta['used'])
+        for i in range(kv_used):
             key = fd.read(ksize)
             value = fd.read(vsize)
-            ht._set_raw(key, value)
+            # A zero key means it's a deleted slot or uninitialized
+            if any(key):
+                 ht._set_raw(key, value)
+        return ht
+
+    @classmethod
+    def open_mmap(cls, path: str, value_type: Any = None, value_format: Any = None):
+        """
+        Open an existing borghash file in mmap mode.
+
+        value_type / value_format default to namedtuples built from the file's metadata.
+        Raises ValueError for invalid, truncated or unsupported files.
+        """
+        with open(path, 'rb') as fd:
+            header_size = struct.calcsize(HEADER_FMT)
+            header_bytes = fd.read(header_size)
+            if len(header_bytes) < header_size:
+                raise ValueError("Invalid file, file is too short.")
+            magic, version, meta_size = struct.unpack(HEADER_FMT, header_bytes)
+            if magic != MAGIC:
+                raise ValueError("Invalid file, magic %s not found." % MAGIC.decode())
+            if version != VERSION:
+                raise ValueError("Unsupported file version %d." % version)
+            meta_bytes = fd.read(meta_size)
+            if len(meta_bytes) < meta_size:
+                raise ValueError("Invalid file, file is too short.")
+            meta = json.loads(meta_bytes.decode("utf-8").rstrip('\x00'))
+            kv_offset = header_size + meta_size
+
+        if value_type is None:
+            value_type = namedtuple(meta['value_type_name'], meta['value_type_fields'])
+        if value_format is None:
+            value_format_t = namedtuple(meta['value_format_name'], meta['value_format_fields'])
+            value_format = value_format_t(*meta['value_format'])
+        ht = cls(key_size=meta['key_size'], value_format=value_format, value_type=value_type,
+                 capacity=meta['capacity'], byte_order=meta['byte_order'],
+                 path=path, kv_offset=kv_offset)
+        # The kv array is already mapped, but the hash table must be rebuilt from it.
+        kv_used = meta.get('kv_used', meta['used'])  # fallback if 'kv_used' is missing
+        if kv_used > ht.inner.kv_capacity:
+            # The metadata claims more slots than the file has, do not read beyond the mapping.
+            raise ValueError("Invalid file, file is too short.")
+        ht.inner.kv_used = kv_used
+        for i in range(kv_used):
+            key = ht.inner.idx_to_k(i)
+            if any(key):  # an all-zero key marks a deleted or uninitialized slot
+                ht.inner.update_table_only(key, i)
         return ht
 
     def size(self) -> int:
@@ -232,6 +294,53 @@ cdef class HashTableNT:
 
         The serialized size of the metadata is a bit hard to predict, but we cover that with one_time_overheads.
         """
-        one_time_overheads = 4096  # very rough
+        one_time_overheads = 4096  # header + meta + alignment padding
         N = self.inner.used
         return int(N * (self.key_size + self.value_size) + one_time_overheads)
+
+    def write_header(self):
+        """
+        Write/update the file header and metadata. Required for mmapped files.
+
+        Call this after modifying a memory-mapped table, so that the metadata
+        stored in the file (e.g. used, kv_used) gets updated.
+        Header, metadata and zero padding always fill exactly kv_offset bytes,
+        so the kv array behind them does not need to be moved.
+
+        Raises RuntimeError if the table is not memory-mapped or if the metadata
+        does not fit in front of the kv array.
+        Raises OSError if writing to the file fails.
+        """
+        import os  # function-local import, only needed for os.pwrite
+
+        if self.inner.fd == -1:
+            raise RuntimeError("Not a memory-mapped HashTableNT (no path/fd).")
+
+        meta = {
+            'key_size': self.key_size,
+            'value_size': self.value_size,
+            'byte_order': self.byte_order,
+            'used': self.inner.used,
+            'kv_used': self.inner.kv_used,
+            'capacity': self.inner.capacity,
+            'value_type_name': self.value_type.__name__,
+            'value_type_fields': self.value_type._fields,
+            'value_format_name': self.value_format.__class__.__name__,
+            'value_format_fields': self.value_format._fields,
+            'value_format': self.value_format,
+        }
+
+        meta_bytes = json.dumps(meta).encode("utf-8")
+        header_size = struct.calcsize(HEADER_FMT)
+        # The kv array can not be shifted, so we always keep the original kv_offset
+        # and only check that header and metadata still fit in front of it.
+        kv_offset = self.inner.kv_offset
+        if header_size + len(meta_bytes) > kv_offset:
+            raise RuntimeError("Metadata too large for the current kv_offset.")
+        meta_size = kv_offset - header_size
+        header_bytes = struct.pack(HEADER_FMT, MAGIC, VERSION, meta_size)
+        data = header_bytes + meta_bytes + b'\x00' * (meta_size - len(meta_bytes))
+        # pwrite writes at file offset 0 without moving the file position of the fd
+        # and raises OSError on failure; also treat a short write as an error.
+        if os.pwrite(self.inner.fd, data, 0) != len(data):
+            raise OSError("Could not write the complete header.")
diff --git a/tests/hashtablent_mmap_test.py b/tests/hashtablent_mmap_test.py
new file mode 100644
index 0000000..7e6d41b
--- /dev/null
+++ b/tests/hashtablent_mmap_test.py
@@ -0,0 +1,123 @@
+from collections import namedtuple
+import os
+
+import pytest
+
+from borghash import HashTableNT
+from .hashtable_test import H2
+
+key_size = 32
+value_type = namedtuple("vt", "v1 v2 v3")
+value_format_t = namedtuple("vf", "v1 v2 v3")
+value_format = value_format_t(v1="I", v2="I", v3="I")
+
+key1, value1 = b"a" * 32, value_type(11, 12, 13)
+key2, value2 = b"b" * 32, value_type(21, 22, 23)
+key3, value3 = b"c" * 32, value_type(31, 32, 33)
+
+
+def test_mmap_open_existing(tmp_path):
+    path = str(tmp_path / "test.borghash")
+    # Create and write a file
+    ht = HashTableNT(key_size=key_size, value_type=value_type, value_format=value_format)
+    ht[key1] = value1
+    ht[key2] = value2
+    ht.write(path)
+
+    # Open in mmap mode
+    ht_mmap = HashTableNT.open_mmap(path)
+    assert len(ht_mmap) == 2
+    assert ht_mmap[key1] == value1
+    assert ht_mmap[key2] == value2
+
+
+def test_mmap_persistence(tmp_path):
+    path = str(tmp_path / "test_persistence.borghash")
+    ht = HashTableNT(key_size=key_size, value_type=value_type, value_format=value_format)
+    ht[key1] = value1
+    ht.write(path)
+    
+    # Open mmap, modify, and close
+    ht_mmap = HashTableNT.open_mmap(path)
+    ht_mmap[key2] = value2
+    del ht_mmap[key1]
+    # Update header/metadata in the file
+    ht_mmap.write_header()
+    
+    # Re-open normally to verify
+    ht_read = HashTableNT.read(path)
+    assert key1 not in ht_read
+    assert ht_read[key2] == value2
+
+
+def test_mmap_resize(tmp_path):
+    path = str(tmp_path / "test_resize.borghash")
+    # Small initial capacity to trigger resize early
+    ht = HashTableNT(key_size=key_size, value_type=value_type, value_format=value_format, capacity=100)
+    ht[key1] = value1
+    ht.write(path)
+    
+    ht_mmap = HashTableNT.open_mmap(path)
+    # Add many items to trigger KV and table resize
+    for i in range(200):
+        key = H2(i)
+        ht_mmap[key] = value_type(i, i+1, i+2)
+    
+    ht_mmap.write_header()  # update used count in metadata
+    
+    assert len(ht_mmap) == 201
+    assert ht_mmap[key1] == value1
+    
+    # Close and reopen to ensure resized file is valid
+    ht_reopened = HashTableNT.open_mmap(path)
+    assert len(ht_reopened) == 201
+    assert ht_reopened[key1] == value1
+    for i in range(200):
+        key = H2(i)
+        assert ht_reopened[key] == value_type(i, i+1, i+2)
+
+
+def test_mmap_shrink_to_fit(tmp_path):
+    path = str(tmp_path / "test_shrink.borghash")
+    # Small initial_capacity so it grows
+    ht = HashTableNT(key_size=key_size, value_type=value_type, value_format=value_format, capacity=100)
+    for i in range(100):
+        ht[H2(i)] = value_type(i, 0, 0)
+    ht.write(path)
+    
+    ht_mmap = HashTableNT.open_mmap(path)
+    # Add items to trigger growth of kv_capacity beyond kv_used
+    for i in range(100, 200):
+         ht_mmap[H2(i)] = value_type(i, 0, 0)
+    
+    # After 200 items, kv_capacity > 200 (due to kv_grow_factor)
+    assert ht_mmap.inner.kv_capacity > 200
+    initial_size = os.path.getsize(path)
+
+    # shrink_to_fit should reduce file size to exactly kv_used
+    ht_mmap.inner.shrink_to_fit()
+    ht_mmap.write_header()
+    
+    shrunk_size = os.path.getsize(path)
+    assert shrunk_size < initial_size
+    assert len(ht_mmap) == 200
+
+
+def test_mmap_new_file(tmp_path):
+    # Testing using HashTableNT directly with a path to create a NEW mmapped file
+    path = str(tmp_path / "new_mmap.borghash")
+    ht = HashTableNT(key_size=key_size, value_type=value_type, value_format=value_format, path=path)
+    ht[key1] = value1
+    ht.write_header() # Initialize header for new file
+    assert os.path.exists(path)
+    
+    # Check if it persists without explicit write()
+    ht2 = HashTableNT.open_mmap(path)
+    assert ht2[key1] == value1
+
+
+def test_mmap_corrupt_magic(tmp_path):
+    path = tmp_path / "corrupt.borghash"
+    path.write_bytes(b"NOTBORG" + b"\x00" * 100)
+    with pytest.raises(ValueError, match="magic BORGHASH not found"):
+        HashTableNT.open_mmap(str(path))
diff --git a/tests/hashtablent_test.py b/tests/hashtablent_test.py
index cad2498..3a578ac 100644
--- a/tests/hashtablent_test.py
+++ b/tests/hashtablent_test.py
@@ -184,7 +184,7 @@ def test_size(ntht, n):
         ntht.write(f)
         real_size = f.tell()
     # Is our estimate good enough?
-    assert estimated_size * 0.9 < real_size < estimated_size * 1.0
+    assert estimated_size * 0.9 < real_size <= estimated_size * 1.0
 
 
 def test_demo():

From cbc849e428727c434f966f6742a356250dead4e7 Mon Sep 17 00:00:00 2001
From: Thomas Waldmann <tw@waldmann-edv.de>
Date: Sun, 5 Apr 2026 08:43:26 +0200
Subject: [PATCH 3/6] add benchmark test for mmapped HashTableNT

---
 tests/benchmark_test.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/tests/benchmark_test.py b/tests/benchmark_test.py
index 24e68bb..c3a3c6e 100644
--- a/tests/benchmark_test.py
+++ b/tests/benchmark_test.py
@@ -9,6 +9,9 @@
 from borghash import HashTable, HashTableNT
 from .hashtable_test import H2
 
+import tempfile
+import os
+
 VALUE_TYPE = namedtuple("value_type", "value")
 VALUE_FMT_TYPE = namedtuple("value_format", "value")
 VALUE_FMT = VALUE_FMT_TYPE("I")
@@ -41,7 +44,22 @@ def pd():  # Python dict
     return dict()
 
 
-TEST_PARAMS = [(bh, False), (bhnt, True), (pd, False), (pd, True)]
+def bhmmap():  # BorgHash HashTableNT with mmap
+    fd, path = tempfile.mkstemp(suffix=".borghash")
+    os.close(fd)
+    
+    class MappedHashTableNT(HashTableNT):
+        def __del__(self):
+            try:
+                os.remove(path)
+            except OSError:
+                pass
+
+    ht = MappedHashTableNT(key_size=KEY_SIZE, value_type=VALUE_TYPE, value_format=VALUE_FMT, path=path)
+    return ht
+
+
+TEST_PARAMS = [(bh, False), (bhnt, True), (pd, False), (pd, True), (bhmmap, True)]
 
 
 def setup(ht_class, items, fill=False, nt=False):

From 0b734eec124303ad08318f3b7a5c679c9cc6c22e Mon Sep 17 00:00:00 2001
From: Thomas Waldmann <tw@waldmann-edv.de>
Date: Sun, 5 Apr 2026 15:53:57 +0200
Subject: [PATCH 4/6] fixup: improve mmap support

---
 src/borghash/HashTable.pyx     | 12 +++++-------
 src/borghash/HashTableNT.pyx   |  4 ++++
 tests/hashtablent_mmap_test.py | 15 +++++++--------
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/src/borghash/HashTable.pyx b/src/borghash/HashTable.pyx
index 0113708..19e596d 100644
--- a/src/borghash/HashTable.pyx
+++ b/src/borghash/HashTable.pyx
@@ -119,12 +119,16 @@ cdef class HashTable:
 
     def __del__(self) -> None:
         free(self.table)
+        self.table = NULL
         if self.fd != -1:
             if self.kv != NULL:
                 munmap(self.kv - self.kv_offset, self.mmap_size)
+                self.kv = NULL
             close(self.fd)
+            self.fd = -1
         else:
             free(self.kv)
+            self.kv = NULL
 
     def clear(self) -> None:
         """Empty the HashTable and start from scratch."""
@@ -313,8 +317,7 @@ cdef class HashTable:
                 # Don't shrink automatically during resize if we already have space.
                 # This prevents truncating an existing file's data when it's opened
                 # with a smaller initial_capacity than the file already contains.
-                # HOWEVER, if capacity is kv_used, we might be in shrink_to_fit.
-                # Let's allow shrinking if capacity < self.kv_capacity.
+                # HOWEVER, we MUST shrink if capacity < self.kv_capacity (e.g. shrink_to_fit).
                 if new_mmap_size <= self.mmap_size and capacity >= self.kv_capacity:
                     return
                 munmap(self.kv - self.kv_offset, self.mmap_size)
@@ -333,11 +336,6 @@ cdef class HashTable:
     def shrink_to_fit(self) -> None:
         """Shrink the KV array and the file to the actually used size."""
         self._resize_kv(self.kv_used)
-        if self.fd != -1:
-            # _resize_kv already calls ftruncate to new_mmap_size,
-            # which is kv_offset + capacity * entry_size.
-            # Here capacity is self.kv_used.
-            pass
 
     def k_to_idx(self, key: bytes) -> int:
         """
diff --git a/src/borghash/HashTableNT.pyx b/src/borghash/HashTableNT.pyx
index f9d8310..fa4cadf 100644
--- a/src/borghash/HashTableNT.pyx
+++ b/src/borghash/HashTableNT.pyx
@@ -302,6 +302,10 @@ cdef class HashTableNT:
         """Write/update the file header and metadata. Required for mmapped files."""
         if self.inner.fd == -1:
              raise RuntimeError("Not a memory-mapped HashTableNT (no path/fd).")
+
+        # Always shrink to fit before writing header
+        self.inner.shrink_to_fit()
+
         # Save current position
         cdef off_t current_pos = lseek(self.inner.fd, 0, SEEK_CUR)
         # Seek to start
diff --git a/tests/hashtablent_mmap_test.py b/tests/hashtablent_mmap_test.py
index 7e6d41b..bd4312b 100644
--- a/tests/hashtablent_mmap_test.py
+++ b/tests/hashtablent_mmap_test.py
@@ -86,21 +86,20 @@ def test_mmap_shrink_to_fit(tmp_path):
     ht.write(path)
     
     ht_mmap = HashTableNT.open_mmap(path)
-    # Add items to trigger growth of kv_capacity beyond kv_used
-    for i in range(100, 200):
+    # Add enough items to trigger KV growth
+    for i in range(100, 1000):
          ht_mmap[H2(i)] = value_type(i, 0, 0)
     
-    # After 200 items, kv_capacity > 200 (due to kv_grow_factor)
-    assert ht_mmap.inner.kv_capacity > 200
+    # After 1000 items, kv_capacity > 1000 (due to kv_grow_factor)
+    assert ht_mmap.inner.kv_capacity > 1000
     initial_size = os.path.getsize(path)
 
-    # shrink_to_fit should reduce file size to exactly kv_used
-    ht_mmap.inner.shrink_to_fit()
+    # write_header should now automatically call shrink_to_fit
     ht_mmap.write_header()
     
     shrunk_size = os.path.getsize(path)
     assert shrunk_size < initial_size
-    assert len(ht_mmap) == 200
+    assert len(ht_mmap) == 1000
 
 
 def test_mmap_new_file(tmp_path):
@@ -108,7 +107,7 @@ def test_mmap_new_file(tmp_path):
     path = str(tmp_path / "new_mmap.borghash")
     ht = HashTableNT(key_size=key_size, value_type=value_type, value_format=value_format, path=path)
     ht[key1] = value1
-    ht.write_header() # Initialize header for new file
+    ht.write_header()  # Initialize header for new file
     assert os.path.exists(path)
     
     # Check if it persists without explicit write()

From 2d9c1a528935a1f93943a92a8fde8615dddf6b2c Mon Sep 17 00:00:00 2001
From: Thomas Waldmann <tw@waldmann-edv.de>
Date: Sun, 5 Apr 2026 18:00:55 +0200
Subject: [PATCH 5/6] add failing test for mmap_delete_persistence

---
 tests/hashtablent_mmap_test.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/tests/hashtablent_mmap_test.py b/tests/hashtablent_mmap_test.py
index bd4312b..e724733 100644
--- a/tests/hashtablent_mmap_test.py
+++ b/tests/hashtablent_mmap_test.py
@@ -120,3 +120,33 @@ def test_mmap_corrupt_magic(tmp_path):
     path.write_bytes(b"NOTBORG" + b"\x00" * 100)
     with pytest.raises(ValueError, match="magic BORGHASH not found"):
         HashTableNT.open_mmap(str(path))
+
+
+def test_mmap_delete_persistence(tmp_path):
+    path = str(tmp_path / "delete_test.borghash")
+    # use a mmapped HashTableNT
+    ht = HashTableNT(key_size=key_size, value_type=value_type, value_format=value_format, path=path)
+
+    # add 100 entries
+    for i in range(100):
+        ht[H2(i)] = value_type(i, i, i)
+    assert len(ht) == 100
+
+    # delete 50 entries
+    for i in range(25, 75):
+        del ht[H2(i)]
+    assert len(ht) == 50
+
+    # flush ht to disk
+    ht.write_header()
+    del ht
+
+    # open that ht again via mmap and check if the ht size is 50
+    ht2 = HashTableNT(key_size=key_size, value_type=value_type, value_format=value_format, path=path)
+    assert len(ht2) == 50
+
+    # verification of entries
+    for i in range(50, 100):
+        assert ht2[H2(i)] == value_type(i, i, i)
+    for i in range(50):
+        assert H2(i) not in ht2

From c717cfefeaccb7a6a96fb09762a1c77f62c6dd55 Mon Sep 17 00:00:00 2001
From: Thomas Waldmann <tw@waldmann-edv.de>
Date: Fri, 1 May 2026 15:38:36 +0200
Subject: [PATCH 6/6] refactor mmap pointer handling and header format

Introduce a dedicated `header` pointer that tracks the start of the mmap
region.

Extend `HEADER_FMT` to include `capacity`, `used`, and `kv_used` directly
in the binary header (previously stored in the JSON metadata blob).

The idea is that we can easily update these values without recreating
the JSON inside the mmapped file. The JSON shall only contain stuff that
does not change and that is difficult to map to a fixed layout.

`HashTableNT.__init__`: reconstruct the hash table from the KV array when
opening an existing mmap file, replacing the duplicated logic in `open_mmap`.

Fix `test_mmap_delete_persistence`: the assertions checked the wrong ranges
(50..99 present, 0..49 absent). The test deletes 25..74, so 0..24 and 75..99
must still be present and 25..74 must be absent.
---
 src/borghash/HashTable.pxd     |   1 +
 src/borghash/HashTable.pyx     |  31 +++++---
 src/borghash/HashTableNT.pyx   | 132 +++++++++++++++++++--------------
 tests/hashtablent_mmap_test.py |  27 ++++---
 4 files changed, 112 insertions(+), 79 deletions(-)

diff --git a/src/borghash/HashTable.pxd b/src/borghash/HashTable.pxd
index d5d1841..0451520 100644
--- a/src/borghash/HashTable.pxd
+++ b/src/borghash/HashTable.pxd
@@ -9,6 +9,7 @@ cdef class HashTable:
     cdef public uint32_t kv_capacity, kv_used
     cdef float kv_grow_factor
     cdef uint8_t* kv
+    cdef uint8_t* header
     cdef int fd
     cdef size_t mmap_size
     cdef uint32_t kv_offset
diff --git a/src/borghash/HashTable.pyx b/src/borghash/HashTable.pyx
index 19e596d..fd1b8ba 100644
--- a/src/borghash/HashTable.pyx
+++ b/src/borghash/HashTable.pyx
@@ -89,17 +89,22 @@ cdef class HashTable:
         self.kv_grow_factor = kv_grow_factor
         self.kv_used = 0
         self.kv = NULL
+        self.header = NULL
         if self.fd != -1:
             # For mmap, we determine current size and capacity from file size.
             file_size = lseek(self.fd, 0, SEEK_END)
-            if file_size > self.kv_offset:  # kv array is not empty
+            if file_size > 0:
                 self.mmap_size = file_size
-                # map the full file, starting from offset 0
                 new_kv = mmap(NULL, self.mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, self.fd, 0)
                 if new_kv == <void*> -1:
                     raise OSError(errno, "mmap failed")
-                self.kv = <uint8_t*> new_kv + self.kv_offset
-                self.kv_capacity = <uint32_t>((self.mmap_size - self.kv_offset) // (self.ksize + self.vsize))
+                self.header = <uint8_t*> new_kv
+                if self.kv_offset > 0:
+                    self.kv = <uint8_t*> new_kv + self.kv_offset
+                    self.kv_capacity = <uint32_t>((self.mmap_size - self.kv_offset) // (self.ksize + self.vsize))
+                else:
+                    self.kv = NULL
+                    self.kv_capacity = 0
             else:
                 self._resize_kv(int(self.initial_capacity * self.max_load_factor))
         else:
@@ -121,8 +126,9 @@ cdef class HashTable:
         free(self.table)
         self.table = NULL
         if self.fd != -1:
-            if self.kv != NULL:
-                munmap(self.kv - self.kv_offset, self.mmap_size)
+            if self.header != NULL:
+                munmap(self.header, self.mmap_size)
+                self.header = NULL
                 self.kv = NULL
             close(self.fd)
             self.fd = -1
@@ -309,24 +315,25 @@ cdef class HashTable:
         # We must never use kv indices >= RESERVED; thus, we'll never need more capacity either.
         cdef size_t capacity = min(new_capacity, <size_t> RESERVED - 1)
         cdef size_t new_mmap_size
-        cdef void* new_kv
+        cdef void* new_ptr
         self.stats_resize_kv += 1
         if self.fd != -1:
             new_mmap_size = self.kv_offset + capacity * (self.ksize + self.vsize) * sizeof(uint8_t)
-            if self.kv != NULL:
+            if self.header != NULL:
                 # Don't shrink automatically during resize if we already have space.
                 # This prevents truncating an existing file's data when it's opened
                 # with a smaller initial_capacity than the file already contains.
                 # HOWEVER, we MUST shrink if capacity < self.kv_capacity (e.g. shrink_to_fit).
                 if new_mmap_size <= self.mmap_size and capacity >= self.kv_capacity:
                     return
-                munmap(self.kv - self.kv_offset, self.mmap_size)
+                munmap(self.header, self.mmap_size)
             if ftruncate(self.fd, new_mmap_size) == -1:
                 raise OSError(errno, "ftruncate failed")
-            new_kv = mmap(NULL, new_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, self.fd, 0)
-            if new_kv == <void*> -1:
+            new_ptr = mmap(NULL, new_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, self.fd, 0)
+            if new_ptr == <void*> -1:
                 raise OSError(errno, "mmap failed")
-            self.kv = <uint8_t*> new_kv + self.kv_offset
+            self.header = <uint8_t*> new_ptr
+            self.kv = <uint8_t*> new_ptr + self.kv_offset
             self.mmap_size = new_mmap_size
         else:
             # realloc is already highly optimized (in Linux). By using mremap internally, only the peak address space usage is "old size" + "new size", while the peak memory usage is only "new size".
diff --git a/src/borghash/HashTableNT.pyx b/src/borghash/HashTableNT.pyx
index fa4cadf..2fc50d9 100644
--- a/src/borghash/HashTableNT.pyx
+++ b/src/borghash/HashTableNT.pyx
@@ -11,13 +11,15 @@ import json
 import struct
 
 from .HashTable import HashTable, MIN_CAPACITY, _fill
+from libc.stdint cimport uint8_t
 from posix.types cimport off_t
 from posix.unistd cimport lseek, SEEK_SET, SEEK_CUR, write as c_write
+from libc.string cimport memcpy, memset
 
 MAGIC = b"BORGHASH"
 assert len(MAGIC) == 8
 VERSION = 1  # version of the on-disk (serialized) format produced by .write().
-HEADER_FMT = "<8sII"  # magic, version, meta length
+HEADER_FMT = "<8sIIIII"  # magic, version, meta length, capacity, used, kv_used
 ALIGNMENT = 64  # usual length of cache line
 
 BYTE_ORDER = dict(big=">", little="<", network="!", native="=")  # struct format chars
@@ -29,6 +31,7 @@ cdef class HashTableNT:
                  int key_size=0, value_type=None, value_format=None,
                  int capacity = MIN_CAPACITY, str byte_order="little",
                  str path = None, int kv_offset = 4096) -> None:
+        cdef int valid_count
         if not isinstance(key_size, int) or not key_size >= 4:
             raise ValueError("key_size must be an integer and >= 4.")
         if type(value_type) is not type:
@@ -49,6 +52,34 @@ cdef class HashTableNT:
         self.value_size = self.value_struct.size
         self.inner = HashTable(key_size=self.key_size, value_size=self.value_size, capacity=capacity,
                                path=path, kv_offset=kv_offset)
+        if path:
+            # Check if file already exists and has a header
+            header_size = struct.calcsize(HEADER_FMT)
+            if self.inner.mmap_size >= header_size:
+                header_bytes = (<char*>self.inner.header)[:header_size]
+                magic, version, meta_size, capacity, used, kv_used = struct.unpack(HEADER_FMT, header_bytes)
+                if magic == MAGIC:
+                    # If the file already has a header, it means it's an existing table.
+                    # We MUST use the capacity from the header to correctly restore the hash table.
+                    # Also, reset used to 0 as update_table_only will increment it.
+                    self.inner.kv_used = kv_used
+                    (<HashTable>self.inner).used = 0
+                    if capacity != self.inner.capacity:
+                         self.inner._resize_table(capacity)
+
+                    # This is an existing file with a header, let's restore the state
+                    # Populate hash table
+                    valid_count = 0
+                    for i in range(kv_used):
+                        key = self.inner.idx_to_k(i)
+                        # A deleted slot MUST have a zero key. idx_to_k returns the raw bytes.
+                        if any(key):
+                            self.inner.update_table_only(key, i)
+                            valid_count += 1
+
+                    # After populating, self.inner.used should match 'used' from header
+                    assert self.inner.used == used
+                    assert valid_count == used
         _fill(self, items)
 
     def clear(self) -> None:
@@ -180,20 +211,20 @@ cdef class HashTableNT:
             'value_format_name': self.value_format.__class__.__name__,
             'value_format_fields': self.value_format._fields,
             'value_format': self.value_format,
-            'capacity': self.inner.capacity,
-            'used': self.inner.used,  # count of valid keys / values
-            'kv_used': self.inner.kv_used, # count of slots in the kv array
         }
         meta_bytes = json.dumps(meta).encode("utf-8")
         header_size = struct.calcsize(HEADER_FMT)
         # Calculate kv_offset based on current meta_bytes length, then meta_size is everything in between.
         kv_offset = (header_size + len(meta_bytes) + ALIGNMENT - 1) // ALIGNMENT * ALIGNMENT
         meta_size = kv_offset - header_size
-        header_bytes = struct.pack(HEADER_FMT, MAGIC, VERSION, meta_size)
+
+        header_bytes = struct.pack(HEADER_FMT, MAGIC, VERSION, meta_size,
+                                   self.inner.capacity, self.inner.used, self.inner.used)
         fd.write(header_bytes)
         fd.write(meta_bytes)
         # Pad with zeros until kv_offset
         fd.write(b'\x00' * (meta_size - len(meta_bytes)))
+
         count = 0
         for i in range(self.inner.kv_used):
              try:
@@ -219,7 +250,7 @@ cdef class HashTableNT:
         header_bytes = fd.read(header_size)
         if len(header_bytes) < header_size:
             raise ValueError("Invalid file, file is too short.")
-        magic, version, meta_size = struct.unpack(HEADER_FMT, header_bytes)
+        magic, version, meta_size, capacity, used, kv_used = struct.unpack(HEADER_FMT, header_bytes)
         if magic != MAGIC:
             # Try old header format? No, we broke compatibility on purpose.
             raise ValueError("Invalid file, magic %s not found." % MAGIC.decode())
@@ -233,9 +264,8 @@ cdef class HashTableNT:
         value_format_t = namedtuple(meta['value_format_name'], meta['value_format_fields'])
         value_format = value_format_t(*meta['value_format'])
         ht = cls(key_size=meta['key_size'], value_format=value_format, value_type=value_type,
-                 capacity=meta['capacity'], byte_order=meta['byte_order'])
+                 capacity=capacity, byte_order=meta['byte_order'])
         ksize, vsize = meta['key_size'], meta['value_size']
-        kv_used = meta.get('kv_used', meta['used'])
         for i in range(kv_used):
             key = fd.read(ksize)
             value = fd.read(vsize)
@@ -247,12 +277,15 @@ cdef class HashTableNT:
     @classmethod
     def open_mmap(cls, path: str, value_type: Any = None, value_format: Any = None):
         """Open an existing borghash file in mmap mode."""
+        # We can just use the constructor, it already handles existing files with headers
+        # but we need to extract meta to get value_type/format if not provided.
+        # This is a bit redundant but stays compatible with the signature.
         with open(path, 'rb') as fd:
             header_size = struct.calcsize(HEADER_FMT)
             header_bytes = fd.read(header_size)
             if len(header_bytes) < header_size:
                 raise ValueError("Invalid file, file is too short.")
-            magic, version, meta_size = struct.unpack(HEADER_FMT, header_bytes)
+            magic, version, meta_size, capacity, used, kv_used = struct.unpack(HEADER_FMT, header_bytes)
             if magic != MAGIC:
                 raise ValueError("Invalid file, magic %s not found." % MAGIC.decode())
             if version != VERSION:
@@ -268,25 +301,10 @@ cdef class HashTableNT:
         if value_format is None:
             value_format_t = namedtuple(meta['value_format_name'], meta['value_format_fields'])
             value_format = value_format_t(*meta['value_format'])
-            
-        ht = cls(key_size=meta['key_size'], value_format=value_format, value_type=value_type,
-                 capacity=meta['capacity'], byte_order=meta['byte_order'],
-                 path=path, kv_offset=kv_offset)
 
-        # In mmap mode, we must populate the hash table (self.inner.table) from the KV array.
-        # self.inner.kv is already mapped.
-        ksize, vsize = meta['key_size'], meta['value_size']
-        kv_used = meta.get('kv_used', meta['used']) # fallback for older versions/standard write()
-        ht.inner.kv_used = kv_used
-        for i in range(kv_used):
-            try:
-                 key = ht.inner.idx_to_k(i)
-                 # A zero key means it's a deleted slot or uninitialized
-                 if any(key):
-                      ht.inner.update_table_only(key, i)
-            except KeyError:
-                 continue
-        return ht
+        return cls(key_size=meta['key_size'], value_format=value_format, value_type=value_type,
+                   capacity=capacity, byte_order=meta['byte_order'],
+                   path=path, kv_offset=kv_offset)
 
     def size(self) -> int:
         """
@@ -304,20 +322,19 @@ cdef class HashTableNT:
              raise RuntimeError("Not a memory-mapped HashTableNT (no path/fd).")
 
         # Always shrink to fit before writing header
+        # (this is a no-op when the KV array has no spare capacity, i.e. kv_used == kv_capacity).
         self.inner.shrink_to_fit()
 
-        # Save current position
-        cdef off_t current_pos = lseek(self.inner.fd, 0, SEEK_CUR)
-        # Seek to start
-        lseek(self.inner.fd, 0, SEEK_SET)
-        
+        header_size = struct.calcsize(HEADER_FMT)
+
+        # If the file is new or was just created, it might not even have space for the meta.
+        # We should ensure the metadata is also written if it's not already there.
+        # For a brand new file, kv_offset might be the initial 4096.
+
         meta = {
             'key_size': self.key_size,
             'value_size': self.value_size,
             'byte_order': self.byte_order,
-            'used': self.inner.used,
-            'kv_used': self.inner.kv_used,
-            'capacity': self.inner.capacity,
             'value_type_name': self.value_type.__name__,
             'value_type_fields': self.value_type._fields,
             'value_format_name': self.value_format.__class__.__name__,
@@ -325,26 +342,31 @@ cdef class HashTableNT:
             'value_format': self.value_format,
         }
         meta_bytes = json.dumps(meta).encode("utf-8")
-        header_size = struct.calcsize(HEADER_FMT)
         kv_offset = (header_size + len(meta_bytes) + ALIGNMENT - 1) // ALIGNMENT * ALIGNMENT
-        if kv_offset != self.inner.kv_offset:
-             # This is a bit tricky, if we change meta size, we'd need to shift the KV array.
-             # For now, let's just ensure we don't exceed the original kv_offset.
-             if kv_offset > self.inner.kv_offset:
-                  raise RuntimeError("Metadata too large for the current kv_offset.")
-             kv_offset = self.inner.kv_offset # stay at the original offset
-             
-        meta_size = kv_offset - header_size
-        header_bytes = struct.pack(HEADER_FMT, MAGIC, VERSION, meta_size)
-        
-        # We need a file object for write() or use os.write
-        # Since we're in Cython, we can use POSIX write()
-        c_write(self.inner.fd, <char*>header_bytes, len(header_bytes))
-        c_write(self.inner.fd, <char*>meta_bytes, len(meta_bytes))
+        if kv_offset > self.inner.kv_offset:
+             # This can happen if user didn't provide enough kv_offset initially for the metadata.
+             # In __init__, default kv_offset is 4096 which is usually plenty.
+             raise RuntimeError(f"Metadata too large ({len(meta_bytes)} bytes) for current kv_offset ({self.inner.kv_offset}).")
+
+        # We always use the self.inner.kv_offset that was established.
+        meta_size = self.inner.kv_offset - header_size
+        header_bytes = struct.pack(HEADER_FMT, MAGIC, VERSION, meta_size,
+                                   self.inner.capacity, self.inner.used, self.inner.kv_used)
+
+        # Ensure mmap is large enough to hold at least the header and meta
+        if self.inner.mmap_size < self.inner.kv_offset:
+             self.inner._resize_kv(0) # this will grow it to at least kv_offset
+
+        cdef HashTable inner = <HashTable>self.inner
+        cdef uint8_t* header = inner.header
+        cdef char* header_ptr = <char*>header_bytes
+        cdef char* meta_ptr = <char*>meta_bytes
+        cdef size_t h_size = header_size
+        # Update the header in the mmapped memory
+        memcpy(header, header_ptr, h_size)
+        # Also write/update the meta bytes if they are not there or to be sure.
+        memcpy(header + h_size, meta_ptr, len(meta_bytes))
+        # Zero out padding between meta and KV array
         padding = meta_size - len(meta_bytes)
         if padding > 0:
-             zeros = b'\x00' * padding
-             c_write(self.inner.fd, <char*>zeros, padding)
-        
-        # Restore position
-        lseek(self.inner.fd, current_pos, SEEK_SET)
+             memset(header + h_size + len(meta_bytes), 0, padding)
diff --git a/tests/hashtablent_mmap_test.py b/tests/hashtablent_mmap_test.py
index e724733..5112ba4 100644
--- a/tests/hashtablent_mmap_test.py
+++ b/tests/hashtablent_mmap_test.py
@@ -36,14 +36,14 @@ def test_mmap_persistence(tmp_path):
     ht = HashTableNT(key_size=key_size, value_type=value_type, value_format=value_format)
     ht[key1] = value1
     ht.write(path)
-    
+
     # Open mmap, modify, and close
     ht_mmap = HashTableNT.open_mmap(path)
     ht_mmap[key2] = value2
     del ht_mmap[key1]
     # Update header/metadata in the file
     ht_mmap.write_header()
-    
+
     # Re-open normally to verify
     ht_read = HashTableNT.read(path)
     assert key1 not in ht_read
@@ -56,18 +56,18 @@ def test_mmap_resize(tmp_path):
     ht = HashTableNT(key_size=key_size, value_type=value_type, value_format=value_format, capacity=100)
     ht[key1] = value1
     ht.write(path)
-    
+
     ht_mmap = HashTableNT.open_mmap(path)
     # Add many items to trigger KV and table resize
     for i in range(200):
         key = H2(i)
         ht_mmap[key] = value_type(i, i+1, i+2)
-    
+
     ht_mmap.write_header()  # update used count in metadata
-    
+
     assert len(ht_mmap) == 201
     assert ht_mmap[key1] == value1
-    
+
     # Close and reopen to ensure resized file is valid
     ht_reopened = HashTableNT.open_mmap(path)
     assert len(ht_reopened) == 201
@@ -84,19 +84,19 @@ def test_mmap_shrink_to_fit(tmp_path):
     for i in range(100):
         ht[H2(i)] = value_type(i, 0, 0)
     ht.write(path)
-    
+
     ht_mmap = HashTableNT.open_mmap(path)
     # Add enough items to trigger KV growth
     for i in range(100, 1000):
          ht_mmap[H2(i)] = value_type(i, 0, 0)
-    
+
     # After 1000 items, kv_capacity > 1000 (due to kv_grow_factor)
     assert ht_mmap.inner.kv_capacity > 1000
     initial_size = os.path.getsize(path)
 
     # write_header should now automatically call shrink_to_fit
     ht_mmap.write_header()
-    
+
     shrunk_size = os.path.getsize(path)
     assert shrunk_size < initial_size
     assert len(ht_mmap) == 1000
@@ -109,7 +109,7 @@ def test_mmap_new_file(tmp_path):
     ht[key1] = value1
     ht.write_header()  # Initialize header for new file
     assert os.path.exists(path)
-    
+
     # Check if it persists without explicit write()
     ht2 = HashTableNT.open_mmap(path)
     assert ht2[key1] == value1
@@ -146,7 +146,10 @@ def test_mmap_delete_persistence(tmp_path):
     assert len(ht2) == 50
 
     # verification of entries
-    for i in range(50, 100):
+    # 0..24 were NOT deleted, 25..74 WERE deleted, 75..99 were NOT deleted
+    for i in range(25):
         assert ht2[H2(i)] == value_type(i, i, i)
-    for i in range(50):
+    for i in range(25, 75):
         assert H2(i) not in ht2
+    for i in range(75, 100):
+        assert ht2[H2(i)] == value_type(i, i, i)