From 6cd1324d2677a787a899d25a65e70995ffcf5813 Mon Sep 17 00:00:00 2001
From: James Dumay <jameswdumay@gmail.com>
Date: Fri, 17 Apr 2026 18:25:59 +1000
Subject: [PATCH] Fix multi-shard moe-split tensor counting

---
 tools/moe-split/moe-split.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tools/moe-split/moe-split.cpp b/tools/moe-split/moe-split.cpp
index 4ac2c6240010..913a4725a3a0 100644
--- a/tools/moe-split/moe-split.cpp
+++ b/tools/moe-split/moe-split.cpp
@@ -496,12 +496,13 @@ static void write_group(
         }
     }
 
-    // Create ggml context for ALL tensors (trunk + experts).
-    // Previously used n_expert_tensors which underallocated for models where the
-    // total tensor count far exceeds the expert tensor count (e.g. GLM-5.1 has
-    // 1809 total tensors but only ~237 expert tensors), causing SIGABRT at the
-    // last block when the context overflows.
-    size_t ctx_size = (size_t)(n_tensors + 64) * ggml_tensor_overhead() + 4096;
+    // Create ggml context for ALL tensors (trunk + experts) across every input
+    // shard. The multi-shard path no longer has a single ctx_in tensor count.
+    int n_tensors_total = 0;
+    for (const auto & input_shard : input_shards) {
+        n_tensors_total += gguf_get_n_tensors(input_shard.ctx_gguf);
+    }
+    size_t ctx_size = (size_t)(n_tensors_total + 64) * ggml_tensor_overhead() + 4096;
     struct ggml_init_params ctx_params = {
         /*.mem_size   =*/ ctx_size,
         /*.mem_buffer =*/ nullptr,