ml-explore · abnormal749 · May 28, 2026
diff --git a/mlx_lm/utils.py b/mlx_lm/utils.py
@@ -939,7 +939,7 @@ def save(
         hf_repo = None
 
     dst_path = Path(dst_path)
-    save_model(dst_path, model, donate_model=True)
+    save_model(dst_path, model, donate_model=donate_model)
     save_config(config, config_path=dst_path / "config.json")
     tokenizer.save_pretrained(dst_path)
 

diff --git a/tests/test_gguf.py b/tests/test_gguf.py
@@ -1,11 +1,15 @@
 import os
+import sys
 import tempfile
 import unittest
+from contextlib import ExitStack
 from pathlib import Path
 from unittest.mock import MagicMock, patch
 
 import mlx.core as mx
+import mlx.nn as nn
 
+import mlx_lm.fuse as fuse
 from mlx_lm.gguf import convert_to_gguf
 
 
@@ -58,5 +62,58 @@ def test_convert_to_gguf(
         self.assertEqual(called_args[0], output_file_path)
 
 
+class TestFuseGGUFExport(unittest.TestCase):
+    """Check that ``mlx_lm.fuse`` hands intact weights to the GGUF exporter."""
+
+    def test_export_gguf_receives_preserved_model_weights(self):
+        """Run ``fuse.main()`` with stubs and inspect the exported weights.
+
+        The model loader, model-card writer and GGUF converter are patched
+        out, so only the weights handed to the converter are examined.
+        """
+
+        class StubTokenizer:
+            """Minimal tokenizer that only writes a config file on save."""
+
+            def save_pretrained(self, path):
+                Path(path, "tokenizer_config.json").write_text("{}\n")
+
+        # A 2x2 bias-free layer yields exactly one parameter named "weight".
+        linear = nn.Linear(2, 2, bias=False)
+        mx.eval(linear.parameters())
+        captured = {}
+
+        def fake_convert_to_gguf(save_path, weights, config, output_path):
+            # Keep only each tensor's shape for the final assertion.
+            shapes = {}
+            for name, array in weights.items():
+                shapes[name] = tuple(array.shape)
+            captured["weights"] = shapes
+
+        with tempfile.TemporaryDirectory() as workdir:
+            root = Path(workdir)
+            src_dir = root / "source_model"
+            src_dir.mkdir()
+            fused_dir = root / "fused_model"
+            cli_args = ["mlx_lm.fuse", "--model", str(src_dir)]
+            cli_args += ["--save-path", str(fused_dir), "--export-gguf"]
+            loaded = (linear, StubTokenizer(), {"model_type": "llama"})
+            # Patchers are entered in list order and undone in reverse on exit.
+            patchers = [
+                patch.object(sys, "argv", cli_args),
+                patch.object(fuse, "load", return_value=loaded),
+                patch("mlx_lm.utils.create_model_card"),
+                patch.object(
+                    fuse, "convert_to_gguf", side_effect=fake_convert_to_gguf
+                ),
+            ]
+            with ExitStack() as stack:
+                for patcher in patchers:
+                    stack.enter_context(patcher)
+                fuse.main()
+
+        self.assertEqual(captured["weights"], {"weight": (2, 2)})
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -4,6 +4,7 @@
 import tempfile
 import unittest
 from pathlib import Path
+from unittest.mock import patch
 
 import mlx.core as mx
 import mlx.nn as nn
@@ -59,6 +60,48 @@ def test_make_shards(self):
         shards = utils.make_shards(dict(weights), 1)
         self.assertTrue(gb <= len(shards) <= gb + 1)
 
+    def test_save_respects_donate_model(self):
+        """Verify ``utils.save`` honors its ``donate_model`` flag.
+
+        With ``donate_model=False`` the caller's parameters must keep their
+        shape after saving; with ``donate_model=True`` they are expected to
+        be left as empty ``(0,)`` placeholders.
+        """
+
+        class StubTokenizer:
+            """Minimal tokenizer that only writes a config file on save."""
+
+            def save_pretrained(self, path):
+                Path(path, "tokenizer_config.json").write_text("{}\n")
+
+        def param_shapes(module):
+            # Map each flattened parameter name to its shape as a tuple.
+            flat = tree_flatten(module.parameters())
+            return {name: tuple(array.shape) for name, array in flat}
+
+        root = Path(self.test_dir)
+        src_path = root / "source_model"
+        src_path.mkdir(exist_ok=True)
+        # (output directory, donate_model flag, parameter shapes after save)
+        cases = (
+            ("preserved_model", False, {"weight": (2, 2)}),
+            ("donated_model", True, {"weight": (0,)}),
+        )
+        with patch("mlx_lm.utils.create_model_card"):
+            for dirname, donate, expected in cases:
+                # Each case gets its own freshly evaluated model.
+                layer = nn.Linear(2, 2, bias=False)
+                mx.eval(layer.parameters())
+                utils.save(
+                    root / dirname,
+                    src_path,
+                    layer,
+                    StubTokenizer(),
+                    {"model_type": "tiny"},
+                    donate_model=donate,
+                )
+                self.assertEqual(param_shapes(layer), expected)
+
     def test_quantize(self):
         from mlx_lm.models import llama