diff --git a/mlx_lm/utils.py b/mlx_lm/utils.py index ef3d266b9..919f15b19 100644 --- a/mlx_lm/utils.py +++ b/mlx_lm/utils.py @@ -939,7 +939,7 @@ def save( hf_repo = None dst_path = Path(dst_path) - save_model(dst_path, model, donate_model=True) + save_model(dst_path, model, donate_model=donate_model) save_config(config, config_path=dst_path / "config.json") tokenizer.save_pretrained(dst_path) diff --git a/tests/test_gguf.py b/tests/test_gguf.py index f7e789a00..8f6c1f009 100644 --- a/tests/test_gguf.py +++ b/tests/test_gguf.py @@ -1,11 +1,15 @@ import os +import sys import tempfile import unittest +from contextlib import ExitStack from pathlib import Path from unittest.mock import MagicMock, patch import mlx.core as mx +import mlx.nn as nn +import mlx_lm.fuse as fuse from mlx_lm.gguf import convert_to_gguf @@ -58,5 +62,58 @@ def test_convert_to_gguf( self.assertEqual(called_args[0], output_file_path) +class TestFuseGGUFExport(unittest.TestCase): + def test_export_gguf_receives_preserved_model_weights(self): + class TinyTokenizer: + def save_pretrained(self, path): + Path(path, "tokenizer_config.json").write_text("{}\n") + + model = nn.Linear(2, 2, bias=False) + mx.eval(model.parameters()) + + recorded = {} + + def fake_convert_to_gguf(save_path, weights, config, output_path): + recorded["weights"] = {k: tuple(v.shape) for k, v in weights.items()} + + with tempfile.TemporaryDirectory() as tmpdir: + source_path = Path(tmpdir) / "source_model" + source_path.mkdir() + save_path = Path(tmpdir) / "fused_model" + argv = [ + "mlx_lm.fuse", + "--model", + str(source_path), + "--save-path", + str(save_path), + "--export-gguf", + ] + + with ExitStack() as stack: + stack.enter_context(patch.object(sys, "argv", argv)) + stack.enter_context( + patch.object( + fuse, + "load", + return_value=( + model, + TinyTokenizer(), + {"model_type": "llama"}, + ), + ) + ) + stack.enter_context(patch("mlx_lm.utils.create_model_card")) + stack.enter_context( + patch.object( + fuse, + "convert_to_gguf", + side_effect=fake_convert_to_gguf, + ) + ) + fuse.main() + + self.assertEqual(recorded["weights"], {"weight": (2, 2)}) + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_utils.py b/tests/test_utils.py index 88b68fe33..f99327843 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,6 +4,7 @@ import tempfile import unittest from pathlib import Path +from unittest.mock import patch import mlx.core as mx import mlx.nn as nn @@ -59,6 +60,48 @@ def test_make_shards(self): shards = utils.make_shards(dict(weights), 1) self.assertTrue(gb <= len(shards) <= gb + 1) + def test_save_respects_donate_model(self): + class TinyTokenizer: + def save_pretrained(self, path): + Path(path, "tokenizer_config.json").write_text("{}\n") + + def make_model(): + model = nn.Linear(2, 2, bias=False) + mx.eval(model.parameters()) + return model + + src_path = Path(self.test_dir) / "source_model" + src_path.mkdir(exist_ok=True) + + with patch("mlx_lm.utils.create_model_card"): + model = make_model() + utils.save( + Path(self.test_dir) / "preserved_model", + src_path, + model, + TinyTokenizer(), + {"model_type": "tiny"}, + donate_model=False, + ) + self.assertEqual( + {k: tuple(v.shape) for k, v in tree_flatten(model.parameters())}, + {"weight": (2, 2)}, + ) + + model = make_model() + utils.save( + Path(self.test_dir) / "donated_model", + src_path, + model, + TinyTokenizer(), + {"model_type": "tiny"}, + donate_model=True, + ) + self.assertEqual( + {k: tuple(v.shape) for k, v in tree_flatten(model.parameters())}, + {"weight": (0,)}, + ) + def test_quantize(self): from mlx_lm.models import llama