Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mlx_lm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -939,7 +939,7 @@ def save(
hf_repo = None

dst_path = Path(dst_path)
- save_model(dst_path, model, donate_model=True)
+ save_model(dst_path, model, donate_model=donate_model)
save_config(config, config_path=dst_path / "config.json")
tokenizer.save_pretrained(dst_path)

Expand Down
57 changes: 57 additions & 0 deletions tests/test_gguf.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
import os
import sys
import tempfile
import unittest
from contextlib import ExitStack
from pathlib import Path
from unittest.mock import MagicMock, patch

import mlx.core as mx
import mlx.nn as nn

import mlx_lm.fuse as fuse
from mlx_lm.gguf import convert_to_gguf


Expand Down Expand Up @@ -58,5 +62,58 @@ def test_convert_to_gguf(
self.assertEqual(called_args[0], output_file_path)


class TestFuseGGUFExport(unittest.TestCase):
    """Exercises the ``--export-gguf`` path of ``mlx_lm.fuse`` using stubs."""

    def test_export_gguf_receives_preserved_model_weights(self):
        """The GGUF converter must see the model's ``(2, 2)`` weight.

        ``fuse.main()`` is run with a patched ``sys.argv`` while ``load``,
        ``create_model_card`` and ``convert_to_gguf`` are replaced by stubs.
        The ``convert_to_gguf`` stub records the shape of every weight it is
        handed, and the test asserts on that record afterwards.
        """

        class StubTokenizer:
            # Stands in for a real tokenizer: only writes a minimal config.
            def save_pretrained(self, path):
                (Path(path) / "tokenizer_config.json").write_text("{}\n")

        linear = nn.Linear(2, 2, bias=False)
        mx.eval(linear.parameters())

        seen = {}

        def fake_convert_to_gguf(save_path, weights, config, output_path):
            # Snapshot the shapes at call time, i.e. exactly what the
            # converter was given when fuse invoked it.
            shapes = {}
            for name, array in weights.items():
                shapes[name] = tuple(array.shape)
            seen["weights"] = shapes

        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            source_path = root / "source_model"
            source_path.mkdir()
            save_path = root / "fused_model"

            cli_args = ["mlx_lm.fuse"]
            cli_args += ["--model", str(source_path)]
            cli_args += ["--save-path", str(save_path)]
            cli_args += ["--export-gguf"]

            loaded = (linear, StubTokenizer(), {"model_type": "llama"})
            patchers = (
                patch.object(sys, "argv", cli_args),
                patch.object(fuse, "load", return_value=loaded),
                patch("mlx_lm.utils.create_model_card"),
                patch.object(
                    fuse, "convert_to_gguf", side_effect=fake_convert_to_gguf
                ),
            )

            # Patches are entered in the order listed and undone together
            # when the stack closes.
            with ExitStack() as stack:
                for patcher in patchers:
                    stack.enter_context(patcher)
                fuse.main()

        self.assertEqual(seen["weights"], {"weight": (2, 2)})


# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
43 changes: 43 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch

import mlx.core as mx
import mlx.nn as nn
Expand Down Expand Up @@ -59,6 +60,48 @@ def test_make_shards(self):
shards = utils.make_shards(dict(weights), 1)
self.assertTrue(gb <= len(shards) <= gb + 1)

def test_save_respects_donate_model(self):
    """``utils.save`` must honour its ``donate_model`` argument.

    A fresh 2x2 bias-free linear layer is saved twice. With
    ``donate_model=False`` the layer's weight is expected to keep its
    ``(2, 2)`` shape after the save; with ``donate_model=True`` the test
    expects the weight to be left with shape ``(0,)``.
    ``create_model_card`` is patched out for both saves.
    """

    class StubTokenizer:
        # Stands in for a real tokenizer: only writes a minimal config.
        def save_pretrained(self, path):
            (Path(path) / "tokenizer_config.json").write_text("{}\n")

    root = Path(self.test_dir)
    src_path = root / "source_model"
    src_path.mkdir(exist_ok=True)

    def save_and_report_shapes(dst_name, donate):
        # Build a new evaluated model, save it, then report the shapes of
        # the parameters the model still holds afterwards.
        fresh = nn.Linear(2, 2, bias=False)
        mx.eval(fresh.parameters())
        utils.save(
            root / dst_name,
            src_path,
            fresh,
            StubTokenizer(),
            {"model_type": "tiny"},
            donate_model=donate,
        )
        return {
            name: tuple(param.shape)
            for name, param in tree_flatten(fresh.parameters())
        }

    with patch("mlx_lm.utils.create_model_card"):
        kept = save_and_report_shapes("preserved_model", False)
        self.assertEqual(kept, {"weight": (2, 2)})

        donated = save_and_report_shapes("donated_model", True)
        self.assertEqual(donated, {"weight": (0,)})

def test_quantize(self):
from mlx_lm.models import llama

Expand Down