diff --git a/CHANGELOG.md b/CHANGELOG.md index fced16a..999d888 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,17 @@ Versioning follows [Semantic Versioning](https://semver.org/). ## [Unreleased] ### Added +- **LoRA Foundation** (v0.5, part 1 of 2). Pure-Swift Core layer + for HuggingFace LoRA adapter discovery — no engine integration + yet, no UI. `LocalAdapter` value type holds adapter metadata + parsed from PEFT's `adapter_config.json` (`base_model_name_or_path`, + `r`, `lora_alpha`, `target_modules`, `peft_type`). `AdapterStore` + actor scans `~/.mac-mlx/adapters//` for directories that + contain both `adapter_config.json` and `adapter_model.safetensors` + — best-effort, malformed configs / missing weights silently drop. + 10 new unit tests cover decode + scan paths. Engine application + of adapter weights via `LoRATrain.convert` + `loadLoRAWeights` + and the parameters-inspector picker land in v0.5 part 2. - **Prompt cache tiering** (v0.4.0 engine parity, part 1 of 3). Successive chat turns on the same model now reuse the KV cache when the new prompt extends the previous one — the shared prefix diff --git a/MacMLXCore/Sources/MacMLXCore/Managers/AdapterStore.swift b/MacMLXCore/Sources/MacMLXCore/Managers/AdapterStore.swift new file mode 100644 index 0000000..f52dc97 --- /dev/null +++ b/MacMLXCore/Sources/MacMLXCore/Managers/AdapterStore.swift @@ -0,0 +1,54 @@ +import Foundation + +/// Scans `~/.mac-mlx/adapters//` for PEFT-format LoRA adapters. +/// +/// Mirrors `ModelLibraryManager` shape but for adapters: a directory +/// is recognised when it contains both `adapter_config.json` and +/// `adapter_model.safetensors`. Bad / unreadable configs silently +/// drop — the scan must not blow up because of one malformed +/// directory. +public actor AdapterStore { + private let fileManager: FileManager + + public init(fileManager: FileManager = .default) { + self.fileManager = fileManager + } + + /// Enumerate adapters under `directory`. 
Sorted by `name`
+    /// (case-insensitive locale compare) for stable UI rendering.
+    public func scan(_ directory: URL) async throws -> [LocalAdapter] {
+        // A missing adapters directory means the user hasn't downloaded
+        // any adapters yet: report empty (the GUI offers to create it).
+        // Only a failure to list `directory` itself is thrown.
+        guard fileManager.fileExists(atPath: directory.path) else { return [] }
+
+        let contents = try fileManager.contentsOfDirectory(
+            at: directory,
+            includingPropertiesForKeys: [.isDirectoryKey],
+            options: [.skipsHiddenFiles]
+        )
+
+        var results: [LocalAdapter] = []
+        for url in contents {
+            // Best-effort: an entry whose metadata can't be read is skipped, not fatal.
+            guard (try? url.resourceValues(forKeys: [.isDirectoryKey]))?.isDirectory == true,
+                  !url.lastPathComponent.hasPrefix(".") else { continue }
+            let configURL = url.appendingPathComponent("adapter_config.json")
+            let weightsURL = url.appendingPathComponent("adapter_model.safetensors")
+            guard fileManager.fileExists(atPath: configURL.path),
+                  fileManager.fileExists(atPath: weightsURL.path),
+                  let data = try? Data(contentsOf: configURL),
+                  let cfg = try? JSONDecoder().decode(LocalAdapter.PEFTConfig.self, from: data)
+            else { continue }
+
+            results.append(LocalAdapter(
+                name: url.lastPathComponent,
+                directory: url,
+                targetModel: cfg.baseModelNameOrPath,
+                rank: cfg.r,
+                targetModules: cfg.targetModules ?? []
+            ))
+        }
+        return results.sorted { $0.name.localizedCompare($1.name) == .orderedAscending }
+    }
+}
diff --git a/MacMLXCore/Sources/MacMLXCore/Models/LocalAdapter.swift b/MacMLXCore/Sources/MacMLXCore/Models/LocalAdapter.swift
new file mode 100644
index 0000000..812d4c5
--- /dev/null
+++ b/MacMLXCore/Sources/MacMLXCore/Models/LocalAdapter.swift
@@ -0,0 +1,99 @@
+import Foundation
+
+/// One LoRA adapter directory present on the local filesystem.
+///
+/// Discovered by `AdapterStore.scan(_:)` via the presence of
+/// `adapter_config.json` + `adapter_model.safetensors` (PEFT format).
+/// `targetModel` is advisory — the engine layer applies the adapter
+/// regardless and surfaces a clear typed error if the dimensions
+/// don't fit the loaded base model.
+public struct LocalAdapter: Codable, Hashable, Identifiable, Sendable {
+    public var id: String { name }
+    public let name: String
+    public let directory: URL
+    /// Base-model id from the adapter's config (e.g.
+    /// `mlx-community/Qwen3-8B-4bit`). Optional — older adapters
+    /// don't always carry it.
+    public let targetModel: String?
+    /// LoRA rank (`r` in PEFT config). Nil if absent / unparseable.
+    public let rank: Int?
+    /// Names of the linear layers the adapter touches (e.g.
+    /// `["q_proj", "v_proj"]`). Empty array if absent.
+    public let targetModules: [String]
+
+    public init(
+        name: String,
+        directory: URL,
+        targetModel: String?,
+        rank: Int?,
+        targetModules: [String]
+    ) {
+        self.name = name
+        self.directory = directory
+        self.targetModel = targetModel
+        self.rank = rank
+        self.targetModules = targetModules
+    }
+
+    /// On-disk PEFT `adapter_config.json` shape.
+    ///
+    /// Exposed publicly so `AdapterStore` and tests can decode it
+    /// without re-deriving the schema. Mirrors the subset of HF PEFT
+    /// fields we currently consume — extend when we start honouring
+    /// `lora_dropout`, `bias`, etc.
+    public struct PEFTConfig: Codable, Hashable, Sendable {
+        public let baseModelNameOrPath: String?
+        public let r: Int?
+        public let loraAlpha: Int?
+        public let targetModules: [String]?
+        public let peftType: String?
+
+        public init(
+            baseModelNameOrPath: String?,
+            r: Int?,
+            loraAlpha: Int?,
+            targetModules: [String]?,
+            peftType: String?
+        ) {
+            self.baseModelNameOrPath = baseModelNameOrPath
+            self.r = r
+            self.loraAlpha = loraAlpha
+            self.targetModules = targetModules
+            self.peftType = peftType
+        }
+
+        /// Decodes leniently so a single odd field cannot hide an
+        /// otherwise usable adapter from `AdapterStore.scan(_:)`.
+        ///
+        /// A field that is missing, `null`, or of an unexpected JSON
+        /// type decodes as `nil`. PEFT also allows `target_modules` to
+        /// be a single string (a regex or a shorthand such as
+        /// `"all-linear"`); that form is kept as a one-element array.
+        ///
+        /// - Throws: `DecodingError` only when the top-level JSON value
+        ///   is not an object.
+        public init(from decoder: Decoder) throws {
+            let container = try decoder.container(keyedBy: CodingKeys.self)
+            baseModelNameOrPath = try? container.decodeIfPresent(String.self, forKey: .baseModelNameOrPath)
+            r = try? container.decodeIfPresent(Int.self, forKey: .r)
+            loraAlpha = try? container.decodeIfPresent(Int.self, forKey: .loraAlpha)
+            peftType = try? container.decodeIfPresent(String.self, forKey: .peftType)
+
+            if let list = try? container.decodeIfPresent([String].self, forKey: .targetModules) {
+                targetModules = list
+            } else if let single = try? container.decodeIfPresent(String.self, forKey: .targetModules) {
+                targetModules = [single]
+            } else {
+                targetModules = nil
+            }
+        }
+
+        private enum CodingKeys: String, CodingKey {
+            case baseModelNameOrPath = "base_model_name_or_path"
+            case r
+            case loraAlpha = "lora_alpha"
+            case targetModules = "target_modules"
+            case peftType = "peft_type"
+        }
+    }
+}
diff --git a/MacMLXCore/Tests/MacMLXCoreTests/Managers/AdapterStoreTests.swift b/MacMLXCore/Tests/MacMLXCoreTests/Managers/AdapterStoreTests.swift
new file mode 100644
index 0000000..8b79e1a
--- /dev/null
+++ b/MacMLXCore/Tests/MacMLXCoreTests/Managers/AdapterStoreTests.swift
@@ -0,0 +1,121 @@
+import Testing
+import Foundation
+@testable import MacMLXCore
+
+/// Filesystem-backed: serialised so swift-testing's parallel executor
+/// doesn't thrash on the temp directory (same rationale as the v0.4.1
+/// VLM detection suite).
+@Suite("AdapterStore", .serialized)
+struct AdapterStoreTests {
+
+    @Test
+    func scanFindsAdapterWithPEFTConfig() async throws {
+        let temp = try TempDir()
+        try writeAdapter(
+            in: temp.url,
+            name: "qwen3-medical",
+            targetModel: "mlx-community/Qwen3-8B-4bit",
+            r: 8
+        )
+        let store = AdapterStore()
+        let found = try await store.scan(temp.url)
+        #expect(found.count == 1)
+        #expect(found[0].name == "qwen3-medical")
+        #expect(found[0].rank == 8)
+        #expect(found[0].targetModel == "mlx-community/Qwen3-8B-4bit")
+        #expect(found[0].targetModules == ["q_proj", "v_proj"])
+    }
+
+    @Test
+    func scanIgnoresDirsWithoutAdapterConfig() async throws {
+        let temp = try TempDir()
+        let stray = temp.url.appendingPathComponent("not-an-adapter")
+        try FileManager.default.createDirectory(at: stray, withIntermediateDirectories: true)
+        let store = AdapterStore()
+        let found = try await store.scan(temp.url)
+        #expect(found.isEmpty)
+    }
+
+    @Test
+    func scanRequiresAdapterModelSafetensors() async throws {
+        // Has config but no safetensors → not a usable adapter.
+ let temp = try TempDir() + let dir = temp.url.appendingPathComponent("config-only") + try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true) + try Data(#"{"r":8,"target_modules":["q_proj"]}"#.utf8) + .write(to: dir.appendingPathComponent("adapter_config.json")) + let store = AdapterStore() + let found = try await store.scan(temp.url) + #expect(found.isEmpty) + } + + @Test + func scanIgnoresMalformedConfigJSON() async throws { + let temp = try TempDir() + let dir = temp.url.appendingPathComponent("malformed") + try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true) + try Data("{not json".utf8) + .write(to: dir.appendingPathComponent("adapter_config.json")) + try Data().write(to: dir.appendingPathComponent("adapter_model.safetensors")) + let store = AdapterStore() + let found = try await store.scan(temp.url) + #expect(found.isEmpty) + } + + @Test + func scanReturnsEmptyForMissingDirectory() async throws { + let temp = try TempDir() + let neverExisted = temp.url.appendingPathComponent("does-not-exist") + let store = AdapterStore() + let found = try await store.scan(neverExisted) + #expect(found.isEmpty) + } + + @Test + func scanSortsAdaptersByName() async throws { + let temp = try TempDir() + try writeAdapter(in: temp.url, name: "zeta", targetModel: nil, r: 4) + try writeAdapter(in: temp.url, name: "alpha", targetModel: nil, r: 4) + try writeAdapter(in: temp.url, name: "mu", targetModel: nil, r: 4) + let store = AdapterStore() + let found = try await store.scan(temp.url) + #expect(found.map(\.name) == ["alpha", "mu", "zeta"]) + } + + private func writeAdapter(in root: URL, name: String, targetModel: String?, r: Int) throws { + let dir = root.appendingPathComponent(name) + try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true) + let cfg: String + if let targetModel { + cfg = """ + { + "base_model_name_or_path": "\(targetModel)", + "r": \(r), + "lora_alpha": \(r * 2), + 
"target_modules": ["q_proj", "v_proj"], + "peft_type": "LORA" + } + """ + } else { + cfg = """ + { + "r": \(r), + "target_modules": ["q_proj", "v_proj"], + "peft_type": "LORA" + } + """ + } + try Data(cfg.utf8).write(to: dir.appendingPathComponent("adapter_config.json")) + try Data().write(to: dir.appendingPathComponent("adapter_model.safetensors")) + } +} + +private struct TempDir { + let url: URL + init() throws { + let base = FileManager.default.temporaryDirectory + .appendingPathComponent("macmlx-adapter-tests-\(UUID().uuidString)", isDirectory: true) + try FileManager.default.createDirectory(at: base, withIntermediateDirectories: true) + self.url = base + } +} diff --git a/MacMLXCore/Tests/MacMLXCoreTests/Models/LocalAdapterTests.swift b/MacMLXCore/Tests/MacMLXCoreTests/Models/LocalAdapterTests.swift new file mode 100644 index 0000000..c377557 --- /dev/null +++ b/MacMLXCore/Tests/MacMLXCoreTests/Models/LocalAdapterTests.swift @@ -0,0 +1,63 @@ +import Testing +import Foundation +@testable import MacMLXCore + +@Suite("LocalAdapter") +struct LocalAdapterTests { + + @Test + func decodesPEFTAdapterConfig() throws { + let json = """ + { + "base_model_name_or_path": "mlx-community/Qwen3-8B-4bit", + "r": 8, + "lora_alpha": 16, + "target_modules": ["q_proj", "v_proj"], + "peft_type": "LORA" + } + """ + let cfg = try JSONDecoder().decode(LocalAdapter.PEFTConfig.self, from: Data(json.utf8)) + #expect(cfg.baseModelNameOrPath == "mlx-community/Qwen3-8B-4bit") + #expect(cfg.r == 8) + #expect(cfg.loraAlpha == 16) + #expect(cfg.targetModules == ["q_proj", "v_proj"]) + #expect(cfg.peftType == "LORA") + } + + @Test + func decodesAdapterConfigMissingOptionalFields() throws { + let json = #"{"r": 4}"# + let cfg = try JSONDecoder().decode(LocalAdapter.PEFTConfig.self, from: Data(json.utf8)) + #expect(cfg.r == 4) + #expect(cfg.baseModelNameOrPath == nil) + #expect(cfg.loraAlpha == nil) + #expect(cfg.targetModules == nil) + #expect(cfg.peftType == nil) + } + + @Test + func 
roundTripsThroughJSON() throws { + let original = LocalAdapter( + name: "qwen3-medical-lora", + directory: URL(fileURLWithPath: "/tmp/medical"), + targetModel: "mlx-community/Qwen3-8B-4bit", + rank: 8, + targetModules: ["q_proj", "v_proj"] + ) + let data = try JSONEncoder().encode(original) + let back = try JSONDecoder().decode(LocalAdapter.self, from: data) + #expect(back == original) + } + + @Test + func idMirrorsName() { + let a = LocalAdapter( + name: "x", + directory: URL(fileURLWithPath: "/tmp/x"), + targetModel: nil, + rank: nil, + targetModules: [] + ) + #expect(a.id == "x") + } +} diff --git a/docs/superpowers/plans/2026-05-10-v0.5.md b/docs/superpowers/plans/2026-05-10-v0.5.md new file mode 100644 index 0000000..89dcb3e --- /dev/null +++ b/docs/superpowers/plans/2026-05-10-v0.5.md @@ -0,0 +1,403 @@ +# v0.5 — LoRA adapters + MCP client (+ continuous batching watch) + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Two independently-shippable feature tracks for v0.5 plus a +status-tracking note on continuous batching. + +1. **LoRA adapter inference** — drop a HuggingFace LoRA adapter (PEFT + format) into `~/.mac-mlx/adapters//`, the GUI lists it, the + user picks it from the parameters inspector, the engine applies it + at load time, and chat / API responses use the adapted weights. No + training UI — strictly inference-only. + +2. **MCP client** — counterpart to v0.4.0's MCP server. Configure + external MCP servers in `~/.mac-mlx/mcp.json` (mirrors + `claude_desktop_config.json`); chat models tool-call through them + when the assistant emits OpenAI-style `tool_calls`. Brings macMLX + into parity with Claude Desktop / Cursor as an MCP host. + +3. **Continuous batching** — *not committed* this minor. 
Apple still + hasn't ported `BatchGenerator` / `BatchKVCache` from `mlx-lm` + Python to `mlx-swift-lm`. Plan task §C tracks the upstream issue + and lays out the wrapper-vs-fork decision tree so we can move fast + when it lands. + +**Architecture summary:** +- LoRA: pure-Swift extension to `MLXSwiftEngine.load(_:)` — accepts + an optional `adapter:` parameter, runs `LoRATrain.convert(model:layers:)` + + `LoRATrain.loadLoRAWeights(model:url:)` from `mlx-swift-lm`'s + existing public API. Adapter format: PEFT-compatible + (`adapter_config.json` + `adapter_model.safetensors`). New + `AdapterStore` actor scans `~/.mac-mlx/adapters/` the same way + `ModelLibraryManager` scans `~/.mac-mlx/models/`. +- MCP client: new `MCPClientPool` actor in `MacMLXCore`, owns a + dictionary `[serverName: Client]` keyed by config-file entries. + Chat completions surface a `tools: [Tool]` field built from the + union of every connected server's `listTools()`. When the model + emits `tool_calls` in its response, route each call to the + matching server via `Client.callTool(name:arguments:)` and + inject the result back into the conversation as a tool-role + message. Reuses the `MCPBridge` pattern that already shipped for + the server side. +- Two PRs per track, four total. + +**Tech stack:** +- `mlx-swift-lm` 3.31.x — already pinned. Adds nothing new. +- `modelcontextprotocol/swift-sdk` 0.12.x — already pulled into + `macmlx-cli`. Promote to `MacMLXCore` for the client side. + +**Status:** Plan is firm; v0.4.1 has just shipped (PRs #33 / #34 / +#35). v0.5 is the next minor. 
+ +--- + +## Track A — LoRA adapter inference + +### File structure (adapters) + +| File | What it owns | +|---|---| +| `MacMLXCore/Sources/MacMLXCore/Models/LocalAdapter.swift` | new — `LocalAdapter` value type (`name`, `directory`, `targetModel: String?`, `rank: Int?`, `targetModules: [String]`) decoded from `adapter_config.json` | +| `MacMLXCore/Sources/MacMLXCore/Managers/AdapterStore.swift` | new — `actor AdapterStore` scans `~/.mac-mlx/adapters/`, returns `[LocalAdapter]` sorted by name | +| `MacMLXCore/Sources/MacMLXCore/Engine/InferenceEngine.swift` | extend `load(_:)` signature to `load(_ model: LocalModel, adapter: LocalAdapter? = nil)` (default keeps every existing call site working) | +| `MacMLXCore/Sources/MacMLXCore/Engine/MLXSwiftEngine.swift` | apply LoRA after the base model loads: `LoRATrain.convert(model:layers:)` + `LoRATrain.loadLoRAWeights(model:url:)` | +| `MacMLXCore/Sources/MacMLXCore/Models/ModelParameters.swift` | extend with `adapterName: String?` so the per-model parameters inspector remembers the user's pick | +| `macMLX/macMLX/Views/Settings/AdaptersSection.swift` | new — Settings tab section listing local adapters (helper for users to verify the directory scan) | +| `macMLX/macMLX/Views/Chat/ParametersInspector.swift` | extend — adapter picker below the system-prompt field, gated on `coordinator.currentModel` not being `.mlxVLM` for the MVP | +| Tests (Foundation PR): `MacMLXCore/Tests/MacMLXCoreTests/Models/LocalAdapterTests.swift`, `MacMLXCore/Tests/MacMLXCoreTests/Managers/AdapterStoreTests.swift` | new — config decoding + filesystem scan | + +### A.1 Foundation PR (this branch) + +Pure-Swift Core layer: data model + adapter directory scan. **No +engine integration, no UI, no LoRA application yet**. Ships +independently. 
+ +- [ ] **Step 1: Write the failing tests for `LocalAdapter`** + +```swift +@Suite("LocalAdapter") +struct LocalAdapterTests { + @Test + func decodesPEFTAdapterConfig() throws { + let json = """ + { + "base_model_name_or_path": "mlx-community/Qwen3-8B-4bit", + "r": 8, + "lora_alpha": 16, + "target_modules": ["q_proj", "v_proj"], + "peft_type": "LORA" + } + """ + let cfg = try JSONDecoder().decode(LocalAdapter.PEFTConfig.self, from: Data(json.utf8)) + #expect(cfg.baseModelNameOrPath == "mlx-community/Qwen3-8B-4bit") + #expect(cfg.r == 8) + #expect(cfg.targetModules == ["q_proj", "v_proj"]) + } + + @Test + func roundTripsThroughJSON() throws { + let original = LocalAdapter( + name: "qwen3-medical-lora", + directory: URL(fileURLWithPath: "/tmp/medical"), + targetModel: "mlx-community/Qwen3-8B-4bit", + rank: 8, + targetModules: ["q_proj", "v_proj"] + ) + let data = try JSONEncoder().encode(original) + let back = try JSONDecoder().decode(LocalAdapter.self, from: data) + #expect(back == original) + } +} +``` + +- [ ] **Step 2: Run — verify failure** + +```bash +swift test --package-path MacMLXCore --filter LocalAdapter +``` +Expected: FAIL — type undefined. + +- [ ] **Step 3: Write `LocalAdapter`** + +```swift +import Foundation + +/// One LoRA adapter directory present on the local filesystem. +/// +/// Discovered by `AdapterStore.scan(_:)` via `adapter_config.json` + +/// `adapter_model.safetensors`. Targets a single base model (advisory +/// — the user can apply mismatched adapters at their own risk; the +/// engine layer surfaces a clear error if the dimensions don't fit). +public struct LocalAdapter: Codable, Hashable, Identifiable, Sendable { + public var id: String { name } + public let name: String + public let directory: URL + /// Base-model id from the adapter's config (e.g. + /// `mlx-community/Qwen3-8B-4bit`). Optional — older adapters + /// don't always carry it. + public let targetModel: String? + public let rank: Int? 
+ public let targetModules: [String] + + public init( + name: String, + directory: URL, + targetModel: String?, + rank: Int?, + targetModules: [String] + ) { + self.name = name + self.directory = directory + self.targetModel = targetModel + self.rank = rank + self.targetModules = targetModules + } + + /// PEFT `adapter_config.json` shape — exposed for + /// `AdapterStore` to decode without re-deriving the schema. + public struct PEFTConfig: Codable, Sendable { + public let baseModelNameOrPath: String? + public let r: Int? + public let loraAlpha: Int? + public let targetModules: [String]? + public let peftType: String? + + private enum CodingKeys: String, CodingKey { + case baseModelNameOrPath = "base_model_name_or_path" + case r + case loraAlpha = "lora_alpha" + case targetModules = "target_modules" + case peftType = "peft_type" + } + } +} +``` + +- [ ] **Step 4: Write `AdapterStore` tests** + +```swift +@Suite("AdapterStore", .serialized) +struct AdapterStoreTests { + @Test + func scanFindsAdapterWithPEFTConfig() async throws { + let temp = try TempDir() + try writeAdapter(in: temp.url, name: "qwen3-medical", targetModel: "mlx-community/Qwen3-8B-4bit", r: 8) + let store = AdapterStore() + let found = try await store.scan(temp.url) + #expect(found.count == 1) + #expect(found[0].name == "qwen3-medical") + #expect(found[0].rank == 8) + } + + @Test + func scanIgnoresDirsWithoutAdapterConfig() async throws { + let temp = try TempDir() + let stray = temp.url.appendingPathComponent("not-an-adapter") + try FileManager.default.createDirectory(at: stray, withIntermediateDirectories: true) + let store = AdapterStore() + let found = try await store.scan(temp.url) + #expect(found.isEmpty) + } + + @Test + func scanRequiresAdapterModelSafetensors() async throws { + // Has config but no safetensors weights → not a usable adapter. 
+ let temp = try TempDir() + let dir = temp.url.appendingPathComponent("config-only") + try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true) + try Data(#"{"r":8,"target_modules":["q_proj"]}"#.utf8) + .write(to: dir.appendingPathComponent("adapter_config.json")) + let store = AdapterStore() + let found = try await store.scan(temp.url) + #expect(found.isEmpty) + } + + private func writeAdapter(in root: URL, name: String, targetModel: String, r: Int) throws { + let dir = root.appendingPathComponent(name) + try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true) + let cfg = """ + { + "base_model_name_or_path": "\(targetModel)", + "r": \(r), + "lora_alpha": \(r * 2), + "target_modules": ["q_proj", "v_proj"], + "peft_type": "LORA" + } + """ + try Data(cfg.utf8).write(to: dir.appendingPathComponent("adapter_config.json")) + try Data().write(to: dir.appendingPathComponent("adapter_model.safetensors")) + } +} +``` + +- [ ] **Step 5: Write `AdapterStore`** + +```swift +import Foundation + +/// Scans `~/.mac-mlx/adapters//` for PEFT-format LoRA adapters. +public actor AdapterStore { + private let fileManager: FileManager + + public init(fileManager: FileManager = .default) { + self.fileManager = fileManager + } + + public func scan(_ directory: URL) async throws -> [LocalAdapter] { + let contents = try fileManager.contentsOfDirectory( + at: directory, + includingPropertiesForKeys: [.isDirectoryKey], + options: [.skipsHiddenFiles] + ) + + var results: [LocalAdapter] = [] + for url in contents { + guard try url.resourceValues(forKeys: [.isDirectoryKey]).isDirectory == true, + !url.lastPathComponent.hasPrefix(".") else { continue } + + let configURL = url.appendingPathComponent("adapter_config.json") + let weightsURL = url.appendingPathComponent("adapter_model.safetensors") + guard fileManager.fileExists(atPath: configURL.path), + fileManager.fileExists(atPath: weightsURL.path), + let data = try? 
Data(contentsOf: configURL), + let cfg = try? JSONDecoder().decode(LocalAdapter.PEFTConfig.self, from: data) + else { continue } + + results.append(LocalAdapter( + name: url.lastPathComponent, + directory: url, + targetModel: cfg.baseModelNameOrPath, + rank: cfg.r, + targetModules: cfg.targetModules ?? [] + )) + } + return results.sorted { $0.name.localizedCompare($1.name) == .orderedAscending } + } +} +``` + +- [ ] **Step 6: Run full Core suite** + +```bash +swift test --package-path MacMLXCore +``` +Expected: all green; ~5 new tests added (2 LocalAdapter + 3 AdapterStore). + +- [ ] **Step 7: CHANGELOG entry under [Unreleased]** + +```markdown +- **LoRA Foundation** (v0.5, part 1 of 2). Pure-Swift Core layer + for HuggingFace LoRA adapter discovery — no engine integration + yet, no UI. `LocalAdapter` value type and `AdapterStore` actor + scan `~/.mac-mlx/adapters//` for PEFT-format adapters + (adapter_config.json + adapter_model.safetensors). Engine + application of adapter weights, parameters-inspector picker, + and the Adapters settings section follow in v0.5 part 2. +``` + +- [ ] **Step 8: Open PR `feat/v0.5-lora-foundation`** + +### A.2 Engine + UI PR (next session) + +Apply the LoRA adapter at load time, surface picker UI, persist +adapter selection per model. + +- [ ] Extend `InferenceEngine.load(_:)` to `load(_ model: LocalModel, adapter: LocalAdapter? = nil)` with default `nil` so the protocol stays backward-compatible. 
+- [ ] In `MLXSwiftEngine`, after the base `LLMModelFactory.shared.loadContainer(...)` call, if an adapter was passed: + ```swift + await container.perform { context in + LoRATrain.convert(model: context.model, layers: Array(context.model.loraLinearLayers().suffix(adapter.targetModuleCount))) + try LoRATrain.loadLoRAWeights(model: context.model, url: adapter.directory.appendingPathComponent("adapter_model.safetensors")) + } + ``` +- [ ] Surface mismatched-base errors gracefully — `LoRATrain.loadLoRAWeights` throws a typed shape mismatch; wrap as `EngineError.loraMismatch(adapter:expected:got:)`. +- [ ] `ModelParameters.adapterName: String?` extension + new field in the parameters inspector. Reload-on-change wiring already exists for system-prompt; mirror it for `adapterName`. +- [ ] `EngineCoordinator.load(_:)` becomes `load(_ model:, adapter:)` and `AppState.bootstrap` re-runs adapter resolution from `paramStore` for the rehydrated model. +- [ ] Settings → "Adapters" section listing detected adapters (sym-linked from the picker for verification). +- [ ] Test plan: a Metal-gated smoke test against `mlx-community/Qwen3-1.7B-4bit` + a small sample adapter from HF (~50 MB). + +--- + +## Track B — MCP client + +### File structure (mcp client) + +| File | What it owns | +|---|---| +| `MacMLXCore/Sources/MacMLXCore/MCP/MCPClientConfig.swift` | new — `MCPClientConfig` Codable struct mirrors `claude_desktop_config.json`'s `mcpServers` shape: `[name: ServerEntry]` where `ServerEntry { command: String, args: [String], env: [String:String]? }` | +| `MacMLXCore/Sources/MacMLXCore/MCP/MCPClientPool.swift` | new — `actor MCPClientPool` owns `[String: Client]`. Spawns subprocesses + `StdioTransport` per entry. 
`connectAll()` / `listAllTools()` / `callTool(server:name:arguments:)` | +| `MacMLXCore/Sources/MacMLXCore/Server/HummingbirdServer.swift` | extend chat-completions handler — when `request.tools` is provided, mirror them; when the response contains `tool_calls`, route each one through `MCPClientPool.callTool` and inject the result into a follow-up generation turn | +| `macMLX/macMLX/Views/Settings/MCPSettingsSection.swift` | new — Settings tab section showing configured MCP servers + connection status + "Add server…" sheet that writes back to `~/.mac-mlx/mcp.json` | +| `macMLX/macMLX/Views/Chat/ChatMessageView.swift` | extend — render tool-role messages distinctly (collapsible "Called `<server>.<tool>`" block, like the existing collapsible block) | +| Tests | unit tests against the SDK's stdio transport using the official `everything` server as a fixture | + +### B.1 Config + Pool PR + +Connection lifecycle + tool listing. **No chat-side tool-call +routing yet** — that lands in B.2. + +- [ ] Plan stub for `MCPClientConfig` + `MCPClientPool`. +- [ ] `MCPClientPool.connectAll()` reads `~/.mac-mlx/mcp.json`, spawns each entry as a subprocess via `Process` + `Pipe`, hands the file descriptors to a `StdioTransport`, and stores the connected `Client` in the pool dict. +- [ ] `MCPClientPool.listAllTools() async throws -> [String: [Tool]]` returns server-name → tools map. +- [ ] Settings → "MCP Servers" section: list configured servers + connection status dot. Add / remove buttons write back to `~/.mac-mlx/mcp.json`. +- [ ] Tests: mock `Transport` impl that scripts an `Initialize` + `ListTools` round-trip without spawning a real subprocess. + +### B.2 Tool-call routing PR + +- [ ] Extend `HummingbirdServer.handleChatCompletions` to inject discovered tools into the engine prompt (via the chat template's `tools=` mechanism — `Tokenizers.applyChatTemplate` already supports it). 
+- [ ] When the response contains a `tool_calls` field (OpenAI shape), parse each, route through the pool, append the tool result as a new `tool`-role message, and re-run generation. Loop with a max-depth cap (default 5). +- [ ] `ChatMessageView` renders tool-role messages as a collapsible "Called `<server>.<tool>`(args=...) → <result>" block. +- [ ] Tests: end-to-end against the SDK's `everything` example server (which exposes `add`, `echo`, etc.). + +--- + +## Track C — Continuous batching watch (no implementation) + +This minor does *not* ship batched inference. The Swift-side +`mlx-swift-lm` API is still single-request. Scope here is to keep +the door open without committing any code. + +- [ ] **Step 1: File an upstream tracking issue** + +```text +Title: Port BatchGenerator + BatchKVCache from Python mlx-lm to mlx-swift-lm +Body: Cite https://github.com/ml-explore/mlx-lm/pull/941 (KV quant + batching) + and https://github.com/ml-explore/mlx-lm/pull/1101 (memory-aware + BatchQuantizedKVCache). Note that the attention kernel + (MLXFast.scaledDotProductAttention) already accepts a leading + batch dim — what's missing is the iterator + cache plumbing. +``` + +- [ ] **Step 2: README roadmap update** — change the v0.5 line in the README to "(continuous batching deferred — upstream tracking #N)" so users know it slipped. + +- [ ] **Step 3: Decision tree (no code change yet):** + - If Apple ships a `BatchGenerator` in `mlx-swift-lm` ≤ 6.x.x by then, + we wrap it in v0.5.x — scheduler + token budget + per-sequence + demux → ~300 LOC of pure-Swift plumbing. + - If not, we revisit in v0.6 / v0.7 and decide between (a) waiting + longer or (b) carrying a Llama / Qwen-only batched fork (~2 weeks + + ongoing merge pain). 
+ +--- + +## Self-review + +- ✅ LoRA: inference-only, no training UI, uses upstream public API +- ✅ MCP client: configuration-file format mirrors Claude Desktop — + zero learning curve for users, and existing MCP servers in the + ecosystem all work +- ✅ Type consistency: `LocalAdapter` fields (`targetModel`, `rank`, `targetModules`) + match across the file-structure table, the Step 3 listing, and `AdapterStore` +- ✅ Backwards compat: `InferenceEngine.load(_:)` default param keeps + every existing call site (CLI / GUI / HTTP server) compiling +- ✅ Continuous batching honest about its blocker — no over-promise + +## Execution Handoff + +Two-track release. Tracks are independent — Track A and Track B +can land in either order. Track C is a documentation pass; do it +during whichever track ships first. + +Foundation PRs (A.1, B.1) are small and shippable inline. Engine / +UI PRs (A.2, B.2) are larger and benefit from a dedicated session. diff --git a/docs/superpowers/plans/2026-05-10-v0.6.md b/docs/superpowers/plans/2026-05-10-v0.6.md new file mode 100644 index 0000000..05bccee --- /dev/null +++ b/docs/superpowers/plans/2026-05-10-v0.6.md @@ -0,0 +1,146 @@ +# v0.6 — Speech I/O via DePasqualeOrg/mlx-swift-audio + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add MLX-native speech I/O — push-to-talk in chat (STT) and +optional auto-speak of assistant replies (TTS). Replaces the +WhisperKit + AVSpeechSynthesizer plan from the v0.3 roadmap because +[`DePasqualeOrg/mlx-swift-audio`](https://github.com/DePasqualeOrg/mlx-swift-audio) +landed and gives us both directions in one MLX-native package. + +**Architecture summary:** +- New SPM dependency: `DePasqualeOrg/mlx-swift-audio`, pinned to a + commit SHA (the upstream README warns "expect breaking changes" — + do not pin to `main`). 
Import only the `MLXAudio` core product; + skip `MLXAudio_Kokoro` because it transitively pulls GPL-3 + espeak-ng and would propagate GPL-3 to macMLX. +- STT: `MLXAudio.Whisper` (multilingual default) + `MLXAudio.FunASR` + (Chinese-strong, optional). Push-to-talk mic button in + `ChatInputView`. Auto-stop on 1 s silence using `AVAudioEngine`'s + level meter. +- TTS: default to `MLXAudio.Marvis` (streaming so first audio + comes back fast). Optional `MLXAudio.Chatterbox` for voice + cloning from a user-recorded reference clip. CosyVoice 2 / 3 + available behind a Settings checkbox. +- New `~/.mac-mlx/audio/` for: cached STT model (~3 GB Whisper + large-v3), cached TTS model, optional voice-clone reference + WAV files. +- Two PRs: STT (mic input) and TTS (read aloud) — independent. + +**Tech stack:** +- `mlx-swift-audio` — pinned by commit SHA. MIT, ~120★, 5/9 push. + Active enough that breaking changes between commits are real; + CI re-pins weekly via a smoke-test job. +- `AVFoundation` for mic capture + level metering + playback. +- macMLX core stays unchanged for engine plumbing — STT and TTS + are GUI-side modalities; the engine treats their output / input + as plain text. + +**Status:** Planning only. v0.5 (LoRA + MCP client) is the +preceding minor and should land first. Consider v0.6 after a v0.5.x +patch has settled. + +--- + +## Track A — STT (mic input in chat) + +### Files + +| File | What it owns | +|---|---| +| `MacMLXCore/Package.swift` | add `mlx-swift-audio` package + link `MLXAudio` to `MacMLXCore` target | +| `MacMLXCore/Sources/MacMLXCore/Audio/STTService.swift` | new — `actor STTService` wrapping `MLXAudio.Whisper`. 
Loads model on first call; `transcribe(_:locale:) async throws -> String` consumes a WAV/PCM buffer; cancellable | +| `macMLX/macMLX/Audio/MicCaptureSession.swift` | new — `AVAudioEngine`-backed mic recorder; level meter for silence detection; emits `Data` when stopped | +| `macMLX/macMLX/Views/Chat/ChatInputView.swift` | extend — push-to-talk mic button next to the paperclip; tap to start, tap to stop, hold-to-talk variant; auto-stop after 1 s silence; transcription drops into the text field | +| `macMLX/macMLX/Views/Settings/AudioSettingsSection.swift` | new — model picker (Whisper-small / medium / large-v3 / Fun-ASR), language preference, auto-stop sensitivity slider | + +### A.1 SPM wiring + STTService skeleton (PR) + +- [ ] Add the SPM dep, pinned to a commit SHA. Example: + ```swift + .package(url: "https://github.com/DePasqualeOrg/mlx-swift-audio.git", + revision: "<commit-sha>"), + ``` +- [ ] Link only the `MLXAudio` product (skip `Kokoro` to avoid GPL-3). +- [ ] `STTService` skeleton: stores a lazy `Whisper` instance + a `currentTask: Task` for cancellation. `transcribe(_:locale:)` throws if the model isn't loaded. `loadModel(_:)` resolves the cached safetensors at `~/.mac-mlx/audio/whisper/<model>/`. +- [ ] CI smoke job that re-resolves SPM weekly so we notice upstream breakage early. + +### A.2 Mic UI + chat integration (PR) + +- [ ] `MicCaptureSession` records via `AVAudioEngine` at 16 kHz mono PCM (Whisper's expected input). Level meter every 50 ms; auto-stop when RMS stays below threshold for ≥ 1 s. +- [ ] Add a microphone button to `ChatInputView` (between paperclip and text field). Tap toggles record. While recording, show a pulsing red dot + waveform. +- [ ] On stop, hand the buffer to `STTService.transcribe(_:)`, set the resulting text into `inputText`, do NOT auto-send (user reviews + edits before sending). +- [ ] Permission flow: first record asks for mic permission; deny shows a one-shot inline help row pointing at System Settings → Privacy.
+- [ ] Tests: `MicCaptureSession` is platform-bound (AVAudioEngine), so unit-test the silence-detector against synthetic level samples; leave integration testing for manual QA. + +--- + +## Track B — TTS (read assistant replies aloud) + +### Files + +| File | What it owns | +|---|---| +| `MacMLXCore/Sources/MacMLXCore/Audio/TTSService.swift` | new — `actor TTSService` wrapping `MLXAudio.Marvis` for streaming. `speak(_ text: String, voice: String?) -> AsyncStream` yields PCM buffers as they synthesise | +| `macMLX/macMLX/Audio/SpeechPlayer.swift` | new — `AVAudioPlayerNode`-based player that consumes the `AsyncStream` from `TTSService` | +| `macMLX/macMLX/Views/Chat/ChatMessageView.swift` | extend — speaker button on each assistant bubble; tap plays / stops | +| `macMLX/macMLX/Views/Settings/AudioSettingsSection.swift` | extend (from Track A) — TTS model picker (Marvis / Chatterbox / CosyVoice 2), default voice, auto-speak toggle, voice-clone reference upload | +| Voice clone storage: `~/.mac-mlx/audio/voices/<name>.wav` | + +### B.1 Streaming TTS service (PR) + +- [ ] `TTSService.loadModel(_:)` resolves cached weights at `~/.mac-mlx/audio/tts/<model>/`. +- [ ] `speak(_:voice:)` returns `AsyncStream` of 16-bit PCM buffers. Cancellation via the consuming task. +- [ ] Tests: mock model that emits 5 fixed buffers — verify cancellation truncates output. + +### B.2 Player + speaker buttons (PR) + +- [ ] `SpeechPlayer` schedules buffers on an `AVAudioPlayerNode`. Bus rate matches the model output (24 kHz for Marvis). +- [ ] Speaker button on `ChatMessageView` (assistant role only). Idle = "speaker.wave.2", playing = "speaker.wave.3.fill" pulsing. +- [ ] Auto-speak setting: when on, every completed assistant reply auto-plays. When user starts speaking (mic activates), pause TTS. + +--- + +## Risks + +- **Upstream stability** — `mlx-swift-audio` README warns "expect + breaking changes."
Mitigate with the weekly CI re-resolve job + (Track A.1) and an `Audio/Internal/` adapter so all model-specific + details live behind `STTService` / `TTSService` interfaces. +- **espeak-ng C-library / GPL-3** — explicitly skip the `Kokoro` + product; verify the `MLXAudio` core product doesn't transitively + pull espeak-ng (re-check on first SPM resolve). +- **Model sizes** — Whisper large-v3 is ~3 GB. Default to + `whisper-small` or `whisper-medium`; surface the size to the + user in Settings. +- **DMG packaging** — audio xcframeworks may inflate the .app size. + Compare before / after on the v0.6 release branch and tighten + `.dmg-postprocess` if needed. +- **Microphone permissions UX** — the first recording is the only + chance to present a user-friendly explanation; show a one-shot + "we need mic access for push-to-talk" dialog before it. + +## ⚠️ Do NOT use `Adamiito0909/mlx-swift-audio` + +A user surfaced this repo during the v0.4 roadmap pass — it's a +copycat of DePasqualeOrg's package with a malware-drop pattern +(README aggressively links a `.zip` deep inside an Xcode workspace +folder). Don't add it as a dependency. Use the legitimate +`DePasqualeOrg/mlx-swift-audio` exclusively. Already documented in +`docs/roadmap-post-v0.3.6.md`. + +## Self-review + +- ✅ STT and TTS are independently shippable (two PRs each, four + total; tracks share only the SPM dep + Settings section) +- ✅ License-clean (skip Kokoro) +- ✅ Doesn't touch `MLXSwiftEngine` — speech is a GUI-side modality +- ✅ Backwards compat: every existing chat path stays text-only by + default + +## Execution Handoff + +Pick this up after v0.5 (LoRA + MCP client) ships. Track A (STT) is +the higher-leverage entry; "speak to your local LLM" is more novel +than "have the local LLM speak back." Land A.1 + A.2 first, then +B.1 + B.2 if appetite remains.