Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@ Versioning follows [Semantic Versioning](https://semver.org/).
## [Unreleased]

### Added
- **Settings audio fields** (v0.6 audio foundation). Schema-only —
no runtime audio yet, just persistence, so the v0.6 STT / TTS
feature work has a settled storage schema to build on. Five new keys with
audio-off defaults: `audioEnabled`, `sttModel`, `ttsModel`,
`ttsVoice`, `ttsAutoSpeak`. Backwards-compatible decode: pre-v0.6
`~/.mac-mlx/settings.json` files load unchanged (every new key
decodes via `decodeIfPresent` and falls back to "audio off"). Three
new tests cover defaults, JSON round-trip, and legacy-JSON decode.
- **MCP Client Config** (v0.5 MCP track, part 1 of 2). Pure-Swift
data layer for connecting macMLX to external MCP servers (mirror
of v0.4.0's MCP server role, but reversed: now we *are* the host
Expand Down
58 changes: 56 additions & 2 deletions MacMLXCore/Sources/MacMLXCore/Managers/SettingsManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,32 @@ public struct Settings: Codable, Equatable, Sendable {
/// for small-memory Macs.
public var maxResidentMemoryGB: Int

// MARK: - Speech I/O (v0.6+)

/// Master toggle for speech features — `false` keeps mic capture
/// + TTS playback completely off, mirroring the v0.6 first-run UX.
///
/// Defaults to `false`, and decodes to `false` when the key is absent
/// from a stored settings file.
public var audioEnabled: Bool

/// Identifier of the STT model to load on demand
/// (e.g. `whisper-small`, `whisper-medium`, `whisper-large-v3`,
/// `fun-asr`). Nil means "user hasn't picked one" — the chat
/// input's mic button is intended to surface a one-shot picker on
/// first use.
///
/// Defaults to `nil`; a missing key also decodes to `nil`.
public var sttModel: String?

/// Identifier of the TTS model
/// (e.g. `marvis`, `chatterbox`, `cosyvoice2`). Nil = no TTS
/// model picked.
///
/// Defaults to `nil`; a missing key also decodes to `nil`.
public var ttsModel: String?

/// Voice id passed to the TTS model. Voice cloning is designed to
/// work by pointing this at a `~/.mac-mlx/audio/voices/<name>.wav`
/// reference clip. Nil = use the model's default voice.
///
/// Defaults to `nil`; a missing key also decodes to `nil`.
/// NOTE(review): the value is stored as a plain string; whether it is
/// resolved relative to the voices directory is not visible here — confirm.
public var ttsVoice: String?

/// Auto-speak completed assistant replies. False (default) keeps
/// playback opt-in via the per-bubble speaker button.
///
/// A missing key decodes to `false`.
public var ttsAutoSpeak: Bool

// MARK: Factory

/// Sensible out-of-the-box defaults — used when no settings file exists.
Expand All @@ -89,7 +115,12 @@ public struct Settings: Codable, Equatable, Sendable {
hfEndpoint: "https://huggingface.co",
kvCacheHotMB: 512,
kvCacheColdGB: 20,
maxResidentMemoryGB: max(4, Int(MemoryProbe.totalMemoryGB()) / 2)
maxResidentMemoryGB: max(4, Int(MemoryProbe.totalMemoryGB()) / 2),
audioEnabled: false,
sttModel: nil,
ttsModel: nil,
ttsVoice: nil,
ttsAutoSpeak: false
)

// MARK: Init
Expand All @@ -108,7 +139,12 @@ public struct Settings: Codable, Equatable, Sendable {
hfEndpoint: String = "https://huggingface.co",
kvCacheHotMB: Int = 512,
kvCacheColdGB: Int = 20,
maxResidentMemoryGB: Int = max(4, Int(MemoryProbe.totalMemoryGB()) / 2)
maxResidentMemoryGB: Int = max(4, Int(MemoryProbe.totalMemoryGB()) / 2),
audioEnabled: Bool = false,
sttModel: String? = nil,
ttsModel: String? = nil,
ttsVoice: String? = nil,
ttsAutoSpeak: Bool = false
) {
self.modelDirectory = modelDirectory
self.preferredEngine = preferredEngine
Expand All @@ -124,6 +160,11 @@ public struct Settings: Codable, Equatable, Sendable {
self.kvCacheHotMB = kvCacheHotMB
self.kvCacheColdGB = kvCacheColdGB
self.maxResidentMemoryGB = maxResidentMemoryGB
self.audioEnabled = audioEnabled
self.sttModel = sttModel
self.ttsModel = ttsModel
self.ttsVoice = ttsVoice
self.ttsAutoSpeak = ttsAutoSpeak
}

// MARK: - Codable (backward-compat decode)
Expand All @@ -146,6 +187,11 @@ public struct Settings: Codable, Equatable, Sendable {
case kvCacheHotMB
case kvCacheColdGB
case maxResidentMemoryGB
case audioEnabled
case sttModel
case ttsModel
case ttsVoice
case ttsAutoSpeak
}

public init(from decoder: Decoder) throws {
Expand All @@ -167,6 +213,14 @@ public struct Settings: Codable, Equatable, Sendable {
self.maxResidentMemoryGB =
(try c.decodeIfPresent(Int.self, forKey: .maxResidentMemoryGB))
?? max(4, Int(MemoryProbe.totalMemoryGB()) / 2)
// v0.6 audio fields — pre-v0.6 settings.json files don't carry
// them. Fall back to "audio off" so existing installs upgrade
// without surprise mic permission prompts.
self.audioEnabled = try c.decodeIfPresent(Bool.self, forKey: .audioEnabled) ?? false
self.sttModel = try c.decodeIfPresent(String.self, forKey: .sttModel)
self.ttsModel = try c.decodeIfPresent(String.self, forKey: .ttsModel)
self.ttsVoice = try c.decodeIfPresent(String.self, forKey: .ttsVoice)
self.ttsAutoSpeak = try c.decodeIfPresent(Bool.self, forKey: .ttsAutoSpeak) ?? false
}
}

Expand Down
62 changes: 62 additions & 0 deletions MacMLXCore/Tests/MacMLXCoreTests/Managers/SettingsAudioTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import Testing
import Foundation
@testable import MacMLXCore

/// Exercises the speech-related `Settings` fields added in v0.6:
/// factory defaults, JSON round-tripping, and decoding of settings
/// files written before those fields existed.
@Suite("Settings audio fields (v0.6)")
struct SettingsAudioTests {

    /// Asserts that every audio field holds its "audio off" value.
    ///
    /// The caller's source location is forwarded so a failing expectation
    /// is reported at the invoking test rather than inside this helper.
    private func expectAudioOff(
        _ settings: Settings,
        sourceLocation: SourceLocation = #_sourceLocation
    ) {
        #expect(!settings.audioEnabled, sourceLocation: sourceLocation)
        #expect(settings.sttModel == nil, sourceLocation: sourceLocation)
        #expect(settings.ttsModel == nil, sourceLocation: sourceLocation)
        #expect(settings.ttsVoice == nil, sourceLocation: sourceLocation)
        #expect(!settings.ttsAutoSpeak, sourceLocation: sourceLocation)
    }

    /// The factory defaults must ship with audio disabled and no
    /// STT / TTS model or voice selected.
    @Test
    func defaultSettingsHaveAudioOffAndNoModelsPicked() {
        expectAudioOff(Settings.default)
    }

    /// Non-default audio values must survive an encode → decode cycle.
    @Test
    func roundTripsThroughJSON() throws {
        var original = Settings.default
        original.audioEnabled = true
        original.sttModel = "whisper-medium"
        original.ttsModel = "marvis"
        original.ttsVoice = "voices/clone-kevin.wav"
        original.ttsAutoSpeak = true

        let payload = try JSONEncoder().encode(original)
        let restored = try JSONDecoder().decode(Settings.self, from: payload)

        #expect(restored.audioEnabled)
        #expect(restored.sttModel == "whisper-medium")
        #expect(restored.ttsModel == "marvis")
        #expect(restored.ttsVoice == "voices/clone-kevin.wav")
        #expect(restored.ttsAutoSpeak)
    }

    /// Pre-v0.6 settings.json files carry none of the audio keys;
    /// decoding one must yield "audio off" so existing installs
    /// upgrade without surprise.
    @Test
    func legacyJSONWithoutAudioKeysDecodesWithAudioOff() throws {
        let legacyJSON = """
        {
        "modelDirectory": "file:///tmp/models",
        "preferredEngine": "mlx-swift-lm",
        "serverPort": 8000,
        "autoStartServer": false,
        "lastLoadedModel": null,
        "onboardingComplete": true,
        "pythonPath": null,
        "swiftLMPath": null,
        "sparkleUpdateChannel": "release",
        "logRetentionDays": 7
        }
        """
        let upgraded = try JSONDecoder().decode(
            Settings.self,
            from: Data(legacyJSON.utf8)
        )
        expectAudioOff(upgraded)
    }
}