Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions Shared/Sources/Shared/Audio/WAVEncoder.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import Foundation

/// Wraps raw PCM audio in an in-memory WAV container.
///
/// The output is a 44-byte RIFF/WAVE header followed by the PCM bytes exactly as
/// supplied, so no transcoding takes place.
public enum WAVEncoder {
    /// Length in bytes of the RIFF + `fmt ` + `data` headers written by `encode`.
    private static let headerSize = 44

    /// Encodes PCM samples as a complete WAV file.
    ///
    /// The integer-to-field conversions below trap at runtime when a value does
    /// not fit its header field (for example a negative channel count, or PCM
    /// data too large for a 32-bit size).
    ///
    /// - Parameters:
    ///   - pcmData: Raw PCM audio bytes (for example S16LE).
    ///   - sampleRate: Samples per second (for example 16000).
    ///   - channels: Number of channels (for example 1 for mono).
    ///   - bitsPerSample: Bits per sample (for example 16).
    /// - Returns: The WAV header immediately followed by `pcmData`.
    public static func encode(pcmData: Data, sampleRate: Int, channels: Int, bitsPerSample: Int) -> Data {
        // A sample occupies a whole number of bytes, so round up; truncating would
        // under-report the frame size for depths such as 12 or 20 bits.
        let bytesPerSample = (bitsPerSample + 7) / 8
        let blockAlign = channels * bytesPerSample
        let byteRate = sampleRate * blockAlign
        let dataSize = UInt32(pcmData.count)
        // RIFF size counts everything after the 8-byte "RIFF" + size prefix.
        let riffSize = UInt32(36) + dataSize

        // Build header and payload in one buffer instead of concatenating two.
        var wav = Data()
        wav.reserveCapacity(headerSize + pcmData.count)

        // RIFF chunk descriptor
        wav.append(contentsOf: "RIFF".utf8)
        wav.appendLittleEndian(riffSize)
        wav.append(contentsOf: "WAVE".utf8)

        // fmt sub-chunk
        wav.append(contentsOf: "fmt ".utf8)
        wav.appendLittleEndian(UInt32(16)) // fmt sub-chunk payload size
        wav.appendLittleEndian(UInt16(1)) // format tag 1 = PCM
        wav.appendLittleEndian(UInt16(channels))
        wav.appendLittleEndian(UInt32(sampleRate))
        wav.appendLittleEndian(UInt32(byteRate))
        wav.appendLittleEndian(UInt16(blockAlign))
        wav.appendLittleEndian(UInt16(bitsPerSample))

        // data sub-chunk
        wav.append(contentsOf: "data".utf8)
        wav.appendLittleEndian(dataSize)
        wav.append(pcmData)

        return wav
    }
}

// MARK: - Data helpers for little-endian encoding

private extension Data {
    /// Appends `value` as two bytes, least-significant byte first.
    mutating func appendLittleEndian(_ value: UInt16) {
        append(UInt8(truncatingIfNeeded: value))
        append(UInt8(truncatingIfNeeded: value >> 8))
    }

    /// Appends `value` as four bytes, least-significant byte first.
    mutating func appendLittleEndian(_ value: UInt32) {
        for shift in stride(from: 0, to: UInt32.bitWidth, by: 8) {
            append(UInt8(truncatingIfNeeded: value >> shift))
        }
    }
}
140 changes: 128 additions & 12 deletions Shared/Sources/Shared/Core/Storage/APIKeyStorage.swift
Original file line number Diff line number Diff line change
@@ -1,24 +1,81 @@
import Foundation

/// 存储火山引擎语音识别 API Key 和配置(使用 UserDefaults)
/// How the vLLM server is called.
public enum VLLMApiMode: String, CaseIterable, Identifiable, Sendable {
    /// Standard OpenAI Audio Transcriptions endpoint (POST /audio/transcriptions, multipart/form-data).
    case audioTranscriptions = "audio_transcriptions"
    /// Chat Completions endpoint (POST /chat/completions, base64 audio data URL).
    case chatCompletions = "chat_completions"

    /// Stable identity for `Identifiable`; equal to the raw value.
    public var id: String {
        return rawValue
    }

    /// Human-readable label for this mode.
    public var displayName: String {
        let label: String
        switch self {
        case .audioTranscriptions:
            label = "Audio Transcriptions (标准)"
        case .chatCompletions:
            label = "Chat Completions"
        }
        return label
    }
}

/// Speech-recognition service provider.
public enum SpeechProvider: String, CaseIterable, Identifiable, Sendable {
    case volcEngine = "volcengine"
    case vllm = "vllm"

    /// Stable identity for `Identifiable`; equal to the raw value.
    public var id: String {
        return rawValue
    }

    /// Human-readable label for this provider.
    public var displayName: String {
        let label: String
        switch self {
        case .volcEngine:
            label = "火山引擎豆包"
        case .vllm:
            label = "vLLM (OpenAI 兼容)"
        }
        return label
    }
}

/// 存储语音识别 API 配置(使用 UserDefaults)
@MainActor
public final class APIKeyStorage: Sendable {
public static let shared = APIKeyStorage()

private let defaults = UserDefaults.standard

/// UserDefaults key strings for every setting this type persists.
private enum Keys {
// Shared across providers
static let provider = "speech.provider"
// Volcengine
static let apiKey = "volcengine.api.key"
static let resourceId = "volcengine.resource.id"
// vLLM
static let vllmBaseURL = "vllm.base.url"
static let vllmModelName = "vllm.model.name"
static let vllmAPIKey = "vllm.api.key"
static let vllmApiMode = "vllm.api.mode"
}

public init() {}


// MARK: - Provider 选择

/// The speech-recognition provider currently selected.
///
/// Reading yields `.volcEngine` when nothing is stored or the stored raw value
/// does not match a known provider. Writing persists the provider's raw value.
public var selectedProvider: SpeechProvider {
    get {
        let stored = defaults.string(forKey: Keys.provider)
        return stored.flatMap(SpeechProvider.init(rawValue:)) ?? .volcEngine
    }
    set(provider) {
        defaults.set(provider.rawValue, forKey: Keys.provider)
    }
}

// MARK: - 火山引擎配置

/// The Volcengine API key, or `nil` when none is stored.
///
/// NOTE(review): this secret is kept in UserDefaults; confirm whether the
/// Keychain would be a more appropriate store.
public var apiKey: String? {
    get {
        return defaults.string(forKey: Keys.apiKey)
    }
    set(key) {
        defaults.set(key, forKey: Keys.apiKey)
    }
}

/// 资源 ID(模型版本)
public var resourceId: VolcEngineResourceId {
get {
Expand All @@ -32,22 +89,81 @@ public final class APIKeyStorage: Sendable {
defaults.set(newValue.rawValue, forKey: Keys.resourceId)
}
}


// MARK: - vLLM 配置

/// Base URL of the vLLM server; reads as `http://localhost:8000/v1` when nothing is stored.
public var vllmBaseURL: String {
    get {
        if let stored = defaults.string(forKey: Keys.vllmBaseURL) {
            return stored
        }
        return "http://localhost:8000/v1"
    }
    set(url) {
        defaults.set(url, forKey: Keys.vllmBaseURL)
    }
}

/// Model name sent to the vLLM server; reads as `openai/whisper-large-v3` when nothing is stored.
public var vllmModelName: String {
    get {
        guard let stored = defaults.string(forKey: Keys.vllmModelName) else {
            return "openai/whisper-large-v3"
        }
        return stored
    }
    set(model) {
        defaults.set(model, forKey: Keys.vllmModelName)
    }
}

/// Optional API key for the vLLM server, or `nil` when none is stored.
public var vllmAPIKey: String? {
    get {
        return defaults.string(forKey: Keys.vllmAPIKey)
    }
    set(key) {
        defaults.set(key, forKey: Keys.vllmAPIKey)
    }
}

/// The vLLM call mode.
///
/// Reading yields `.audioTranscriptions` when nothing is stored or the stored
/// raw value does not match a known mode. Writing persists the mode's raw value.
public var vllmApiMode: VLLMApiMode {
    get {
        let stored = defaults.string(forKey: Keys.vllmApiMode)
        return stored.flatMap(VLLMApiMode.init(rawValue:)) ?? .audioTranscriptions
    }
    set(mode) {
        defaults.set(mode.rawValue, forKey: Keys.vllmApiMode)
    }
}

// MARK: - 状态检查

/// Whether the currently selected provider has the settings it needs.
///
/// - Volcengine: a non-empty API key is stored.
/// - vLLM: both the base URL and the model name are non-empty.
public var isConfigured: Bool {
    // Decide per provider. An unconditional API-key check ahead of this switch
    // would make it unreachable and judge vLLM by the Volcengine key.
    switch selectedProvider {
    case .volcEngine:
        guard let apiKey else { return false }
        return !apiKey.isEmpty
    case .vllm:
        return !vllmBaseURL.isEmpty && !vllmModelName.isEmpty
    }
}


// MARK: - 保存

/// Persists the Volcengine API key.
public func save(apiKey: String) {
    defaults.set(apiKey, forKey: Keys.apiKey)
}

/// Persists the Volcengine resource ID by its raw value.
public func save(resourceId: VolcEngineResourceId) {
    defaults.set(resourceId.rawValue, forKey: Keys.resourceId)
}

/// Persists the vLLM base URL.
public func save(vllmBaseURL: String) {
    defaults.set(vllmBaseURL, forKey: Keys.vllmBaseURL)
}

/// Persists the vLLM model name.
public func save(vllmModelName: String) {
    defaults.set(vllmModelName, forKey: Keys.vllmModelName)
}

/// Persists the vLLM API key; the value may be `nil`.
public func save(vllmAPIKey: String?) {
    defaults.set(vllmAPIKey, forKey: Keys.vllmAPIKey)
}

/// Persists the vLLM call mode by its raw value.
public func save(vllmApiMode: VLLMApiMode) {
    defaults.set(vllmApiMode.rawValue, forKey: Keys.vllmApiMode)
}

/// Removes the stored Volcengine and vLLM settings.
///
/// The provider selection key is not removed here.
public func clear() {
    let storedKeys = [
        Keys.apiKey,
        Keys.resourceId,
        Keys.vllmBaseURL,
        Keys.vllmModelName,
        Keys.vllmAPIKey,
        Keys.vllmApiMode,
    ]
    for key in storedKeys {
        defaults.removeObject(forKey: key)
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import Foundation

/// Lightweight `multipart/form-data` encoder.
///
/// Builds an HTTP upload request body in memory without third-party dependencies.
struct MultipartFormData {
    /// Line terminator used between header lines and parts.
    private static let crlf: [UInt8] = [0x0D, 0x0A]

    private let boundary: String
    private var body = Data()

    /// Value for the `Content-Type` request header, including the boundary.
    var contentType: String {
        "multipart/form-data; boundary=\(boundary)"
    }

    /// Creates an encoder.
    /// - Parameter boundary: Part delimiter; defaults to a fresh UUID string.
    init(boundary: String = UUID().uuidString) {
        self.boundary = boundary
    }

    /// Appends a text field.
    /// - Parameters:
    ///   - name: Field name; `"`, CR and LF are percent-escaped.
    ///   - value: Field value, written as UTF-8 without modification.
    mutating func addField(name: String, value: String) {
        appendLine("--\(boundary)")
        appendLine("Content-Disposition: form-data; name=\"\(Self.escapedParameter(name))\"")
        appendLine("")
        appendLine(value)
    }

    /// Appends a file field.
    /// - Parameters:
    ///   - name: Field name; `"`, CR and LF are percent-escaped.
    ///   - fileName: File name reported to the server; escaped like `name`.
    ///   - mimeType: Value for the part's `Content-Type`; CR and LF are removed.
    ///   - data: File contents, written without modification.
    mutating func addFile(name: String, fileName: String, mimeType: String, data: Data) {
        let disposition = "Content-Disposition: form-data; name=\"\(Self.escapedParameter(name))\"; "
            + "filename=\"\(Self.escapedParameter(fileName))\""
        appendLine("--\(boundary)")
        appendLine(disposition)
        appendLine("Content-Type: \(Self.removingLineBreaks(mimeType))")
        appendLine("")
        body.append(data)
        body.append(contentsOf: Self.crlf)
    }

    /// Finishes encoding and returns the request body with the closing boundary.
    ///
    /// The encoder itself is not modified, so further parts may still be added.
    func finalize() -> Data {
        var result = body
        result.append(contentsOf: "--\(boundary)--".utf8)
        result.append(contentsOf: Self.crlf)
        return result
    }

    /// Appends `line` as UTF-8 followed by CRLF.
    private mutating func appendLine(_ line: String) {
        body.append(contentsOf: line.utf8)
        body.append(contentsOf: Self.crlf)
    }

    /// Percent-escapes the characters that would end a quoted header parameter
    /// or start a new header line.
    private static func escapedParameter(_ value: String) -> String {
        var escaped = ""
        // Walk scalars rather than Characters: "\r\n" is a single Character.
        for scalar in value.unicodeScalars {
            switch scalar {
            case "\r": escaped += "%0D"
            case "\n": escaped += "%0A"
            case "\"": escaped += "%22"
            default: escaped.unicodeScalars.append(scalar)
            }
        }
        return escaped
    }

    /// Drops CR and LF so a header value cannot spill onto a new line.
    private static func removingLineBreaks(_ value: String) -> String {
        var cleaned = String.UnicodeScalarView()
        for scalar in value.unicodeScalars where scalar != "\r" && scalar != "\n" {
            cleaned.append(scalar)
        }
        return String(cleaned)
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import Foundation

/// Speech-recognition backend protocol (internal use).
/// Each backend is responsible for converting its native responses into the unified SpeechRecognitionResult.
/// Backend instances are created and owned inside the @MainActor SpeechRecognitionService.
@MainActor
protocol SpeechBackend {
/// Starts a recognition session and returns the stream of results.
func startSession() async throws -> AsyncStream<SpeechRecognitionResult>

/// Sends audio data.
/// - Streaming backends (Volcengine): sent immediately over the WebSocket.
/// - Batch backends (vLLM): buffered in memory.
func sendAudioData(_ data: Data) async throws

/// Signals that audio input is complete.
/// - Streaming backends: send the end-of-audio packet.
/// - Batch backends: assemble a WAV, POST it to the server, and yield the result to the stream.
func finishAudio() async throws

/// Tests whether the connection is usable.
func testConnection() async throws

/// Disconnects and cleans up.
func disconnect() async
}
Loading