Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src-tauri/Info.plist
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>NSMicrophoneUsageDescription</key>
<string>VibeToText needs microphone access to capture speech for local dictation.</string>
</dict>
</plist>
29 changes: 29 additions & 0 deletions src-tauri/src/audio.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use anyhow::{anyhow, Result};
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
use crossbeam_channel::{bounded, Receiver, Sender};
use serde::Serialize;
use std::sync::{
atomic::{AtomicBool, Ordering},
Arc,
Expand All @@ -16,6 +17,13 @@ pub struct Frame(pub Vec<f32>);
#[derive(Clone, Copy, Debug)]
pub struct Level(pub f32);

/// Description of one audio input device, serialized for the settings UI.
///
/// `rename_all = "camelCase"` means `is_default` is emitted as `isDefault`.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct InputDeviceInfo {
/// Device name as reported by the audio host.
pub name: String,
/// True when this device's name equals the host's default input device name.
pub is_default: bool,
}

pub struct AudioCapture {
stop: Arc<AtomicBool>,
pub frames: Receiver<Frame>,
Expand Down Expand Up @@ -75,6 +83,27 @@ impl AudioCapture {
}
}

/// Enumerate the input devices exposed by the default audio host.
///
/// Devices whose name cannot be read are left out. The result lists any
/// device matching the host's default input name first, then the rest,
/// each group ordered by case-insensitive name.
///
/// # Errors
/// Returns an error when the host cannot enumerate its input devices.
pub fn input_devices() -> Result<Vec<InputDeviceInfo>> {
    let host = cpal::default_host();
    let default_name = host
        .default_input_device()
        .and_then(|device| device.name().ok());

    let mut found: Vec<InputDeviceInfo> = host
        .input_devices()?
        .filter_map(|device| device.name().ok())
        .map(|name| InputDeviceInfo {
            is_default: default_name.as_deref() == Some(name.as_str()),
            name,
        })
        .collect();

    // `false < true`, so negating the flag puts default devices first; the
    // lowercase name breaks ties. The key is computed once per element.
    found.sort_by_cached_key(|info| (!info.is_default, info.name.to_lowercase()));
    Ok(found)
}

fn run_stream(
device: cpal::Device,
config: cpal::SupportedStreamConfig,
Expand Down
46 changes: 35 additions & 11 deletions src-tauri/src/inject.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
use anyhow::Result;
use enigo::{Direction, Enigo, Key, Keyboard, Settings};
#[cfg(not(target_os = "macos"))]
use enigo::Key;
use enigo::{Direction, Enigo, Keyboard, Settings};
use tauri::AppHandle;
use tauri_plugin_clipboard_manager::ClipboardExt;

/// Type a string at the current cursor as if the user typed it.
/// Used in stream mode for incremental partials.
///
/// # Errors
/// Returns an error if the input backend cannot be created or the text
/// cannot be sent.
#[allow(dead_code)]
pub fn type_text(s: &str) -> Result<()> {
    // Create the backend once, through `new_enigo`, so the permission-prompt
    // setting it applies is used here too. A second `Enigo` built from
    // `Settings::default()` would be shadowed immediately and never used.
    let mut enigo = new_enigo()?;
    enigo.text(s)?;
    Ok(())
}
Expand Down Expand Up @@ -74,15 +76,7 @@ pub fn paste_text(app: &AppHandle, s: &str) -> Result<()> {
);
}

let mut enigo = Enigo::new(&Settings::default())?;
#[cfg(target_os = "macos")]
let mod_key = Key::Meta;
#[cfg(not(target_os = "macos"))]
let mod_key = Key::Control;

enigo.key(mod_key, Direction::Press)?;
enigo.key(Key::Unicode('v'), Direction::Click)?;
enigo.key(mod_key, Direction::Release)?;
send_paste_shortcut()?;

// Restore the user's previous clipboard contents. CRITICAL: there's
// no API for "the foreground app finished consuming this paste,"
Expand Down Expand Up @@ -136,3 +130,33 @@ pub fn paste_text(app: &AppHandle, s: &str) -> Result<()> {
}
Ok(())
}

/// Send the paste shortcut (Command+V) on macOS using raw keycodes.
///
/// # Errors
/// Returns the first error raised while creating the input backend or
/// sending a key event; later events in the sequence are not sent.
#[cfg(target_os = "macos")]
fn send_paste_shortcut() -> Result<()> {
    let mut enigo = new_enigo()?;
    // macOS keycode 55 = Command, 9 = V. Using raw keycodes avoids Enigo's
    // layout lookup path, which must run on the main dispatch queue.
    let sequence = [
        (55, Direction::Press),
        (9, Direction::Click),
        (55, Direction::Release),
    ];
    for (keycode, direction) in sequence {
        enigo.raw(keycode, direction)?;
    }
    Ok(())
}

/// Send the paste shortcut (Ctrl+V) on non-macOS targets.
///
/// # Errors
/// Returns the first error raised while creating the input backend or
/// sending a key event; later events in the sequence are not sent.
#[cfg(not(target_os = "macos"))]
fn send_paste_shortcut() -> Result<()> {
    let mut enigo = new_enigo()?;
    // Hold Control, tap V, then let Control go.
    let sequence = [
        (Key::Control, Direction::Press),
        (Key::Unicode('v'), Direction::Click),
        (Key::Control, Direction::Release),
    ];
    for (key, direction) in sequence {
        enigo.key(key, direction)?;
    }
    Ok(())
}

/// Build an `Enigo` handle with the permission prompt switched off.
///
/// # Errors
/// Returns an error when `Enigo::new` fails with these settings.
fn new_enigo() -> Result<Enigo> {
    // Accessibility permission is requested elsewhere in the app. Opening the
    // system prompt again on every dictation is noisy and can pull focus away
    // from the app that should receive the text.
    let settings = Settings {
        open_prompt_to_get_permissions: false,
        ..Settings::default()
    };
    let enigo = Enigo::new(&settings)?;
    Ok(enigo)
}
6 changes: 6 additions & 0 deletions src-tauri/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ async fn get_config(state: tauri::State<'_, Arc<AppState>>) -> Result<AppConfig,
Ok(state.config.lock().await.clone())
}

/// Tauri command: return the available audio input devices.
///
/// Any error from `audio::input_devices` is flattened to its string form so
/// it can cross the command boundary.
#[tauri::command]
fn list_input_devices() -> Result<Vec<audio::InputDeviceInfo>, String> {
    let devices = audio::input_devices().map_err(|err| err.to_string())?;
    Ok(devices)
}

#[tauri::command]
async fn save_config(
app: tauri::AppHandle,
Expand Down Expand Up @@ -1176,6 +1181,7 @@ pub fn run() {
})
.invoke_handler(tauri::generate_handler![
get_config,
list_input_devices,
save_config,
toggle_dictation,
current_backend,
Expand Down
5 changes: 4 additions & 1 deletion src/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,11 @@ <h2>Behavior</h2>
</label>
<label>
<span>Microphone</span>
<input id="mic_device" placeholder="(system default)" />
<select id="mic_device">
<option value="">(system default)</option>
</select>
</label>
<p id="mic_device_hint" class="hint small">Microphones: checking…</p>
<label class="toggle compact">
<input type="checkbox" id="auto_start" />
<span class="toggle-track"><span class="toggle-thumb"></span></span>
Expand Down
63 changes: 53 additions & 10 deletions src/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ const fields = [
"toggle_settings_hotkey",
"dictate_hotkey",
"stt_toggle_hotkey",
"mic_device",
];

async function loadConfig() {
Expand All @@ -47,6 +46,7 @@ async function loadConfig() {
const el = document.getElementById(k);
if (el) el.value = cfg[toCamel(k)] ?? "";
}
await refreshMicrophones(cfg.micDevice || "");
document.getElementById("trailing_space").checked = !!cfg.trailingSpace;
// STT master toggle. cfg.sttEnabled defaults to true if missing on
// older configs that didn't have the field.
Expand Down Expand Up @@ -116,6 +116,55 @@ function toCamel(s) {
return s.replace(/_([a-z])/g, (_, c) => c.toUpperCase());
}

/**
 * Rebuild the microphone dropdown from the backend's device list.
 *
 * The first entry always stands for "system default" (value ""). Each listed
 * device follows. If `selected` names a device that is not in the list, an
 * extra placeholder entry keeps that choice visible instead of silently
 * falling back to the default.
 *
 * If listing fails, the dropdown falls back to the default entry plus the
 * saved choice (if any) and is left enabled.
 *
 * @param {string} [selected] Device name to select; defaults to the
 *   dropdown's current value.
 * @returns {Promise<void>}
 */
async function refreshMicrophones(selected = val("mic_device")) {
  const select = document.getElementById("mic_device");
  const hint = document.getElementById("mic_device_hint");
  if (!select) return;

  try {
    const devices = await invoke("list_input_devices");
    select.innerHTML = "";

    const defaultDevice = devices.find((d) => d.isDefault);
    select.appendChild(
      new Option(
        defaultDevice
          ? `(system default — ${defaultDevice.name})`
          : "(system default)",
        ""
      )
    );

    for (const device of devices) {
      const label = device.isDefault ? `${device.name} — default` : device.name;
      select.appendChild(new Option(label, device.name));
    }

    // Keep a saved-but-unplugged device selectable so the stored choice is
    // not lost just because the hardware is absent right now.
    if (selected && !devices.some((d) => d.name === selected)) {
      select.appendChild(new Option(`${selected} — not currently available`, selected));
    }

    select.value = selected || "";
    select.disabled = devices.length === 0;
    if (hint) {
      hint.textContent =
        devices.length === 0
          ? "No microphones found"
          : `${devices.length} microphone${devices.length === 1 ? "" : "s"} available`;
    }
  } catch (e) {
    console.error("failed to list microphones:", e);
    select.innerHTML = "";
    select.appendChild(new Option("(system default)", ""));
    if (selected) {
      select.appendChild(new Option(`${selected} — saved`, selected));
    }
    // Assign the value only after the matching option exists.
    select.value = selected || "";
    // An earlier successful refresh that found no devices disables the
    // control; clear that here so a later listing failure does not leave the
    // dropdown locked.
    select.disabled = false;
    if (hint) hint.textContent = "Microphones unavailable";
  }
}

const HOTKEY_DEFAULTS = {
toggle_settings_hotkey: "Ctrl+Alt+V",
dictate_hotkey: "Ctrl+Alt+D",
Expand Down Expand Up @@ -282,15 +331,8 @@ function wireAutoSave() {
if (auto) auto.addEventListener("change", () => scheduleSave({ immediate: true }));
const stt = document.getElementById("stt_enabled");
if (stt) stt.addEventListener("change", () => scheduleSave({ immediate: true }));
// Free-text fields — debounce so typing isn't one save per keystroke.
["mic_device"].forEach((id) => {
const el = document.getElementById(id);
if (!el) return;
el.addEventListener("input", () => scheduleSave());
// Commit immediately when focus leaves so a debounced save in flight
// doesn't get stranded.
el.addEventListener("blur", () => scheduleSave({ immediate: true }));
});
const mic = document.getElementById("mic_device");
if (mic) mic.addEventListener("change", () => scheduleSave({ immediate: true }));
// Multi-line text fields (initial prompt, custom dictionary).
// Same debounce-on-input + commit-on-blur pattern.
["whisper_initial_prompt", "custom_dictionary"].forEach((id) => {
Expand Down Expand Up @@ -1054,6 +1096,7 @@ listen("settings-shown", () => {
// Force reflow so the next class-add re-runs the keyframes.
void body.offsetWidth;
body.classList.add("show-pop");
refreshMicrophones();
});

// Kept for forward-compatibility — currently we always select "whisper".
Expand Down
Loading