diff --git a/pyproject.toml b/pyproject.toml index ed579bf..968362e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ npu = [ ] [project.scripts] -vibemouse = "vibemouse.main:main" +vibemouse = "vibemouse.cli.main:main" [tool.setuptools] package-dir = {"" = "."} diff --git a/tests/test_main.py b/tests/cli/test_main.py similarity index 100% rename from tests/test_main.py rename to tests/cli/test_main.py diff --git a/tests/test_app.py b/tests/core/test_app.py similarity index 99% rename from tests/test_app.py rename to tests/core/test_app.py index 5b1caa4..818653f 100644 --- a/tests/test_app.py +++ b/tests/core/test_app.py @@ -504,7 +504,7 @@ def test_transcription_failure_logs_exception(self) -> None: getattr(subject, "_transcribe_and_output"), ) - with self.assertLogs("vibemouse.app", level="ERROR") as captured: + with self.assertLogs("vibemouse.core.app", level="ERROR") as captured: transcribe_and_output(recording, "default") self.assertTrue( diff --git a/tests/test_audio.py b/tests/core/test_audio.py similarity index 100% rename from tests/test_audio.py rename to tests/core/test_audio.py diff --git a/tests/test_output.py b/tests/core/test_output.py similarity index 100% rename from tests/test_output.py rename to tests/core/test_output.py diff --git a/tests/test_keyboard_listener.py b/tests/listener/test_keyboard_listener.py similarity index 100% rename from tests/test_keyboard_listener.py rename to tests/listener/test_keyboard_listener.py diff --git a/tests/test_mouse_listener.py b/tests/listener/test_mouse_listener.py similarity index 100% rename from tests/test_mouse_listener.py rename to tests/listener/test_mouse_listener.py diff --git a/tests/test_deploy.py b/tests/ops/test_deploy.py similarity index 100% rename from tests/test_deploy.py rename to tests/ops/test_deploy.py diff --git a/tests/test_doctor.py b/tests/ops/test_doctor.py similarity index 100% rename from tests/test_doctor.py rename to tests/ops/test_doctor.py diff --git a/tests/test_system_integration.py b/tests/platform/test_system_integration.py similarity index 100% rename from tests/test_system_integration.py rename to tests/platform/test_system_integration.py diff --git a/vibemouse/app.py b/vibemouse/app.py index fc7641a..59fd2ed 100644 --- a/vibemouse/app.py +++ b/vibemouse/app.py @@ -1,403 +1,4 @@ -from __future__ import annotations +from importlib import import_module as _import_module +import sys as _sys -import logging -import subprocess -import threading -from pathlib import Path -from typing import Literal - -from vibemouse.audio import AudioRecorder, AudioRecording -from vibemouse.config import AppConfig, write_status -from vibemouse.keyboard_listener import KeyboardHotkeyListener -from vibemouse.mouse_listener import SideButtonListener -from vibemouse.output import TextOutput -from vibemouse.system_integration import SystemIntegration, create_system_integration -from vibemouse.transcriber import SenseVoiceTranscriber - - -TranscriptionTarget = Literal["default", "openclaw"] -_LOG = logging.getLogger(__name__) - - -class VoiceMouseApp: - def __init__(self, config: AppConfig) -> None: - if config.front_button == config.rear_button: - raise ValueError("Front and rear side buttons must be different") - - self._config: AppConfig = config - self._system_integration: SystemIntegration = create_system_integration() - self._recorder: AudioRecorder = AudioRecorder( - sample_rate=config.sample_rate, - channels=config.channels, - dtype=config.dtype, - temp_dir=config.temp_dir, - ) - self._transcriber: SenseVoiceTranscriber = SenseVoiceTranscriber(config) - self._output: TextOutput = TextOutput( - system_integration=self._system_integration, - openclaw_command=config.openclaw_command, - openclaw_agent=config.openclaw_agent, - openclaw_timeout_s=config.openclaw_timeout_s, - openclaw_retries=config.openclaw_retries, - ) - self._listener: SideButtonListener = SideButtonListener( - on_front_press=self._on_front_press, - on_rear_press=self._on_rear_press, - on_gesture=self._on_gesture, - front_button=config.front_button, - rear_button=config.rear_button, - debounce_s=config.button_debounce_ms / 1000.0, - gestures_enabled=config.gestures_enabled, - gesture_trigger_button=config.gesture_trigger_button, - gesture_threshold_px=config.gesture_threshold_px, - gesture_freeze_pointer=config.gesture_freeze_pointer, - gesture_restore_cursor=config.gesture_restore_cursor, - system_integration=self._system_integration, - ) - self._keyboard_listener: KeyboardHotkeyListener = KeyboardHotkeyListener( - on_hotkey=self._on_front_press, - keycodes=config.record_hotkey_keycodes, - debounce_s=config.button_debounce_ms / 1000.0, - ) - self._recording_submit_listener: KeyboardHotkeyListener | None = None - if config.recording_submit_keycode is not None: - self._recording_submit_listener = KeyboardHotkeyListener( - on_hotkey=self._on_recording_submit_press, - keycodes=(config.recording_submit_keycode,), - debounce_s=config.button_debounce_ms / 1000.0, - ) - self._stop_event: threading.Event = threading.Event() - self._transcribe_lock: threading.Lock = threading.Lock() - self._workers_lock: threading.Lock = threading.Lock() - self._workers: set[threading.Thread] = set() - self._prewarm_started: bool = False - - def run(self) -> None: - self._listener.start() - self._keyboard_listener.start() - if self._recording_submit_listener is not None: - self._recording_submit_listener.start() - self._set_recording_status(False) - recording_submit_hotkey = self._config.recording_submit_keycode - _LOG.info( - "VibeMouse ready. " - + f"Model={self._config.model_name}, preferred_device={self._config.device}, " - + f"backend={self._config.transcriber_backend}, auto_paste={self._config.auto_paste}, " - + f"enter_mode={self._config.enter_mode}, debounce_ms={self._config.button_debounce_ms}, " - + f"front_button={self._config.front_button}, rear_button={self._config.rear_button}, " - + f"record_hotkey_keycodes={self._config.record_hotkey_keycodes}, " - + f"recording_submit_keycode={recording_submit_hotkey}, " - + f"gestures_enabled={self._config.gestures_enabled}, " - + f"gesture_trigger={self._config.gesture_trigger_button}, " - + f"gesture_threshold_px={self._config.gesture_threshold_px}, " - + f"gesture_freeze_pointer={self._config.gesture_freeze_pointer}, " - + f"gesture_restore_cursor={self._config.gesture_restore_cursor}, " - + f"prewarm_on_start={self._config.prewarm_on_start}, " - + f"prewarm_delay_s={self._config.prewarm_delay_s}. " - + "Press side-front to start/stop recording. While recording, side-rear sends transcript to OpenClaw; otherwise side-rear sends Enter." - ) - self._maybe_prewarm_transcriber() - try: - _ = self._stop_event.wait() - except KeyboardInterrupt: - self._stop_event.set() - finally: - self.shutdown() - - def shutdown(self) -> None: - self._listener.stop() - self._keyboard_listener.stop() - if self._recording_submit_listener is not None: - self._recording_submit_listener.stop() - self._recorder.cancel() - self._set_recording_status(False) - with self._workers_lock: - workers = list(self._workers) - still_running: list[threading.Thread] = [] - for worker in workers: - worker.join(timeout=5) - if worker.is_alive(): - still_running.append(worker) - if still_running: - _LOG.warning( - f"Shutdown warning: {len(still_running)} transcription worker(s) are still running" - ) - - def _on_front_press(self) -> None: - if not self._recorder.is_recording: - try: - self._recorder.start() - self._set_recording_status(True) - _LOG.info("Recording started") - except Exception as error: - self._set_recording_status(False) - _LOG.exception("Failed to start recording: %s", error) - return - - try: - recording = self._stop_recording() - except Exception as error: - _LOG.exception("Failed to stop recording: %s", error) - return - - if recording is None: - return - - self._start_transcription_worker(recording, output_target="default") - - def _on_rear_press(self) -> None: - if self._recorder.is_recording: - try: - recording = self._stop_recording() - except Exception as error: - _LOG.exception("Failed to stop recording from rear button: %s", error) - return - - if recording is None: - return - - _LOG.info( - "Recording stopped by rear button, sending transcript to OpenClaw" - ) - self._start_transcription_worker(recording, output_target="openclaw") - return - - try: - self._output.send_enter(mode=self._config.enter_mode) - if self._config.enter_mode == "none": - _LOG.info("Enter key handling disabled (enter_mode=none)") - else: - _LOG.info("Enter key sent") - except Exception as error: - _LOG.exception("Failed to send Enter: %s", error) - - def _on_recording_submit_press(self) -> None: - if not self._recorder.is_recording: - return - _LOG.info("Recording submit hotkey pressed, routing to rear-button logic") - self._on_rear_press() - - def _on_gesture(self, direction: str) -> None: - action = self._resolve_gesture_action(direction) - if action == "noop": - _LOG.info("Gesture '%s' recognized with no action configured", direction) - return - - if action == "record_toggle": - _LOG.info("Gesture '%s' -> toggle recording", direction) - self._on_front_press() - return - - if action == "send_enter": - mode = self._config.enter_mode - if mode == "none": - mode = "enter" - try: - self._output.send_enter(mode=mode) - _LOG.info("Gesture '%s' -> send enter (%s)", direction, mode) - except Exception as error: - _LOG.exception( - "Gesture '%s' failed to send enter: %s", direction, error - ) - return - - if action == "workspace_left": - if self._switch_workspace("left"): - _LOG.info("Gesture '%s' -> switch workspace left", direction) - else: - _LOG.warning("Gesture '%s' failed to switch workspace left", direction) - return - - if action == "workspace_right": - if self._switch_workspace("right"): - _LOG.info("Gesture '%s' -> switch workspace right", direction) - else: - _LOG.warning("Gesture '%s' failed to switch workspace right", direction) - return - - _LOG.warning("Gesture '%s' mapped to unknown action '%s'", direction, action) - - def _resolve_gesture_action(self, direction: str) -> str: - mapping = { - "up": self._config.gesture_up_action, - "down": self._config.gesture_down_action, - "left": self._config.gesture_left_action, - "right": self._config.gesture_right_action, - } - return mapping.get(direction, "noop") - - def _switch_workspace(self, direction: str) -> bool: - try: - system_integration = self._system_integration - except AttributeError: - system_integration = None - - if system_integration is not None: - try: - return bool(system_integration.switch_workspace(direction)) - except Exception: - return False - - workspace_arg = "e-1" if direction == "left" else "e+1" - try: - proc = subprocess.run( - ["hyprctl", "dispatch", "workspace", workspace_arg], - capture_output=True, - text=True, - check=False, - timeout=1.0, - ) - except (OSError, subprocess.TimeoutExpired): - return False - - return proc.returncode == 0 and proc.stdout.strip() == "ok" - - def _stop_recording(self) -> AudioRecording | None: - try: - recording = self._recorder.stop_and_save() - except Exception as error: - self._set_recording_status(False) - raise RuntimeError(error) from error - - self._set_recording_status(False) - if recording is None: - _LOG.info("Recording was empty and has been discarded") - return None - return recording - - def _start_transcription_worker( - self, - recording: AudioRecording, - *, - output_target: TranscriptionTarget, - ) -> None: - worker = threading.Thread( - target=self._transcribe_and_output, - args=(recording, output_target), - daemon=True, - ) - with self._workers_lock: - self._workers.add(worker) - worker.start() - - def _transcribe_and_output( - self, - recording: AudioRecording, - output_target: TranscriptionTarget, - ) -> None: - current = threading.current_thread() - try: - _LOG.info( - "Recording stopped (%.1fs), transcribing...", recording.duration_s - ) - with self._transcribe_lock: - text = self._transcriber.transcribe(recording.path) - - if not text: - _LOG.info("No speech recognized") - return - - if output_target == "openclaw": - dispatch = self._output.send_to_openclaw_result(text) - route = dispatch.route - dispatch_reason = dispatch.reason - else: - route = self._output.inject_or_clipboard( - text, - auto_paste=self._config.auto_paste, - ) - dispatch_reason = "n/a" - - device = self._transcriber.device_in_use - backend = self._transcriber.backend_in_use - - if output_target == "openclaw": - if route == "openclaw": - _LOG.info( - "Transcribed with %s on %s, sent to OpenClaw (%s)", - backend, - device, - dispatch_reason, - ) - elif route == "clipboard": - _LOG.warning( - "Transcribed with %s on %s, OpenClaw unavailable so copied to clipboard (%s)", - backend, - device, - dispatch_reason, - ) - else: - _LOG.warning( - "Transcribed with %s on %s, but OpenClaw output was empty (%s)", - backend, - device, - dispatch_reason, - ) - return - - if route == "typed": - _LOG.info( - "Transcribed with %s on %s, typed into focused input", - backend, - device, - ) - elif route == "pasted": - _LOG.info( - "Transcribed with %s on %s, pasted via system shortcut", - backend, - device, - ) - elif route == "clipboard": - _LOG.info( - "Transcribed with %s on %s, copied to clipboard", backend, device - ) - else: - _LOG.warning( - "Transcribed with %s on %s, but output was empty", backend, device - ) - except Exception as error: - _LOG.exception("Transcription failed: %s", error) - finally: - self._safe_unlink(recording.path) - with self._workers_lock: - self._workers.discard(current) - - def _safe_unlink(self, path: Path) -> None: - try: - path.unlink(missing_ok=True) - except Exception as error: - _LOG.warning("Failed to remove temp audio file %s: %s", path, error) - - def _maybe_prewarm_transcriber(self) -> None: - if not self._config.prewarm_on_start or self._prewarm_started: - return - self._prewarm_started = True - - worker = threading.Thread( - target=self._prewarm_transcriber, - args=(self._config.prewarm_delay_s,), - daemon=True, - ) - worker.start() - - def _prewarm_transcriber(self, delay_s: float = 0.0) -> None: - if delay_s > 0: - _LOG.info("Transcriber prewarm scheduled in %.1fs", delay_s) - if self._stop_event.wait(timeout=delay_s): - return - - try: - self._transcriber.prewarm() - _LOG.info("Transcriber prewarm complete") - except Exception as error: - _LOG.warning("Transcriber prewarm skipped: %s", error) - - def _set_recording_status(self, is_recording: bool) -> None: - payload = { - "recording": is_recording, - "state": "recording" if is_recording else "idle", - } - try: - write_status(self._config.status_file, payload) - except Exception: - return +_sys.modules[__name__] = _import_module("vibemouse.core.app") diff --git a/vibemouse/audio.py b/vibemouse/audio.py index 51cad7b..b364e8d 100644 --- a/vibemouse/audio.py +++ b/vibemouse/audio.py @@ -1,299 +1,4 @@ -from __future__ import annotations +from importlib import import_module as _import_module +import sys as _sys -import importlib -import logging -import threading -from collections.abc import Callable, Iterable, Mapping -from dataclasses import dataclass -from pathlib import Path -from typing import Protocol, cast -from uuid import uuid4 - -import numpy as np -from numpy.typing import NDArray - -_LOG = logging.getLogger(__name__) - - -AudioFrame = NDArray[np.float32] - - -@dataclass -class AudioRecording: - path: Path - duration_s: float - - -class _AudioStream(Protocol): - def start(self) -> None: ... - - def stop(self) -> None: ... - - def close(self) -> None: ... - - -class _SoundDeviceModule(Protocol): - def InputStream( - self, - *, - samplerate: int, - channels: int, - dtype: str, - device: int | str | None, - callback: Callable[[AudioFrame, int, object, object], None], - ) -> _AudioStream: ... - - def query_devices(self) -> object: ... - - -class _SoundFileModule(Protocol): - def write(self, file: str | Path, data: AudioFrame, samplerate: int) -> None: ... - - -class AudioRecorder: - def __init__( - self, sample_rate: int, channels: int, dtype: str, temp_dir: Path - ) -> None: - self._sample_rate: int = sample_rate - self._channels: int = channels - self._dtype: str = dtype - self._temp_dir: Path = temp_dir - self._sd: _SoundDeviceModule | None = None - self._sf: _SoundFileModule | None = None - self._lock: threading.Lock = threading.Lock() - self._frames: list[AudioFrame] = [] - self._stream: _AudioStream | None = None - self._recording: bool = False - self._selected_input_device: int | str | None = None - self._active_sample_rate: int = sample_rate - - @property - def is_recording(self) -> bool: - with self._lock: - return self._recording - - def start(self) -> None: - self._ensure_audio_modules() - with self._lock: - if self._recording: - return - try: - self._temp_dir.mkdir(parents=True, exist_ok=True) - except OSError as error: - raise RuntimeError( - f"Failed to create temp audio directory {self._temp_dir}: {error}" - ) from error - self._frames = [] - if self._sd is None: - raise RuntimeError("Audio input module not initialized") - device = self._resolve_input_device() - samplerate = self._sample_rate - try: - stream = self._sd.InputStream( - samplerate=samplerate, - channels=self._channels, - dtype=self._dtype, - device=device, - callback=self._callback, - ) - except Exception as first_error: - fallback_rate = self._resolve_device_sample_rate(device) - if fallback_rate is None or fallback_rate == samplerate: - raise - stream = self._sd.InputStream( - samplerate=fallback_rate, - channels=self._channels, - dtype=self._dtype, - device=device, - callback=self._callback, - ) - samplerate = fallback_rate - _LOG.warning( - "Audio samplerate fallback applied: %s -> %s (%s)", - self._sample_rate, - fallback_rate, - first_error, - ) - stream.start() - self._stream = stream - self._recording = True - self._active_sample_rate = samplerate - _LOG.info( - "Audio recording stream started: device=%s sample_rate=%s channels=%s", - device, - samplerate, - self._channels, - ) - - def stop_and_save(self) -> AudioRecording | None: - with self._lock: - if not self._recording: - return None - stream = self._stream - self._stream = None - self._recording = False - - if stream is not None: - stream.stop() - stream.close() - _LOG.info("Audio recording stream stopped") - - with self._lock: - if not self._frames: - return None - audio = np.concatenate(self._frames, axis=0) - self._frames = [] - - out_path = self._temp_dir / f"recording_{uuid4().hex}.wav" - if self._sf is None: - raise RuntimeError("Audio write module not initialized") - try: - self._sf.write(out_path, audio, self._active_sample_rate) - except Exception as error: - raise RuntimeError( - f"Failed to write recording to {out_path}: {error}" - ) from error - duration = float(len(audio) / self._active_sample_rate) - _LOG.info("Audio recording saved: path=%s duration_s=%.2f", out_path, duration) - return AudioRecording(path=out_path, duration_s=duration) - - def cancel(self) -> None: - with self._lock: - if not self._recording: - self._frames = [] - return - stream = self._stream - self._stream = None - self._recording = False - self._frames = [] - - if stream is not None: - stream.stop() - stream.close() - - def _callback( - self, indata: AudioFrame, frames: int, time_data: object, status: object - ) -> None: - del frames - del time_data - del status - with self._lock: - if self._recording: - self._frames.append(indata.copy()) - - def _ensure_audio_modules(self) -> None: - if self._sd is not None and self._sf is not None: - return - try: - sounddevice_module = importlib.import_module("sounddevice") - soundfile_module = importlib.import_module("soundfile") - except Exception as error: - raise RuntimeError( - "Audio dependencies missing. Install sounddevice and soundfile." - ) from error - - self._sd = cast(_SoundDeviceModule, cast(object, sounddevice_module)) - self._sf = cast(_SoundFileModule, cast(object, soundfile_module)) - - def _resolve_input_device(self) -> int | str | None: - if self._selected_input_device is not None: - return self._selected_input_device - if self._sd is None: - return None - - query_devices = getattr(self._sd, "query_devices", None) - if not callable(query_devices): - return None - - try: - devices_obj = query_devices() - except Exception: - return None - - devices = _coerce_device_list(devices_obj) - if devices is None: - return None - - default_index: int | None = None - default_attr = getattr(self._sd, "default", None) - default_device = getattr(default_attr, "device", None) - if isinstance(default_device, list | tuple) and default_device: - candidate = default_device[0] - if isinstance(candidate, int): - default_index = candidate - - def _name(index: int) -> str: - if index < 0 or index >= len(devices): - return "" - item = devices[index] - if not isinstance(item, Mapping): - return "" - raw = item.get("name", "") - return raw if isinstance(raw, str) else "" - - if default_index is not None: - default_name = _name(default_index).lower() - if "monitor" not in default_name: - self._selected_input_device = default_index - return default_index - - virtual_names = { - "default", - "pulse", - "pipewire", - "sysdefault", - "jack", - "lavrate", - "samplerate", - "speex", - "upmix", - "vdownmix", - } - for idx, entry in enumerate(devices): - if not isinstance(entry, Mapping): - continue - max_inputs = entry.get("max_input_channels", 0) - if not isinstance(max_inputs, int | float) or max_inputs <= 0: - continue - name_obj = entry.get("name", "") - if not isinstance(name_obj, str): - continue - name = name_obj.lower() - if "monitor" in name or name.strip() in virtual_names: - continue - self._selected_input_device = idx - return idx - - return None - - def _resolve_device_sample_rate(self, device: int | str | None) -> int | None: - if self._sd is None: - return None - query_devices = getattr(self._sd, "query_devices", None) - if not callable(query_devices): - return None - try: - devices_obj = query_devices() - except Exception: - return None - devices = _coerce_device_list(devices_obj) - if devices is None: - return None - - if isinstance(device, int) and 0 <= device < len(devices): - entry = devices[device] - if isinstance(entry, Mapping): - raw = entry.get("default_samplerate") - if isinstance(raw, int | float) and raw > 0: - return int(raw) - return None - - -def _coerce_device_list(devices_obj: object) -> list[object] | None: - if isinstance(devices_obj, list): - return devices_obj - if isinstance(devices_obj, tuple): - return list(devices_obj) - if isinstance(devices_obj, Iterable): - return list(devices_obj) - return None +_sys.modules[__name__] = _import_module("vibemouse.core.audio") diff --git a/vibemouse/cli/__init__.py b/vibemouse/cli/__init__.py new file mode 100644 index 0000000..a9a2c5b --- /dev/null +++ b/vibemouse/cli/__init__.py @@ -0,0 +1 @@ +__all__ = [] diff --git a/vibemouse/cli/main.py b/vibemouse/cli/main.py new file mode 100644 index 0000000..d524b3b --- /dev/null +++ b/vibemouse/cli/main.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +import argparse + +from vibemouse.core.app import VoiceMouseApp +from vibemouse.config import load_config +from vibemouse.core.logging_setup import configure_logging +from vibemouse.ops.deploy import configure_deploy_parser, run_deploy +from vibemouse.ops.doctor import run_doctor + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(prog="vibemouse") + subparsers = parser.add_subparsers(dest="command") + _ = subparsers.add_parser("run", help="run the voice-input daemon") + doctor_parser = subparsers.add_parser("doctor", help="run environment diagnostics") + _ = doctor_parser.add_argument( + "--fix", + action="store_true", + help="apply safe auto-remediations before running checks", + ) + deploy_parser = subparsers.add_parser( + "deploy", + help="generate service/env files and deploy as user service", + ) + configure_deploy_parser(deploy_parser) + return parser + + +def main(argv: list[str] | None = None) -> int: + parser = _build_parser() + args = parser.parse_args(argv) + + raw_command = getattr(args, "command", None) + command = raw_command if isinstance(raw_command, str) else "run" + if command == "doctor": + apply_fixes_raw = getattr(args, "fix", False) + apply_fixes = bool(apply_fixes_raw) + return run_doctor(apply_fixes=apply_fixes) + if command == "deploy": + return run_deploy(args) + + config = load_config() + configure_logging(config.log_level) + app = VoiceMouseApp(config) + app.run() + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/vibemouse/core/__init__.py b/vibemouse/core/__init__.py new file mode 100644 index 0000000..a9a2c5b --- /dev/null +++ b/vibemouse/core/__init__.py @@ -0,0 +1 @@ +__all__ = [] diff --git a/vibemouse/core/app.py b/vibemouse/core/app.py new file mode 100644 index 0000000..a7ba9de --- /dev/null +++ b/vibemouse/core/app.py @@ -0,0 +1,406 @@ +from __future__ import annotations + +import logging +import subprocess +import threading +from pathlib import Path +from typing import Literal + +from vibemouse.core.audio import AudioRecorder, AudioRecording +from vibemouse.config import AppConfig, write_status +from vibemouse.core.output import TextOutput +from vibemouse.core.transcriber import SenseVoiceTranscriber +from vibemouse.listener.keyboard_listener import KeyboardHotkeyListener +from vibemouse.listener.mouse_listener import SideButtonListener +from vibemouse.platform.system_integration import ( + SystemIntegration, + create_system_integration, +) + + +TranscriptionTarget = Literal["default", "openclaw"] +_LOG = logging.getLogger(__name__) + + +class VoiceMouseApp: + def __init__(self, config: AppConfig) -> None: + if config.front_button == config.rear_button: + raise ValueError("Front and rear side buttons must be different") + + self._config: AppConfig = config + self._system_integration: SystemIntegration = create_system_integration() + self._recorder: AudioRecorder = AudioRecorder( + sample_rate=config.sample_rate, + channels=config.channels, + dtype=config.dtype, + temp_dir=config.temp_dir, + ) + self._transcriber: SenseVoiceTranscriber = SenseVoiceTranscriber(config) + self._output: TextOutput = TextOutput( + system_integration=self._system_integration, + openclaw_command=config.openclaw_command, + openclaw_agent=config.openclaw_agent, + openclaw_timeout_s=config.openclaw_timeout_s, + openclaw_retries=config.openclaw_retries, + ) + self._listener: SideButtonListener = SideButtonListener( + on_front_press=self._on_front_press, + on_rear_press=self._on_rear_press, + on_gesture=self._on_gesture, + front_button=config.front_button, + rear_button=config.rear_button, + debounce_s=config.button_debounce_ms / 1000.0, + gestures_enabled=config.gestures_enabled, + gesture_trigger_button=config.gesture_trigger_button, + gesture_threshold_px=config.gesture_threshold_px, + gesture_freeze_pointer=config.gesture_freeze_pointer, + gesture_restore_cursor=config.gesture_restore_cursor, + system_integration=self._system_integration, + ) + self._keyboard_listener: KeyboardHotkeyListener = KeyboardHotkeyListener( + on_hotkey=self._on_front_press, + keycodes=config.record_hotkey_keycodes, + debounce_s=config.button_debounce_ms / 1000.0, + ) + self._recording_submit_listener: KeyboardHotkeyListener | None = None + if config.recording_submit_keycode is not None: + self._recording_submit_listener = KeyboardHotkeyListener( + on_hotkey=self._on_recording_submit_press, + keycodes=(config.recording_submit_keycode,), + debounce_s=config.button_debounce_ms / 1000.0, + ) + self._stop_event: threading.Event = threading.Event() + self._transcribe_lock: threading.Lock = threading.Lock() + self._workers_lock: threading.Lock = threading.Lock() + self._workers: set[threading.Thread] = set() + self._prewarm_started: bool = False + + def run(self) -> None: + self._listener.start() + self._keyboard_listener.start() + if self._recording_submit_listener is not None: + self._recording_submit_listener.start() + self._set_recording_status(False) + recording_submit_hotkey = self._config.recording_submit_keycode + _LOG.info( + "VibeMouse ready. " + + f"Model={self._config.model_name}, preferred_device={self._config.device}, " + + f"backend={self._config.transcriber_backend}, auto_paste={self._config.auto_paste}, " + + f"enter_mode={self._config.enter_mode}, debounce_ms={self._config.button_debounce_ms}, " + + f"front_button={self._config.front_button}, rear_button={self._config.rear_button}, " + + f"record_hotkey_keycodes={self._config.record_hotkey_keycodes}, " + + f"recording_submit_keycode={recording_submit_hotkey}, " + + f"gestures_enabled={self._config.gestures_enabled}, " + + f"gesture_trigger={self._config.gesture_trigger_button}, " + + f"gesture_threshold_px={self._config.gesture_threshold_px}, " + + f"gesture_freeze_pointer={self._config.gesture_freeze_pointer}, " + + f"gesture_restore_cursor={self._config.gesture_restore_cursor}, " + + f"prewarm_on_start={self._config.prewarm_on_start}, " + + f"prewarm_delay_s={self._config.prewarm_delay_s}. " + + "Press side-front to start/stop recording. While recording, side-rear sends transcript to OpenClaw; otherwise side-rear sends Enter." + ) + self._maybe_prewarm_transcriber() + try: + _ = self._stop_event.wait() + except KeyboardInterrupt: + self._stop_event.set() + finally: + self.shutdown() + + def shutdown(self) -> None: + self._listener.stop() + self._keyboard_listener.stop() + if self._recording_submit_listener is not None: + self._recording_submit_listener.stop() + self._recorder.cancel() + self._set_recording_status(False) + with self._workers_lock: + workers = list(self._workers) + still_running: list[threading.Thread] = [] + for worker in workers: + worker.join(timeout=5) + if worker.is_alive(): + still_running.append(worker) + if still_running: + _LOG.warning( + f"Shutdown warning: {len(still_running)} transcription worker(s) are still running" + ) + + def _on_front_press(self) -> None: + if not self._recorder.is_recording: + try: + self._recorder.start() + self._set_recording_status(True) + _LOG.info("Recording started") + except Exception as error: + self._set_recording_status(False) + _LOG.exception("Failed to start recording: %s", error) + return + + try: + recording = self._stop_recording() + except Exception as error: + _LOG.exception("Failed to stop recording: %s", error) + return + + if recording is None: + return + + self._start_transcription_worker(recording, output_target="default") + + def _on_rear_press(self) -> None: + if self._recorder.is_recording: + try: + recording = self._stop_recording() + except Exception as error: + _LOG.exception("Failed to stop recording from rear button: %s", error) + return + + if recording is None: + return + + _LOG.info( + "Recording stopped by rear button, sending transcript to OpenClaw" + ) + self._start_transcription_worker(recording, output_target="openclaw") + return + + try: + self._output.send_enter(mode=self._config.enter_mode) + if self._config.enter_mode == "none": + _LOG.info("Enter key handling disabled (enter_mode=none)") + else: + _LOG.info("Enter key sent") + except Exception as error: + _LOG.exception("Failed to send Enter: %s", error) + + def _on_recording_submit_press(self) -> None: + if not self._recorder.is_recording: + return + _LOG.info("Recording submit hotkey pressed, routing to rear-button logic") + self._on_rear_press() + + def _on_gesture(self, direction: str) -> None: + action = self._resolve_gesture_action(direction) + if action == "noop": + _LOG.info("Gesture '%s' recognized with no action configured", direction) + return + + if action == "record_toggle": + _LOG.info("Gesture '%s' -> toggle recording", direction) + self._on_front_press() + return + + if action == "send_enter": + mode = self._config.enter_mode + if mode == "none": + mode = "enter" + try: + self._output.send_enter(mode=mode) + _LOG.info("Gesture '%s' -> send enter (%s)", direction, mode) + except Exception as error: + _LOG.exception( + "Gesture '%s' failed to send enter: %s", direction, error + ) + return + + if action == "workspace_left": + if self._switch_workspace("left"): + _LOG.info("Gesture '%s' -> switch workspace left", direction) + else: + _LOG.warning("Gesture '%s' failed to switch workspace left", direction) + return + + if action == "workspace_right": + if self._switch_workspace("right"): + _LOG.info("Gesture '%s' -> switch workspace right", direction) + else: + _LOG.warning("Gesture '%s' failed to switch workspace right", direction) + return + + _LOG.warning("Gesture '%s' mapped to unknown action '%s'", direction, action) + + def _resolve_gesture_action(self, direction: str) -> str: + mapping = { + "up": self._config.gesture_up_action, + "down": self._config.gesture_down_action, + "left": self._config.gesture_left_action, + "right": self._config.gesture_right_action, + } + return mapping.get(direction, "noop") + + def _switch_workspace(self, direction: str) -> bool: + try: + system_integration = self._system_integration + except AttributeError: + system_integration = None + + if system_integration is not None: + try: + return bool(system_integration.switch_workspace(direction)) + except Exception: + return False + + workspace_arg = "e-1" if direction == "left" else "e+1" + try: + proc = subprocess.run( + ["hyprctl", "dispatch", "workspace", workspace_arg], + capture_output=True, + text=True, + check=False, + timeout=1.0, + ) + except (OSError, subprocess.TimeoutExpired): + return False + + return proc.returncode == 0 and proc.stdout.strip() == "ok" + + def _stop_recording(self) -> AudioRecording | None: + try: + recording = self._recorder.stop_and_save() + except Exception as error: + self._set_recording_status(False) + raise RuntimeError(error) from error + + self._set_recording_status(False) + if recording is None: + _LOG.info("Recording was empty and has been discarded") + return None + return recording + + def _start_transcription_worker( + self, + recording: AudioRecording, + *, + output_target: TranscriptionTarget, + ) -> None: + worker = threading.Thread( + target=self._transcribe_and_output, + args=(recording, output_target), + daemon=True, + ) + with self._workers_lock: + self._workers.add(worker) + worker.start() + + def _transcribe_and_output( + self, + recording: AudioRecording, + output_target: TranscriptionTarget, + ) -> None: + current = threading.current_thread() + try: + _LOG.info( + "Recording stopped (%.1fs), transcribing...", recording.duration_s + ) + with self._transcribe_lock: + text = self._transcriber.transcribe(recording.path) + + if not text: + _LOG.info("No speech recognized") + return + + if output_target == "openclaw": + dispatch = self._output.send_to_openclaw_result(text) + route = dispatch.route + dispatch_reason = dispatch.reason + else: + route = self._output.inject_or_clipboard( + text, + auto_paste=self._config.auto_paste, + ) + dispatch_reason = "n/a" + + device = self._transcriber.device_in_use + backend = self._transcriber.backend_in_use + + if output_target == "openclaw": + if route == "openclaw": + _LOG.info( + "Transcribed with %s on %s, sent to OpenClaw (%s)", + backend, + device, + dispatch_reason, + ) + elif route == "clipboard": + _LOG.warning( + "Transcribed with %s on %s, OpenClaw unavailable so copied to clipboard (%s)", + backend, + device, + dispatch_reason, + ) + else: + _LOG.warning( + "Transcribed with %s on %s, but OpenClaw output was empty (%s)", + backend, + device, + dispatch_reason, + ) + return + + if route == "typed": + _LOG.info( + "Transcribed with %s on %s, typed into focused input", + backend, + device, + ) + elif route == "pasted": + _LOG.info( + "Transcribed with %s on %s, pasted via system shortcut", + backend, + device, + ) + elif route == "clipboard": + _LOG.info( + "Transcribed with %s on %s, copied to clipboard", backend, device + ) + else: + _LOG.warning( + "Transcribed with %s on %s, but output was empty", backend, device + ) + except Exception as error: + _LOG.exception("Transcription failed: %s", error) + finally: + self._safe_unlink(recording.path) + with self._workers_lock: + self._workers.discard(current) + + def _safe_unlink(self, path: Path) -> None: + try: + path.unlink(missing_ok=True) + except Exception as error: + _LOG.warning("Failed to remove temp audio file %s: %s", path, error) + + def _maybe_prewarm_transcriber(self) -> None: + if not self._config.prewarm_on_start or self._prewarm_started: + return + self._prewarm_started = True + + worker = threading.Thread( + target=self._prewarm_transcriber, + args=(self._config.prewarm_delay_s,), + daemon=True, + ) + worker.start() + + def _prewarm_transcriber(self, delay_s: float = 0.0) -> None: + if delay_s > 0: + _LOG.info("Transcriber prewarm scheduled in %.1fs", delay_s) + if self._stop_event.wait(timeout=delay_s): + return + + try: + self._transcriber.prewarm() + _LOG.info("Transcriber prewarm complete") + except Exception as error: + _LOG.warning("Transcriber prewarm skipped: %s", error) + + def _set_recording_status(self, is_recording: bool) -> None: + payload = { + "recording": is_recording, + "state": "recording" if is_recording else "idle", + } + try: + write_status(self._config.status_file, payload) + except Exception: + return diff --git a/vibemouse/core/audio.py b/vibemouse/core/audio.py new file mode 100644 index 0000000..51cad7b --- /dev/null +++ b/vibemouse/core/audio.py @@ -0,0 +1,299 @@ +from __future__ import annotations + +import importlib +import logging +import threading +from collections.abc import Callable, Iterable, Mapping +from dataclasses import dataclass +from pathlib import Path +from typing import Protocol, cast +from uuid import uuid4 + +import numpy as np +from numpy.typing import NDArray + +_LOG = logging.getLogger(__name__) + + +AudioFrame = NDArray[np.float32] + + +@dataclass +class AudioRecording: + path: Path + duration_s: float + + +class _AudioStream(Protocol): + def start(self) -> None: ... + + def stop(self) -> None: ... + + def close(self) -> None: ... + + +class _SoundDeviceModule(Protocol): + def InputStream( + self, + *, + samplerate: int, + channels: int, + dtype: str, + device: int | str | None, + callback: Callable[[AudioFrame, int, object, object], None], + ) -> _AudioStream: ... + + def query_devices(self) -> object: ... + + +class _SoundFileModule(Protocol): + def write(self, file: str | Path, data: AudioFrame, samplerate: int) -> None: ... + + +class AudioRecorder: + def __init__( + self, sample_rate: int, channels: int, dtype: str, temp_dir: Path + ) -> None: + self._sample_rate: int = sample_rate + self._channels: int = channels + self._dtype: str = dtype + self._temp_dir: Path = temp_dir + self._sd: _SoundDeviceModule | None = None + self._sf: _SoundFileModule | None = None + self._lock: threading.Lock = threading.Lock() + self._frames: list[AudioFrame] = [] + self._stream: _AudioStream | None = None + self._recording: bool = False + self._selected_input_device: int | str | None = None + self._active_sample_rate: int = sample_rate + + @property + def is_recording(self) -> bool: + with self._lock: + return self._recording + + def start(self) -> None: + self._ensure_audio_modules() + with self._lock: + if self._recording: + return + try: + self._temp_dir.mkdir(parents=True, exist_ok=True) + except OSError as error: + raise RuntimeError( + f"Failed to create temp audio directory {self._temp_dir}: {error}" + ) from error + self._frames = [] + if self._sd is None: + raise RuntimeError("Audio input module not initialized") + device = self._resolve_input_device() + samplerate = self._sample_rate + try: + stream = self._sd.InputStream( + samplerate=samplerate, + channels=self._channels, + dtype=self._dtype, + device=device, + callback=self._callback, + ) + except Exception as first_error: + fallback_rate = self._resolve_device_sample_rate(device) + if fallback_rate is None or fallback_rate == samplerate: + raise + stream = self._sd.InputStream( + samplerate=fallback_rate, + channels=self._channels, + dtype=self._dtype, + device=device, + callback=self._callback, + ) + samplerate = fallback_rate + _LOG.warning( + "Audio samplerate fallback applied: %s -> %s (%s)", + self._sample_rate, + fallback_rate, + first_error, + ) + stream.start() + self._stream = stream + self._recording = True + self._active_sample_rate = samplerate + _LOG.info( + "Audio recording stream started: device=%s sample_rate=%s channels=%s", + device, + samplerate, + self._channels, + ) + + def stop_and_save(self) -> AudioRecording | None: + with self._lock: + if not self._recording: + return None + stream = self._stream + self._stream = None + self._recording = False + + if stream is not None: + stream.stop() + stream.close() + _LOG.info("Audio recording stream stopped") + + with self._lock: + if not self._frames: + return None + audio = np.concatenate(self._frames, axis=0) + self._frames = [] + + out_path = self._temp_dir / f"recording_{uuid4().hex}.wav" + if self._sf is None: + raise RuntimeError("Audio write module not initialized") + try: + self._sf.write(out_path, audio, self._active_sample_rate) + except Exception as error: + raise RuntimeError( + f"Failed to write recording to {out_path}: {error}" + ) from error + duration = float(len(audio) / self._active_sample_rate) + _LOG.info("Audio recording saved: path=%s duration_s=%.2f", out_path, duration) + return AudioRecording(path=out_path, duration_s=duration) + + def cancel(self) -> None: + with self._lock: + if not self._recording: + self._frames = [] + return + stream = self._stream + self._stream = None + self._recording = False + self._frames = [] + + if stream is not None: + stream.stop() + stream.close() + + def _callback( + self, indata: AudioFrame, frames: int, time_data: object, status: object + ) -> None: + del frames + del time_data + del status + with self._lock: + if self._recording: + self._frames.append(indata.copy()) + + def _ensure_audio_modules(self) -> None: + if self._sd is not None and self._sf is not None: + return + try: + sounddevice_module = importlib.import_module("sounddevice") + soundfile_module = importlib.import_module("soundfile") + except Exception as error: + raise RuntimeError( + "Audio dependencies missing. Install sounddevice and soundfile." + ) from error + + self._sd = cast(_SoundDeviceModule, cast(object, sounddevice_module)) + self._sf = cast(_SoundFileModule, cast(object, soundfile_module)) + + def _resolve_input_device(self) -> int | str | None: + if self._selected_input_device is not None: + return self._selected_input_device + if self._sd is None: + return None + + query_devices = getattr(self._sd, "query_devices", None) + if not callable(query_devices): + return None + + try: + devices_obj = query_devices() + except Exception: + return None + + devices = _coerce_device_list(devices_obj) + if devices is None: + return None + + default_index: int | None = None + default_attr = getattr(self._sd, "default", None) + default_device = getattr(default_attr, "device", None) + if isinstance(default_device, list | tuple) and default_device: + candidate = default_device[0] + if isinstance(candidate, int): + default_index = candidate + + def _name(index: int) -> str: + if index < 0 or index >= len(devices): + return "" + item = devices[index] + if not isinstance(item, Mapping): + return "" + raw = item.get("name", "") + return raw if isinstance(raw, str) else "" + + if default_index is not None: + default_name = _name(default_index).lower() + if "monitor" not in default_name: + self._selected_input_device = default_index + return default_index + + virtual_names = { + "default", + "pulse", + "pipewire", + "sysdefault", + "jack", + "lavrate", + "samplerate", + "speex", + "upmix", + "vdownmix", + } + for idx, entry in enumerate(devices): + if not isinstance(entry, Mapping): + continue + max_inputs = entry.get("max_input_channels", 0) + if not isinstance(max_inputs, int | float) or max_inputs <= 0: + continue + name_obj = entry.get("name", "") + if not isinstance(name_obj, str): + continue + name = name_obj.lower() + if "monitor" in name or name.strip() in virtual_names: + continue + self._selected_input_device = idx + return idx + + return None + + def _resolve_device_sample_rate(self, device: int | str | None) -> int | None: + if self._sd is None: + return None + query_devices = getattr(self._sd, "query_devices", None) + if not callable(query_devices): + return None + try: + devices_obj = query_devices() + except Exception: + return None + devices = _coerce_device_list(devices_obj) + if devices is None: + return None + + if isinstance(device, int) and 0 <= device < len(devices): + entry = devices[device] + if isinstance(entry, Mapping): + raw = entry.get("default_samplerate") + if isinstance(raw, int | float) and raw > 0: + return int(raw) + return None + + +def _coerce_device_list(devices_obj: object) -> list[object] | None: + if isinstance(devices_obj, list): + return devices_obj + if isinstance(devices_obj, tuple): + return list(devices_obj) + if isinstance(devices_obj, Iterable): + return list(devices_obj) + return None diff --git a/vibemouse/core/logging_setup.py b/vibemouse/core/logging_setup.py new file mode 100644 index 0000000..caf7a96 --- /dev/null +++ b/vibemouse/core/logging_setup.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +import logging + +_LOG_FORMAT = "%(asctime)s %(levelname)s [%(name)s] %(message)s" + + +def configure_logging(level_name: str) -> None: + normalized = level_name.strip().upper() + level = getattr(logging, normalized, logging.INFO) + root = logging.getLogger() + if not root.handlers: + logging.basicConfig(level=level, format=_LOG_FORMAT) + return + root.setLevel(level) + + +def get_logger(name: str) -> logging.Logger: + return logging.getLogger(name) diff --git a/vibemouse/core/output.py b/vibemouse/core/output.py new file mode 100644 index 0000000..463812a --- /dev/null +++ b/vibemouse/core/output.py @@ -0,0 +1,456 @@ +from __future__ import annotations + +import importlib +import json +import shlex +import subprocess +import time +from dataclasses import dataclass +from typing import Protocol, cast + +import pyperclip + +from vibemouse.platform.system_integration import ( + SystemIntegration, + create_system_integration, + is_terminal_window_payload, + load_atspi_module, + probe_text_input_focus_via_atspi, + probe_send_enter_via_atspi, +) + + +class TextOutput: + def __init__( + self, + *, + system_integration: SystemIntegration | None = None, + openclaw_command: str = "openclaw", + openclaw_agent: str | None = None, + openclaw_timeout_s: float = 20.0, + openclaw_retries: int = 0, + ) -> None: + try: + keyboard_module = importlib.import_module("pynput.keyboard") + except Exception as error: + raise RuntimeError( + f"Failed to load keyboard control dependencies: {error}" + ) from error + + controller_ctor = cast( + _ControllerCtor, + getattr(cast(object, keyboard_module), "Controller"), + ) + key_holder = cast( + _KeyHolder, + getattr(cast(object, keyboard_module), "Key"), + ) + self._kb: _KeyboardController = controller_ctor() + self._enter_key: object = key_holder.enter + self._ctrl_key: object = key_holder.ctrl + self._shift_key: object = key_holder.shift + self._insert_key: object = key_holder.insert + self._atspi: object | None = load_atspi_module() + self._system_integration: SystemIntegration = ( + system_integration + if system_integration is not None + else create_system_integration() + ) + self._hyprland_session: bool = self._system_integration.is_hyprland + self._openclaw_command: str = openclaw_command + self._openclaw_agent: str | None = openclaw_agent + self._openclaw_timeout_s: float = max(0.5, openclaw_timeout_s) + self._openclaw_retries: int = max(0, int(openclaw_retries)) + + def send_enter(self, *, mode: str = "enter") -> None: + normalized = mode.strip().lower() + if normalized == "none": + return + if normalized == "enter": + if self._send_hyprland_shortcut(mod="", key="Return"): + return + if self._send_enter_via_atspi(): + return + self._tap_key(self._enter_key) + return + if normalized == "ctrl_enter": + self._tap_modified_key(self._ctrl_key, self._enter_key) + return + if normalized == "shift_enter": + self._tap_modified_key(self._shift_key, self._enter_key) + return + raise ValueError(f"Unsupported enter mode: {mode!r}") + + def inject_or_clipboard(self, text: str, *, auto_paste: bool = False) -> str: + normalized = text.strip() + if not normalized: + return "empty" + + if self._is_text_input_focused(): + self._kb.type(normalized) + return "typed" + + pyperclip.copy(normalized) + if auto_paste: + try: + self._paste_clipboard() + return "pasted" + except Exception: + return "clipboard" + return "clipboard" + + def send_to_openclaw(self, text: str) -> str: + return self.send_to_openclaw_result(text).route + + def send_to_openclaw_result(self, text: str) -> "OpenClawDispatchResult": + normalized = text.strip() + if not normalized: + return OpenClawDispatchResult(route="empty", reason="empty_text") + + command = self._build_openclaw_command(normalized) + if command is None: + pyperclip.copy(normalized) + return OpenClawDispatchResult(route="clipboard", reason="invalid_command") + + attempts = max(1, int(getattr(self, "_openclaw_retries", 0)) + 1) + last_reason = "spawn_error" + for attempt in range(attempts): + try: + _ = subprocess.Popen( + command, + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + start_new_session=True, + ) + if attempt == 0: + return OpenClawDispatchResult( + route="openclaw", + reason="dispatched", + ) + return OpenClawDispatchResult( + route="openclaw", + reason=f"dispatched_after_retry_{attempt}", + ) + except OSError as error: + last_reason = f"spawn_error:{error.__class__.__name__}" + + pyperclip.copy(normalized) + return OpenClawDispatchResult(route="clipboard", reason=last_reason) + + def _build_openclaw_command(self, message: str) -> list[str] | None: + raw_command = str(getattr(self, "_openclaw_command", "openclaw")).strip() + if not raw_command: + return None + + try: + parts = shlex.split(raw_command) + except ValueError: + return None + + if not parts: + return None + + command = [*parts, "agent", "--message", message, "--json"] + agent = getattr(self, "_openclaw_agent", None) + if isinstance(agent, str): + normalized_agent = agent.strip() + if normalized_agent: + command.extend(["--agent", normalized_agent]) + return command + + def _paste_clipboard(self) -> None: + terminal_active = self._is_hyprland_terminal_active() + for mod, key in self._paste_shortcuts(terminal_active=terminal_active): + if self._send_platform_shortcut(mod=mod, key=key): + return + + if ( + self._hyprland_session + and terminal_active + and self._send_ctrl_shift_v_via_keyboard() + ): + return + + if ( + self._hyprland_session + and terminal_active + and self._send_shift_insert_via_keyboard() + ): + return + + self._send_ctrl_v_via_keyboard() + + def _send_ctrl_v_via_keyboard(self) -> None: + pressed_ctrl = False + pressed_v = False + try: + self._kb.press(self._ctrl_key) + pressed_ctrl = True + self._kb.press("v") + pressed_v = True + finally: + if pressed_v: + try: + self._kb.release("v") + except Exception: + pass + if pressed_ctrl: + try: + self._kb.release(self._ctrl_key) + except Exception: + pass + + def _send_ctrl_shift_v_via_keyboard(self) -> bool: + pressed_ctrl = False + pressed_shift = False + pressed_v = False + try: + self._kb.press(self._ctrl_key) + pressed_ctrl = True + self._kb.press(self._shift_key) + pressed_shift = True + self._kb.press("v") + pressed_v = True + return True + except Exception: + return False + finally: + if pressed_v: + try: + self._kb.release("v") + except Exception: + pass + if pressed_shift: + try: + self._kb.release(self._shift_key) + except Exception: + pass + if pressed_ctrl: + try: + self._kb.release(self._ctrl_key) + except Exception: + pass + + def _send_shift_insert_via_keyboard(self) -> bool: + pressed_shift = False + pressed_insert = False + try: + self._kb.press(self._shift_key) + pressed_shift = True + self._kb.press(self._insert_key) + pressed_insert = True + return True + except Exception: + return False + finally: + if pressed_insert: + try: + self._kb.release(self._insert_key) + except Exception: + pass + if pressed_shift: + try: + self._kb.release(self._shift_key) + except Exception: + pass + + def _tap_key(self, key: object) -> None: + self._kb.press(key) + time.sleep(0.012) + self._kb.release(key) + + def _tap_modified_key(self, modifier: object, key: object) -> None: + pressed_modifier = False + pressed_key = False + try: + self._kb.press(modifier) + pressed_modifier = True + self._kb.press(key) + pressed_key = True + time.sleep(0.012) + finally: + if pressed_key: + try: + self._kb.release(key) + except Exception: + pass + if pressed_modifier: + try: + self._kb.release(modifier) + except Exception: + pass + + def _send_enter_via_atspi(self) -> bool: + try: + system_integration = self._system_integration + except AttributeError: + system_integration = None + + if system_integration is not None: + try: + handled = system_integration.send_enter_via_accessibility() + except Exception: + handled = None + if handled is True: + return True + + atspi_module = getattr(self, "_atspi", None) + return probe_send_enter_via_atspi( + atspi_module=atspi_module, + lazy_load=False, + ) + + def _paste_shortcuts(self, *, terminal_active: bool) -> tuple[tuple[str, str], ...]: + try: + system_integration = self._system_integration + except AttributeError: + system_integration = None + + if system_integration is not None: + try: + shortcuts = system_integration.paste_shortcuts( + terminal_active=terminal_active + ) + except Exception: + shortcuts = () + if shortcuts: + return shortcuts + + if terminal_active: + return ( + ("CTRL SHIFT", "V"), + ("SHIFT", "Insert"), + ("CTRL", "V"), + ) + return (("CTRL", "V"),) + + def _send_platform_shortcut(self, *, mod: str, key: str) -> bool: + try: + system_integration = self._system_integration + except AttributeError: + system_integration = None + + if system_integration is not None: + try: + if bool(system_integration.send_shortcut(mod=mod, key=key)): + return True + if not self._hyprland_session: + return False + except Exception: + if not self._hyprland_session: + return False + + if not self._hyprland_session: + return False + + mod_part = mod.strip().upper() + if mod_part: + arg = f"{mod_part}, {key}, activewindow" + else: + arg = f", {key}, activewindow" + + try: + proc = subprocess.run( + ["hyprctl", "dispatch", "sendshortcut", arg], + capture_output=True, + text=True, + check=False, + timeout=1.0, + ) + except (OSError, subprocess.TimeoutExpired): + return False + + return proc.returncode == 0 and proc.stdout.strip() == "ok" + + def _send_hyprland_shortcut(self, *, mod: str, key: str) -> bool: + return self._send_platform_shortcut(mod=mod, key=key) + + def _is_terminal_window_active(self) -> bool: + payload_map: dict[str, object] | None = None + try: + system_integration = self._system_integration + except AttributeError: + system_integration = None + + if system_integration is not None: + try: + terminal_active = system_integration.is_terminal_window_active() + except Exception: + terminal_active = None + if isinstance(terminal_active, bool): + return terminal_active + + if not self._hyprland_session: + return False + + if payload_map is None: + try: + proc = subprocess.run( + ["hyprctl", "-j", "activewindow"], + capture_output=True, + text=True, + check=False, + timeout=1.0, + ) + except (OSError, subprocess.TimeoutExpired): + return False + + if proc.returncode != 0: + return False + + try: + payload_obj = cast(object, json.loads(proc.stdout)) + except json.JSONDecodeError: + return False + + if not isinstance(payload_obj, dict): + return False + + payload_map = cast(dict[str, object], payload_obj) + + return is_terminal_window_payload(payload_map) + + def _is_hyprland_terminal_active(self) -> bool: + return self._is_terminal_window_active() + + def _is_text_input_focused(self) -> bool: + try: + system_integration = self._system_integration + except AttributeError: + system_integration = None + + if system_integration is not None: + try: + focused = system_integration.is_text_input_focused() + except Exception: + focused = None + if isinstance(focused, bool): + return focused + + return probe_text_input_focus_via_atspi() + + +class _KeyboardController(Protocol): + def press(self, key: object) -> None: ... + + def release(self, key: object) -> None: ... + + def type(self, text: str) -> None: ... + + +class _ControllerCtor(Protocol): + def __call__(self) -> _KeyboardController: ... + + +class _KeyHolder(Protocol): + enter: object + ctrl: object + shift: object + insert: object + + +@dataclass(frozen=True) +class OpenClawDispatchResult: + route: str + reason: str diff --git a/vibemouse/core/transcriber.py b/vibemouse/core/transcriber.py new file mode 100644 index 0000000..4e90436 --- /dev/null +++ b/vibemouse/core/transcriber.py @@ -0,0 +1,300 @@ +from __future__ import annotations + +import importlib +import logging +import re +from pathlib import Path +from threading import Lock +from typing import Protocol, cast + +from vibemouse.config import AppConfig + +_LOG = logging.getLogger(__name__) + + +class SenseVoiceTranscriber: + def __init__(self, config: AppConfig) -> None: + self._config: AppConfig = config + self._transcriber: _TranscriberProtocol | None = None + self._transcriber_lock: Lock = Lock() + self.device_in_use: str = config.device + self.backend_in_use: str = "unknown" + + def transcribe(self, audio_path: Path) -> str: + self._ensure_transcriber_loaded() + if self._transcriber is None: + raise RuntimeError("SenseVoice transcriber is not initialized") + return self._transcriber.transcribe(audio_path) + + def prewarm(self) -> None: + self._ensure_transcriber_loaded() + + def _ensure_transcriber_loaded(self) -> None: + if self._transcriber is not None: + return + + with self._transcriber_lock: + if self._transcriber is not None: + return + + backend = self._config.transcriber_backend + if backend in {"auto", "funasr"}: + _LOG.warning( + "Backend %r is deprecated; using 'funasr_onnx' instead", backend + ) + backend = "funasr_onnx" + + if backend != "funasr_onnx": + raise RuntimeError(f"Unsupported backend {backend!r}. Use funasr_onnx.") + + self._build_funasr_onnx_backend() + return + + def _build_funasr_onnx_backend(self) -> None: + backend = _FunASRONNXBackend(self._config) + self._transcriber = backend + self.device_in_use = backend.device_in_use + self.backend_in_use = "funasr_onnx" + + +class _FunASRONNXBackend: + def __init__(self, config: AppConfig) -> None: + self._config: AppConfig = config + self._model: _ONNXSenseVoiceModel | None = None + self._postprocess: _PostprocessFn | None = None + self._load_lock: Lock = Lock() + self.device_in_use: str = "cpu" + self._ensure_model_loaded() + + def transcribe(self, audio_path: Path) -> str: + if self._model is None: + raise RuntimeError("funasr_onnx SenseVoice model is not initialized") + if self._postprocess is None: + raise RuntimeError("funasr postprocess function is not initialized") + + textnorm = "withitn" if self._config.use_itn else "woitn" + result = self._model( + str(audio_path), + language=self._config.language, + textnorm=textnorm, + ) + if not result: + return "" + + raw_text = result[0] + return self._postprocess(raw_text).strip() + + def _ensure_model_loaded(self) -> None: + if self._model is not None: + return + + with self._load_lock: + if self._model is not None: + return + try: + SenseVoiceSmall = self._load_onnx_class() + postprocess = self._load_postprocess() + except Exception as error: + raise RuntimeError( + "funasr_onnx backend requires funasr-onnx package" + ) from error + + requested_path = self._resolve_onnx_model_dir() + self._ensure_tokenizer_file(requested_path) + device_id = self._resolve_onnx_device_id(self._config.device) + + try: + model = SenseVoiceSmall( + model_dir=str(requested_path), + batch_size=1, + device_id=device_id, + quantize=True, + cache_dir=None, + ) + self._model = model + self._postprocess = postprocess + self.device_in_use = self._resolve_device_label(self._config.device) + _LOG.info( + "Loaded funasr_onnx model: device_in_use=%s model=%s", + self.device_in_use, + requested_path, + ) + return + except Exception as primary_error: + if not self._config.fallback_to_cpu: + raise RuntimeError( + f"Failed to load funasr_onnx backend on {self._config.device}: {primary_error}" + ) from primary_error + + try: + model = SenseVoiceSmall( + model_dir=str(requested_path), + batch_size=1, + device_id="-1", + quantize=True, + cache_dir=None, + ) + except Exception as cpu_error: + raise RuntimeError( + f"Failed to load funasr_onnx backend on {self._config.device} and cpu fallback: {cpu_error}" + ) from cpu_error + + self._model = model + self._postprocess = postprocess + self.device_in_use = "cpu" + _LOG.warning( + "Loaded funasr_onnx model with CPU fallback after device load failure" + ) + + def _resolve_onnx_model_dir(self) -> Path: + raw_model = self._config.model_name + canonical_model = raw_model + if raw_model == "iic/SenseVoiceSmall": + canonical_model = "iic/SenseVoiceSmall-onnx" + + if canonical_model.startswith("iic/"): + return self._download_modelscope_snapshot(canonical_model) + + path_candidate = Path(canonical_model) + if not path_candidate.exists(): + return path_candidate + + if self._contains_onnx_model(path_candidate): + return path_candidate + + raise RuntimeError( + f"ONNX model directory {path_candidate} exists but model_quant.onnx/model.onnx is missing" + ) + + @staticmethod + def _contains_onnx_model(model_dir: Path) -> bool: + return (model_dir / "model_quant.onnx").exists() or ( + model_dir / "model.onnx" + ).exists() + + @staticmethod + def _download_modelscope_snapshot(model_id: str) -> Path: + try: + snapshot_mod = importlib.import_module("modelscope.hub.snapshot_download") + except Exception as error: + raise RuntimeError( + "modelscope is required to download ONNX model snapshots" + ) from error + + snapshot_download = cast( + _SnapshotDownloadFn, + getattr(snapshot_mod, "snapshot_download"), + ) + snapshot_path = snapshot_download(model_id) + model_dir = Path(snapshot_path) + if not model_dir.exists(): + raise RuntimeError(f"Downloaded model path does not exist: {snapshot_path}") + if not _FunASRONNXBackend._contains_onnx_model(model_dir): + raise RuntimeError( + f"Downloaded model {model_id} missing model_quant.onnx/model.onnx" + ) + return model_dir + + @staticmethod + def _resolve_onnx_device_id(device: str) -> str: + normalized = device.strip().lower() + if normalized == "cpu": + return "-1" + if normalized.startswith("cuda"): + parts = normalized.split(":", 1) + return parts[1] if len(parts) > 1 and parts[1] else "0" + return "-1" + + @staticmethod + def _resolve_device_label(device: str) -> str: + normalized = device.strip().lower() + if normalized.startswith("cuda"): + return normalized + return "cpu" + + def _ensure_tokenizer_file(self, model_dir: Path) -> None: + target = model_dir / "chn_jpn_yue_eng_ko_spectok.bpe.model" + if target.exists(): + return + + fallback = ( + Path.home() + / ".cache/modelscope/hub/models/iic/SenseVoiceSmall/chn_jpn_yue_eng_ko_spectok.bpe.model" + ) + if fallback.exists(): + model_dir.mkdir(parents=True, exist_ok=True) + _ = target.write_bytes(fallback.read_bytes()) + return + + raise RuntimeError( + "Tokenizer file chn_jpn_yue_eng_ko_spectok.bpe.model is missing and no fallback was found" + ) + + @staticmethod + def _load_onnx_class() -> _ONNXSenseVoiceCtor: + module = importlib.import_module("funasr_onnx") + return cast(_ONNXSenseVoiceCtor, getattr(module, "SenseVoiceSmall")) + + @staticmethod + def _load_postprocess() -> _PostprocessFn: + try: + post_module = importlib.import_module("funasr.utils.postprocess_utils") + return cast( + _PostprocessFn, + getattr(post_module, "rich_transcription_postprocess"), + ) + except Exception: + try: + post_module = importlib.import_module( + "funasr_onnx.utils.postprocess_utils" + ) + return cast( + _PostprocessFn, + getattr(post_module, "rich_transcription_postprocess"), + ) + except Exception: + return _strip_sensevoice_control_tokens + + +_SENSEVOICE_CONTROL_TOKEN_RE = re.compile(r"<\|[^|>]+\|>") + + +def _strip_sensevoice_control_tokens(text: str) -> str: + cleaned = _SENSEVOICE_CONTROL_TOKEN_RE.sub("", text) + return " ".join(cleaned.split()).strip() + + +class _TranscriberProtocol(Protocol): + device_in_use: str + + def transcribe(self, audio_path: Path) -> str: ... + + +class _PostprocessFn(Protocol): + def __call__(self, text: str) -> str: ... + + +class _ONNXSenseVoiceModel(Protocol): + def __call__( + self, + wav_content: str, + *, + language: str, + textnorm: str, + ) -> list[str]: ... + + +class _ONNXSenseVoiceCtor(Protocol): + def __call__( + self, + *, + model_dir: str, + batch_size: int, + device_id: str, + quantize: bool, + cache_dir: str | None, + ) -> _ONNXSenseVoiceModel: ... + + +class _SnapshotDownloadFn(Protocol): + def __call__(self, model_id: str) -> str: ... diff --git a/vibemouse/deploy.py b/vibemouse/deploy.py index dd08ec0..218bae7 100644 --- a/vibemouse/deploy.py +++ b/vibemouse/deploy.py @@ -1,268 +1,4 @@ -from __future__ import annotations +from importlib import import_module as _import_module +import sys as _sys -import argparse -import shlex -import shutil -import subprocess -import sys -from pathlib import Path -from typing import cast - -from vibemouse.doctor import run_doctor - - -_PRESET_OVERRIDES: dict[str, dict[str, str]] = { - "stable": { - "VIBEMOUSE_AUTO_PASTE": "true", - "VIBEMOUSE_BUTTON_DEBOUNCE_MS": "220", - "VIBEMOUSE_PREWARM_ON_START": "true", - "VIBEMOUSE_OPENCLAW_RETRIES": "1", - }, - "fast": { - "VIBEMOUSE_AUTO_PASTE": "true", - "VIBEMOUSE_BUTTON_DEBOUNCE_MS": "120", - "VIBEMOUSE_PREWARM_ON_START": "true", - "VIBEMOUSE_OPENCLAW_RETRIES": "2", - }, - "low-resource": { - "VIBEMOUSE_AUTO_PASTE": "false", - "VIBEMOUSE_BUTTON_DEBOUNCE_MS": "250", - "VIBEMOUSE_PREWARM_ON_START": "false", - "VIBEMOUSE_OPENCLAW_RETRIES": "0", - }, -} - - -def configure_deploy_parser(parser: argparse.ArgumentParser) -> None: - _ = parser.add_argument( - "--preset", - choices=sorted(_PRESET_OVERRIDES.keys()), - default="stable", - help="deployment preset profile", - ) - _ = parser.add_argument( - "--env-file", - default=str(Path.home() / ".config" / "vibemouse" / "deploy.env"), - help="path to generated EnvironmentFile", - ) - _ = parser.add_argument( - "--service-file", - default=str(Path.home() / ".config" / "systemd" / "user" / "vibemouse.service"), - help="path to generated systemd user service file", - ) - _ = parser.add_argument( - "--log-file", - default=str(Path.home() / ".local" / "state" / "vibemouse" / "service.log"), - help="path to persistent service log file", - ) - _ = parser.add_argument( - "--openclaw-command", - default=shutil.which("openclaw") or "openclaw", - help="OpenClaw command prefix", - ) - _ = parser.add_argument( - "--openclaw-agent", - default="main", - help="OpenClaw agent id used for rear-button routing", - ) - _ = parser.add_argument( - "--openclaw-retries", - type=int, - default=None, - help="override retries for OpenClaw spawn failures", - ) - _ = parser.add_argument( - "--exec-start", - default=None, - help="override ExecStart command", - ) - _ = parser.add_argument( - "--skip-systemctl", - action="store_true", - help="skip systemctl enable/restart operations", - ) - _ = parser.add_argument( - "--dry-run", - action="store_true", - help="print plan without writing files", - ) - - -def run_deploy(args: argparse.Namespace) -> int: - preset = str(getattr(args, "preset", "stable")) - if preset not in _PRESET_OVERRIDES: - print(f"Unknown preset: {preset}") - return 1 - - openclaw_command = str(getattr(args, "openclaw_command", "openclaw")).strip() - if not openclaw_command: - print("--openclaw-command must not be empty") - return 1 - - openclaw_agent = str(getattr(args, "openclaw_agent", "main")).strip() or "main" - - retries_override = cast(int | None, getattr(args, "openclaw_retries", None)) - - if retries_override is not None and retries_override < 0: - print("--openclaw-retries must be non-negative") - return 1 - - env_path = Path(str(getattr(args, "env_file", ""))).expanduser() - service_path = Path(str(getattr(args, "service_file", ""))).expanduser() - log_path = Path(str(getattr(args, "log_file", ""))).expanduser() - exec_start = _resolve_exec_start(str(getattr(args, "exec_start", "") or "")) - - env_map = build_deploy_env( - preset=preset, - openclaw_command=openclaw_command, - openclaw_agent=openclaw_agent, - openclaw_retries=retries_override, - ) - env_content = render_env_file(env_map) - service_content = render_service_file( - env_file=env_path, - log_file=log_path, - exec_start=exec_start, - ) - - dry_run = bool(getattr(args, "dry_run", False)) - if dry_run: - print(f"[DRY-RUN] would write {env_path}") - print(f"[DRY-RUN] would write {service_path}") - print(f"[DRY-RUN] preset={preset}") - print(f"[DRY-RUN] exec_start={exec_start}") - return 0 - - _write_text(env_path, env_content) - _write_text(service_path, service_content) - print(f"Wrote {env_path}") - print(f"Wrote {service_path}") - - if not bool(getattr(args, "skip_systemctl", False)): - service_name = service_path.name - if not _run_systemctl(["daemon-reload"]): - return 1 - if not _run_systemctl(["enable", "--now", service_name]): - return 1 - if not _run_systemctl(["is-active", service_name]): - return 1 - - print("Running doctor checks...") - return run_doctor() - - -def build_deploy_env( - *, - preset: str, - openclaw_command: str, - openclaw_agent: str, - openclaw_retries: int | None, -) -> dict[str, str]: - base = { - "VIBEMOUSE_BACKEND": "funasr_onnx", - "VIBEMOUSE_DEVICE": "cpu", - "VIBEMOUSE_FALLBACK_CPU": "true", - "VIBEMOUSE_ENTER_MODE": "enter", - "VIBEMOUSE_OPENCLAW_COMMAND": openclaw_command, - "VIBEMOUSE_OPENCLAW_AGENT": openclaw_agent, - "VIBEMOUSE_OPENCLAW_TIMEOUT_S": "20.0", - "VIBEMOUSE_STATUS_FILE": "%t/vibemouse-status.json", - } - base.update(_PRESET_OVERRIDES[preset]) - if openclaw_retries is not None: - base["VIBEMOUSE_OPENCLAW_RETRIES"] = str(openclaw_retries) - return base - - -def render_env_file(env_map: dict[str, str]) -> str: - lines = [ - "# Generated by `vibemouse deploy`.", - "# Edit values if needed, then: systemctl --user restart vibemouse.service", - ] - for key in sorted(env_map.keys()): - lines.append(f"{key}={_quote_env_value(env_map[key])}") - lines.append("") - return "\n".join(lines) - - -def render_service_file(*, env_file: Path, log_file: Path, exec_start: str) -> str: - env_file_str = env_file.as_posix() - log_file_str = log_file.as_posix() - log_dir = log_file.parent.as_posix() - lines = [ - "[Unit]", - "Description=VibeMouse voice input service", - "After=graphical-session.target", - "PartOf=graphical-session.target", - "", - "[Service]", - "Type=simple", - f"EnvironmentFile={env_file_str}", - f"ExecStartPre=/usr/bin/mkdir -p {log_dir}", - f"ExecStart={exec_start}", - f"StandardOutput=append:{log_file_str}", - f"StandardError=append:{log_file_str}", - "Restart=on-failure", - "RestartSec=2", - "", - "[Install]", - "WantedBy=default.target", - "", - ] - return "\n".join(lines) - - -def _quote_env_value(value: str) -> str: - escaped = value.replace("\\", "\\\\").replace('"', '\\"') - return f'"{escaped}"' - - -def _resolve_exec_start(raw_exec_start: str) -> str: - cleaned = raw_exec_start.strip() - if cleaned: - return cleaned - - vibemouse_bin = shutil.which("vibemouse") - if vibemouse_bin: - return f"{vibemouse_bin} run" - - python_bin = sys.executable - return f"{python_bin} -m vibemouse.main run" - - -def _write_text(path: Path, content: str) -> None: - path.parent.mkdir(parents=True, exist_ok=True) - _ = path.write_text(content, encoding="utf-8") - - -def _run_systemctl(args: list[str]) -> bool: - cmd = ["systemctl", "--user", *args] - try: - proc = subprocess.run( - cmd, - capture_output=True, - text=True, - check=False, - timeout=12.0, - ) - except (OSError, subprocess.TimeoutExpired) as error: - print(f"Failed to run {' '.join(cmd)}: {error}") - return False - - if proc.returncode == 0: - return True - - stderr = proc.stderr.strip() - if stderr: - print(f"systemctl {' '.join(args)} failed: {stderr}") - else: - print(f"systemctl {' '.join(args)} failed with code {proc.returncode}") - return False - - -def validate_openclaw_command(raw: str) -> bool: - try: - parts = shlex.split(raw) - except ValueError: - return False - return bool(parts) +_sys.modules[__name__] = _import_module("vibemouse.ops.deploy") diff --git a/vibemouse/doctor.py b/vibemouse/doctor.py index 3cdbac1..a033cf6 100644 --- a/vibemouse/doctor.py +++ b/vibemouse/doctor.py @@ -1,610 +1,4 @@ -from __future__ import annotations +from importlib import import_module as _import_module +import sys as _sys -import importlib -import json -import shlex -import shutil -import subprocess -import sys -from collections.abc import Iterable, Mapping -from dataclasses import dataclass -from pathlib import Path - -from vibemouse.config import AppConfig, load_config - - -@dataclass(frozen=True) -class DoctorCheck: - name: str - status: str - detail: str - - -def run_doctor(*, apply_fixes: bool = False) -> int: - if apply_fixes: - _apply_doctor_fixes() - - checks: list[DoctorCheck] = [] - - config_check, config = _check_config_load() - checks.append(config_check) - - if config is not None: - checks.extend(_check_openclaw(config)) - - checks.append(_check_audio_input(config)) - checks.append(_check_input_device_permissions(config)) - - checks.append(_check_hyprland_return_bind_conflict(config)) - checks.append(_check_user_service_state()) - - _print_checks(checks) - - fail_count = sum(1 for check in checks if check.status == "fail") - warn_count = sum(1 for check in checks if check.status == "warn") - print(f"Doctor summary: {len(checks)} checks, {fail_count} fail, {warn_count} warn") - return 1 if fail_count else 0 - - -def _apply_doctor_fixes() -> None: - _fix_hyprland_return_bind_conflict() - _ensure_user_service_active() - - -def _fix_hyprland_return_bind_conflict() -> None: - bind_path = Path.home() / ".config/hypr/UserConfigs/UserKeybinds.conf" - if not bind_path.exists(): - return - - try: - lines = bind_path.read_text(encoding="utf-8", errors="ignore").splitlines() - except OSError: - return - - changed = False - rewritten: list[str] = [] - for line in lines: - stripped = line.strip() - if ( - stripped.startswith("#") - or "sendshortcut" not in stripped - or "Return" not in stripped - ): - rewritten.append(line) - continue - - if "mouse:275" in stripped or "mouse:276" in stripped: - rewritten.append(f"# {line} # auto-disabled by vibemouse doctor --fix") - changed = True - continue - - rewritten.append(line) - - if not changed: - return - - try: - bind_path.write_text("\n".join(rewritten) + "\n", encoding="utf-8") - except OSError: - return - - _ = _run_subprocess( - ["hyprctl", "reload", "config-only"], - timeout=3.0, - ) - - -def _ensure_user_service_active() -> None: - probe = _run_subprocess( - ["systemctl", "--user", "is-active", "vibemouse.service"], - timeout=3.0, - ) - if probe is None: - return - if probe.returncode == 0 and probe.stdout.strip() == "active": - return - - _ = _run_subprocess( - ["systemctl", "--user", "restart", "vibemouse.service"], - timeout=8.0, - ) - - -def _check_config_load() -> tuple[DoctorCheck, AppConfig | None]: - try: - config = load_config() - except Exception as error: - return ( - DoctorCheck( - name="config", - status="fail", - detail=f"failed to load config: {error}", - ), - None, - ) - - return ( - DoctorCheck( - name="config", - status="ok", - detail=( - "loaded " - + f"front={config.front_button}, rear={config.rear_button}, " - + f"openclaw_agent={config.openclaw_agent or 'none'}" - ), - ), - config, - ) - - -def _check_openclaw(config: AppConfig) -> list[DoctorCheck]: - checks: list[DoctorCheck] = [] - - command_parts = _parse_openclaw_command(config.openclaw_command) - if command_parts is None: - checks.append( - DoctorCheck( - name="openclaw-command", - status="fail", - detail="invalid VIBEMOUSE_OPENCLAW_COMMAND shell syntax", - ) - ) - return checks - - executable = command_parts[0] - resolved = shutil.which(executable) - if resolved is None: - checks.append( - DoctorCheck( - name="openclaw-command", - status="fail", - detail=f"executable not found in PATH: {executable}", - ) - ) - return checks - - checks.append( - DoctorCheck( - name="openclaw-command", - status="ok", - detail=f"resolved executable: {resolved}", - ) - ) - - configured_agent = config.openclaw_agent - if not configured_agent: - checks.append( - DoctorCheck( - name="openclaw-agent", - status="warn", - detail="no agent configured; set VIBEMOUSE_OPENCLAW_AGENT", - ) - ) - return checks - - probe_cmd = [*command_parts, "agents", "list", "--json"] - try: - probe = subprocess.run( - probe_cmd, - capture_output=True, - text=True, - check=False, - timeout=8.0, - ) - except subprocess.TimeoutExpired: - checks.append( - DoctorCheck( - name="openclaw-agent", - status="warn", - detail="timed out while probing available agents", - ) - ) - return checks - except OSError as error: - checks.append( - DoctorCheck( - name="openclaw-agent", - status="warn", - detail=f"failed to run agent probe: {error}", - ) - ) - return checks - - if probe.returncode != 0: - stderr = probe.stderr.strip() - checks.append( - DoctorCheck( - name="openclaw-agent", - status="warn", - detail=( - "agent probe failed" - if not stderr - else f"agent probe failed: {stderr}" - ), - ) - ) - return checks - - try: - payload = json.loads(probe.stdout) - except json.JSONDecodeError: - checks.append( - DoctorCheck( - name="openclaw-agent", - status="warn", - detail="agent probe returned invalid JSON", - ) - ) - return checks - - if not isinstance(payload, list): - checks.append( - DoctorCheck( - name="openclaw-agent", - status="warn", - detail="agent probe returned unexpected payload shape", - ) - ) - return checks - - available_agents = { - str(entry.get("id", "")).strip() for entry in payload if isinstance(entry, dict) - } - if configured_agent in available_agents: - checks.append( - DoctorCheck( - name="openclaw-agent", - status="ok", - detail=f"configured agent exists: {configured_agent}", - ) - ) - else: - sample = ", ".join(sorted(agent for agent in available_agents if agent)[:5]) - checks.append( - DoctorCheck( - name="openclaw-agent", - status="warn", - detail=( - f"configured agent not found: {configured_agent}; " - + (f"available: {sample}" if sample else "no agents listed") - ), - ) - ) - - return checks - - -def _check_audio_input(config: AppConfig | None) -> DoctorCheck: - try: - sounddevice = importlib.import_module("sounddevice") - except Exception as error: - return DoctorCheck( - name="audio-input", - status="fail", - detail=f"cannot import sounddevice: {error}", - ) - - query_devices = getattr(sounddevice, "query_devices", None) - if not callable(query_devices): - return DoctorCheck( - name="audio-input", - status="fail", - detail="sounddevice.query_devices is unavailable", - ) - - try: - devices_obj = query_devices() - except Exception as error: - return DoctorCheck( - name="audio-input", - status="fail", - detail=f"failed to query audio devices: {error}", - ) - - device_entries = _coerce_device_entries(devices_obj) - if device_entries is None: - return DoctorCheck( - name="audio-input", - status="warn", - detail="unexpected audio device payload shape", - ) - - input_devices: list[Mapping[str, object]] = [] - for item in device_entries: - max_inputs = _to_float(item.get("max_input_channels", 0.0)) - if max_inputs > 0: - input_devices.append(item) - if not input_devices: - return DoctorCheck( - name="audio-input", - status="fail", - detail="no input-capable microphone device detected", - ) - - default_index = _read_default_input_device_index(sounddevice) - check_input_settings = getattr(sounddevice, "check_input_settings", None) - if default_index is not None and callable(check_input_settings): - sample_rate = float(config.sample_rate) if config is not None else 16000.0 - channels = config.channels if config is not None else 1 - try: - _ = check_input_settings( - device=default_index, - channels=max(1, int(channels)), - samplerate=sample_rate, - ) - except Exception as error: - return DoctorCheck( - name="audio-input", - status="warn", - detail=f"default input exists but validation failed: {error}", - ) - - return DoctorCheck( - name="audio-input", - status="ok", - detail=f"detected {len(input_devices)} input-capable device(s)", - ) - - -def _check_input_device_permissions(config: AppConfig | None) -> DoctorCheck: - if not sys.platform.startswith("linux"): - return DoctorCheck( - name="input-device-permissions", - status="warn", - detail="raw input permission check is only available on Linux", - ) - - try: - evdev_module = importlib.import_module("evdev") - except Exception as error: - return DoctorCheck( - name="input-device-permissions", - status="warn", - detail=f"cannot import evdev for raw input check: {error}", - ) - - list_devices = getattr(evdev_module, "list_devices", None) - input_device_ctor = getattr(evdev_module, "InputDevice", None) - ecodes = getattr(evdev_module, "ecodes", None) - if not callable(list_devices) or input_device_ctor is None or ecodes is None: - return DoctorCheck( - name="input-device-permissions", - status="warn", - detail="evdev module is missing required APIs", - ) - - try: - device_paths_obj = list_devices() - except Exception as error: - return DoctorCheck( - name="input-device-permissions", - status="warn", - detail=f"failed to list /dev/input devices: {error}", - ) - - if not isinstance(device_paths_obj, list): - return DoctorCheck( - name="input-device-permissions", - status="warn", - detail="unexpected device-path payload from evdev", - ) - - device_paths = [str(path) for path in device_paths_obj] - if not device_paths: - return DoctorCheck( - name="input-device-permissions", - status="warn", - detail="no /dev/input/event* devices were found", - ) - - ev_key = int(getattr(ecodes, "EV_KEY", 1)) - btn_side = int(getattr(ecodes, "BTN_SIDE", 0x116)) - btn_extra = int(getattr(ecodes, "BTN_EXTRA", 0x117)) - side_button_codes = {btn_side, btn_extra} - - accessible = 0 - side_capable = 0 - permission_denied = 0 - - for path in device_paths: - try: - device = input_device_ctor(path) - except PermissionError: - permission_denied += 1 - continue - except Exception: - continue - - try: - capabilities_obj = device.capabilities() - accessible += 1 - if isinstance(capabilities_obj, dict): - keys_obj = capabilities_obj.get(ev_key, []) - keys = {int(code) for code in keys_obj if isinstance(code, int)} - if side_button_codes & keys: - side_capable += 1 - finally: - try: - device.close() - except Exception: - pass - - if accessible == 0 and permission_denied > 0: - return DoctorCheck( - name="input-device-permissions", - status="fail", - detail=( - "cannot access /dev/input event devices (permission denied); " - + "add user to input group or configure udev rules" - ), - ) - - if accessible == 0: - return DoctorCheck( - name="input-device-permissions", - status="warn", - detail="no readable /dev/input event devices were found", - ) - - rear_button = config.rear_button if config is not None else "x2" - if side_capable == 0: - return DoctorCheck( - name="input-device-permissions", - status="warn", - detail=( - f"{accessible} input device(s) readable but none expose side-button codes " - + f"for rear={rear_button}" - ), - ) - - return DoctorCheck( - name="input-device-permissions", - status="ok", - detail=( - f"{accessible} readable input device(s), " - + f"{side_capable} with side-button capability" - ), - ) - - -def _read_default_input_device_index(sounddevice: object) -> int | None: - default_obj = getattr(sounddevice, "default", None) - if default_obj is None: - return None - - device_attr = getattr(default_obj, "device", None) - if not isinstance(device_attr, tuple | list) or len(device_attr) < 1: - return None - - raw_input_index = device_attr[0] - if not isinstance(raw_input_index, int): - return None - if raw_input_index < 0: - return None - return raw_input_index - - -def _coerce_device_entries(devices_obj: object) -> list[Mapping[str, object]] | None: - if isinstance(devices_obj, list): - return [entry for entry in devices_obj if isinstance(entry, Mapping)] - - if isinstance(devices_obj, Iterable): - entries: list[Mapping[str, object]] = [] - for entry in devices_obj: - if isinstance(entry, Mapping): - entries.append(entry) - return entries - - return None - - -def _to_float(value: object) -> float: - if isinstance(value, int | float): - return float(value) - if isinstance(value, str): - try: - return float(value.strip()) - except ValueError: - return 0.0 - return 0.0 - - -def _check_hyprland_return_bind_conflict(config: AppConfig | None) -> DoctorCheck: - bind_path = Path.home() / ".config/hypr/UserConfigs/UserKeybinds.conf" - if not bind_path.exists(): - return DoctorCheck( - name="hyprland-bind-conflict", - status="warn", - detail=f"file not found: {bind_path}", - ) - - rear_button = config.rear_button if config is not None else "x2" - rear_mouse_code = "mouse:275" if rear_button == "x1" else "mouse:276" - - lines = bind_path.read_text(encoding="utf-8", errors="ignore").splitlines() - for idx, raw_line in enumerate(lines, start=1): - line = raw_line.strip() - if not line or line.startswith("#"): - continue - if rear_mouse_code in line and "sendshortcut" in line and "Return" in line: - return DoctorCheck( - name="hyprland-bind-conflict", - status="fail", - detail=( - f"conflicting return bind found at {bind_path}:{idx}; " - + "disable it to let VibeMouse control rear-button behavior" - ), - ) - - return DoctorCheck( - name="hyprland-bind-conflict", - status="ok", - detail=f"no conflicting {rear_mouse_code} return bind found", - ) - - -def _check_user_service_state() -> DoctorCheck: - probe = _run_subprocess( - ["systemctl", "--user", "is-active", "vibemouse.service"], - timeout=3.0, - ) - if probe is None: - return DoctorCheck( - name="user-service", - status="warn", - detail="could not query service state", - ) - - state = probe.stdout.strip() or "unknown" - if state == "active": - return DoctorCheck( - name="user-service", - status="ok", - detail="vibemouse.service is active", - ) - - return DoctorCheck( - name="user-service", - status="warn", - detail=f"vibemouse.service state is {state}", - ) - - -def _run_subprocess( - cmd: list[str], - *, - timeout: float, -) -> subprocess.CompletedProcess[str] | None: - try: - return subprocess.run( - cmd, - capture_output=True, - text=True, - check=False, - timeout=timeout, - ) - except (OSError, subprocess.TimeoutExpired): - return None - - -def _parse_openclaw_command(raw: str) -> list[str] | None: - cleaned = raw.strip() - if not cleaned: - return None - try: - parts = shlex.split(cleaned) - except ValueError: - return None - if not parts: - return None - return parts - - -def _print_checks(checks: list[DoctorCheck]) -> None: - for check in checks: - badge = { - "ok": "[OK]", - "warn": "[WARN]", - "fail": "[FAIL]", - }.get(check.status, "[INFO]") - print(f"{badge} {check.name}: {check.detail}") +_sys.modules[__name__] = _import_module("vibemouse.ops.doctor") diff --git a/vibemouse/keyboard_listener.py b/vibemouse/keyboard_listener.py index 117bb7a..696609c 100644 --- a/vibemouse/keyboard_listener.py +++ b/vibemouse/keyboard_listener.py @@ -1,181 +1,4 @@ -from __future__ import annotations +from importlib import import_module as _import_module +import sys as _sys -import importlib -import select -import threading -import time -from collections.abc import Callable -from typing import Protocol, cast - - -HotkeyCallback = Callable[[], None] - - -class KeyboardHotkeyListener: - def __init__( - self, - *, - on_hotkey: HotkeyCallback, - keycodes: tuple[int, ...], - debounce_s: float = 0.15, - rescan_interval_s: float = 2.0, - ) -> None: - if not keycodes: - raise ValueError("keycodes must not be empty") - self._on_hotkey: HotkeyCallback = on_hotkey - self._combo: frozenset[int] = frozenset(keycodes) - self._debounce_s: float = max(0.0, debounce_s) - self._rescan_interval_s: float = max(0.2, rescan_interval_s) - self._state_lock: threading.Lock = threading.Lock() - self._pressed: set[int] = set() - self._combo_latched: bool = False - self._last_fire_monotonic: float = 0.0 - self._stop: threading.Event = threading.Event() - self._thread: threading.Thread | None = None - - def start(self) -> None: - if self._thread is not None and self._thread.is_alive(): - return - self._stop.clear() - self._thread = threading.Thread(target=self._run, daemon=True) - self._thread.start() - - def stop(self) -> None: - self._stop.set() - if self._thread is not None: - self._thread.join(timeout=2) - - def _run(self) -> None: - last_error_summary: str | None = None - while not self._stop.is_set(): - try: - self._run_evdev() - self._reset_pressed_state() - continue - except Exception as error: - summary = f"Keyboard hotkey listener unavailable ({error}). Retrying..." - if summary != last_error_summary: - print(summary) - last_error_summary = summary - self._reset_pressed_state() - if self._stop.wait(1.0): - return - - def _run_evdev(self) -> None: - try: - evdev_module = importlib.import_module("evdev") - except Exception as error: - raise RuntimeError("evdev is not available") from error - - input_device_ctor = cast(_InputDeviceCtor, getattr(evdev_module, "InputDevice")) - ecodes = cast(_Ecodes, getattr(evdev_module, "ecodes")) - list_devices = cast(_ListDevicesFn, getattr(evdev_module, "list_devices")) - - devices: list[_EvdevDevice] = [] - for path in list_devices(): - try: - dev = input_device_ctor(path) - except Exception: - continue - try: - caps = dev.capabilities() - key_cap = caps.get(ecodes.EV_KEY, []) - if not any(code in key_cap for code in self._combo): - dev.close() - continue - if ecodes.KEY_A not in key_cap: - dev.close() - continue - devices.append(dev) - except Exception: - dev.close() - - if not devices: - raise RuntimeError("No keyboard input device with required keycodes found") - - try: - fd_map: dict[int, _EvdevDevice] = {dev.fd: dev for dev in devices} - next_rescan_at = time.monotonic() + self._rescan_interval_s - while not self._stop.is_set(): - if not fd_map: - return - now = time.monotonic() - if now >= next_rescan_at: - return - - timeout_s = min(0.2, max(0.0, next_rescan_at - now)) - try: - ready, _, _ = select.select(list(fd_map.keys()), [], [], timeout_s) - except (OSError, ValueError): - return - for fd in ready: - dev = fd_map[fd] - try: - events = dev.read() - except OSError: - return - for event in events: - if event.type != ecodes.EV_KEY: - continue - if self._process_key_event(event.code, event.value): - self._on_hotkey() - finally: - for dev in devices: - dev.close() - - def _reset_pressed_state(self) -> None: - with self._state_lock: - self._pressed.clear() - self._combo_latched = False - - def _process_key_event(self, keycode: int, value: int) -> bool: - with self._state_lock: - if value == 1: - self._pressed.add(keycode) - elif value == 0: - self._pressed.discard(keycode) - else: - return False - - if self._combo_latched and not self._combo.issubset(self._pressed): - self._combo_latched = False - - now = time.monotonic() - if ( - not self._combo_latched - and self._combo.issubset(self._pressed) - and now - self._last_fire_monotonic >= self._debounce_s - ): - self._combo_latched = True - self._last_fire_monotonic = now - return True - return False - - -class _EvdevEvent(Protocol): - type: int - value: int - code: int - - -class _EvdevDevice(Protocol): - fd: int - - def read(self) -> list[_EvdevEvent]: ... - - def capabilities(self) -> dict[int, list[int]]: ... - - def close(self) -> None: ... - - -class _InputDeviceCtor(Protocol): - def __call__(self, path: str) -> _EvdevDevice: ... - - -class _ListDevicesFn(Protocol): - def __call__(self) -> list[str]: ... - - -class _Ecodes(Protocol): - EV_KEY: int - KEY_A: int +_sys.modules[__name__] = _import_module("vibemouse.listener.keyboard_listener") diff --git a/vibemouse/listener/__init__.py b/vibemouse/listener/__init__.py new file mode 100644 index 0000000..a9a2c5b --- /dev/null +++ b/vibemouse/listener/__init__.py @@ -0,0 +1 @@ +__all__ = [] diff --git a/vibemouse/listener/keyboard_listener.py b/vibemouse/listener/keyboard_listener.py new file mode 100644 index 0000000..117bb7a --- /dev/null +++ b/vibemouse/listener/keyboard_listener.py @@ -0,0 +1,181 @@ +from __future__ import annotations + +import importlib +import select +import threading +import time +from collections.abc import Callable +from typing import Protocol, cast + + +HotkeyCallback = Callable[[], None] + + +class KeyboardHotkeyListener: + def __init__( + self, + *, + on_hotkey: HotkeyCallback, + keycodes: tuple[int, ...], + debounce_s: float = 0.15, + rescan_interval_s: float = 2.0, + ) -> None: + if not keycodes: + raise ValueError("keycodes must not be empty") + self._on_hotkey: HotkeyCallback = on_hotkey + self._combo: frozenset[int] = frozenset(keycodes) + self._debounce_s: float = max(0.0, debounce_s) + self._rescan_interval_s: float = max(0.2, rescan_interval_s) + self._state_lock: threading.Lock = threading.Lock() + self._pressed: set[int] = set() + self._combo_latched: bool = False + self._last_fire_monotonic: float = 0.0 + self._stop: threading.Event = threading.Event() + self._thread: threading.Thread | None = None + + def start(self) -> None: + if self._thread is not None and self._thread.is_alive(): + return + self._stop.clear() + self._thread = threading.Thread(target=self._run, daemon=True) + self._thread.start() + + def stop(self) -> None: + self._stop.set() + if self._thread is not None: + self._thread.join(timeout=2) + + def _run(self) -> None: + last_error_summary: str | None = None + while not self._stop.is_set(): + try: + self._run_evdev() + self._reset_pressed_state() + continue + except Exception as error: + summary = f"Keyboard hotkey listener unavailable ({error}). Retrying..." + if summary != last_error_summary: + print(summary) + last_error_summary = summary + self._reset_pressed_state() + if self._stop.wait(1.0): + return + + def _run_evdev(self) -> None: + try: + evdev_module = importlib.import_module("evdev") + except Exception as error: + raise RuntimeError("evdev is not available") from error + + input_device_ctor = cast(_InputDeviceCtor, getattr(evdev_module, "InputDevice")) + ecodes = cast(_Ecodes, getattr(evdev_module, "ecodes")) + list_devices = cast(_ListDevicesFn, getattr(evdev_module, "list_devices")) + + devices: list[_EvdevDevice] = [] + for path in list_devices(): + try: + dev = input_device_ctor(path) + except Exception: + continue + try: + caps = dev.capabilities() + key_cap = caps.get(ecodes.EV_KEY, []) + if not any(code in key_cap for code in self._combo): + dev.close() + continue + if ecodes.KEY_A not in key_cap: + dev.close() + continue + devices.append(dev) + except Exception: + dev.close() + + if not devices: + raise RuntimeError("No keyboard input device with required keycodes found") + + try: + fd_map: dict[int, _EvdevDevice] = {dev.fd: dev for dev in devices} + next_rescan_at = time.monotonic() + self._rescan_interval_s + while not self._stop.is_set(): + if not fd_map: + return + now = time.monotonic() + if now >= next_rescan_at: + return + + timeout_s = min(0.2, max(0.0, next_rescan_at - now)) + try: + ready, _, _ = select.select(list(fd_map.keys()), [], [], timeout_s) + except (OSError, ValueError): + return + for fd in ready: + dev = fd_map[fd] + try: + events = dev.read() + except OSError: + return + for event in events: + if event.type != ecodes.EV_KEY: + continue + if self._process_key_event(event.code, event.value): + self._on_hotkey() + finally: + for dev in devices: + dev.close() + + def _reset_pressed_state(self) -> None: + with self._state_lock: + self._pressed.clear() + self._combo_latched = False + + def _process_key_event(self, keycode: int, value: int) -> bool: + with self._state_lock: + if value == 1: + self._pressed.add(keycode) + elif value == 0: + self._pressed.discard(keycode) + else: + return False + + if self._combo_latched and not self._combo.issubset(self._pressed): + self._combo_latched = False + + now = time.monotonic() + if ( + not self._combo_latched + and self._combo.issubset(self._pressed) + and now - self._last_fire_monotonic >= self._debounce_s + ): + self._combo_latched = True + self._last_fire_monotonic = now + return True + return False + + +class _EvdevEvent(Protocol): + type: int + value: int + code: int + + +class _EvdevDevice(Protocol): + fd: int + + def read(self) -> list[_EvdevEvent]: ... + + def capabilities(self) -> dict[int, list[int]]: ... + + def close(self) -> None: ... + + +class _InputDeviceCtor(Protocol): + def __call__(self, path: str) -> _EvdevDevice: ... + + +class _ListDevicesFn(Protocol): + def __call__(self) -> list[str]: ... + + +class _Ecodes(Protocol): + EV_KEY: int + KEY_A: int diff --git a/vibemouse/listener/mouse_listener.py b/vibemouse/listener/mouse_listener.py new file mode 100644 index 0000000..a632418 --- /dev/null +++ b/vibemouse/listener/mouse_listener.py @@ -0,0 +1,944 @@ +from __future__ import annotations + +import importlib +import json +import logging +import subprocess +import threading +import time +from collections.abc import Callable +from typing import Protocol, cast + +from vibemouse.platform.system_integration import ( + SystemIntegration, + create_system_integration, + is_browser_window_payload, +) + + +ButtonCallback = Callable[[], None] +GestureCallback = Callable[[str], None] +_LOG = logging.getLogger(__name__) + + +class SideButtonListener: + def __init__( + self, + on_front_press: ButtonCallback, + on_rear_press: ButtonCallback, + front_button: str, + rear_button: str, + debounce_s: float = 0.15, + on_gesture: GestureCallback | None = None, + gestures_enabled: bool = False, + gesture_trigger_button: str = "rear", + gesture_threshold_px: int = 120, + gesture_freeze_pointer: bool = True, + gesture_restore_cursor: bool = True, + system_integration: SystemIntegration | None = None, + rescan_interval_s: float = 2.0, + ) -> None: + if gesture_trigger_button not in {"front", "rear", "right"}: + raise ValueError( + "gesture_trigger_button must be one of: front, rear, right" + ) + self._on_front_press: ButtonCallback = on_front_press + self._on_rear_press: ButtonCallback = on_rear_press + self._on_gesture: GestureCallback | None = on_gesture + self._front_button: str = front_button + self._rear_button: str = rear_button + self._debounce_s: float = max(0.0, debounce_s) + self._gestures_enabled: bool = gestures_enabled + self._gesture_trigger_button: str = gesture_trigger_button + self._gesture_threshold_px: int = max(1, gesture_threshold_px) + self._gesture_freeze_pointer: bool = gesture_freeze_pointer + self._gesture_restore_cursor: bool = gesture_restore_cursor + self._rescan_interval_s: float = max(0.2, rescan_interval_s) + self._system_integration: SystemIntegration = ( + system_integration + if system_integration is not None + else create_system_integration() + ) + self._hyprland_session: bool = self._system_integration.is_hyprland + self._last_front_press_monotonic: float = 0.0 + self._last_rear_press_monotonic: float = 0.0 + self._debounce_lock: threading.Lock = threading.Lock() + self._gesture_lock: threading.Lock = threading.Lock() + self._gesture_active: bool = False + self._gesture_dx: int = 0 + self._gesture_dy: int = 0 + self._gesture_last_position: tuple[int, int] | None = None + self._gesture_anchor_cursor: tuple[int, int] | None = None + self._gesture_started_since: float | None = None + self._gesture_trigger_label: str | None = None + self._gesture_grab_timeout_s: float = 1.2 + self._right_trigger_pressed: bool = False + self._gesture_grabbed_device: _EvdevDevice | None = None + self._button_grabbed_device: _EvdevDevice | None = None + self._button_grabbed_label: str | None = None + self._button_grabbed_since: float | None = None + self._button_grab_deadline_monotonic: float | None = None + self._button_grab_timeout_s: float = 0.22 + self._right_trigger_pressed_since: float | None = None + self._right_trigger_pending_dx: int = 0 + self._right_trigger_pending_dy: int = 0 + self._right_trigger_origin_position: tuple[int, int] | None = None + self._right_trigger_passthrough: bool = False + self._right_tap_timeout_s: float = 0.30 + self._right_click_slop_px: int = 8 + self._right_hold_suppress_timeout_s: float = 8.0 + self._stop: threading.Event = threading.Event() + self._thread: threading.Thread | None = None + + def start(self) -> None: + if self._thread is not None and self._thread.is_alive(): + return + self._stop.clear() + self._thread = threading.Thread(target=self._run, daemon=True) + self._thread.start() + + def stop(self) -> None: + self._stop.set() + self._clear_right_trigger_state() + self._release_button_grab() + self._release_gesture_grab() + if self._thread is not None: + self._thread.join(timeout=2) + + def _run(self) -> None: + last_error_summary: str | None = None + while not self._stop.is_set(): + evdev_error: Exception | None = None + try: + self._run_evdev() + continue + except Exception as error: + evdev_error = error + + try: + self._run_pynput(timeout_s=self._rescan_interval_s) + continue + except Exception as pynput_error: + summary = ( + "Mouse listener backends unavailable " + + f"(evdev: {evdev_error}; pynput: {pynput_error}). Retrying..." + ) + if summary != last_error_summary: + _LOG.warning(summary) + last_error_summary = summary + if self._stop.wait(1.0): + return + + def _run_evdev(self) -> None: + import select + + try: + evdev_module = importlib.import_module("evdev") + except Exception as error: + raise RuntimeError("evdev is not available") from error + + input_device_ctor = cast(_InputDeviceCtor, getattr(evdev_module, "InputDevice")) + ecodes = cast(_Ecodes, getattr(evdev_module, "ecodes")) + list_devices = cast(_ListDevicesFn, getattr(evdev_module, "list_devices")) + + side_code_candidates = { + "x1": { + ecodes.BTN_SIDE, + int(getattr(ecodes, "BTN_BACK", ecodes.BTN_SIDE)), + }, + "x2": { + ecodes.BTN_EXTRA, + int(getattr(ecodes, "BTN_FORWARD", ecodes.BTN_EXTRA)), + }, + } + front_codes = side_code_candidates[self._front_button] + rear_codes = side_code_candidates[self._rear_button] + trigger_code: int | None = None + if self._gestures_enabled and self._gesture_trigger_button == "right": + trigger_code = ecodes.BTN_RIGHT + + devices: list[_EvdevDevice] = [] + for path in list_devices(): + try: + dev = input_device_ctor(path) + except Exception: + continue + try: + caps = dev.capabilities() + key_cap = caps.get(ecodes.EV_KEY, []) + has_side_button = any(code in key_cap for code in {*front_codes, *rear_codes}) + if not has_side_button: + dev.close() + continue + + if trigger_code is not None and trigger_code not in key_cap: + dev.close() + continue + + btn_mouse = getattr(ecodes, "BTN_MOUSE", None) + has_pointer_button = ecodes.BTN_LEFT in key_cap or ( + isinstance(btn_mouse, int) and btn_mouse in key_cap + ) + if not has_pointer_button: + dev.close() + continue + + # Skip keyboard-like composite devices; they can emit pointer + # events but tend to make side-button suppression unstable. + if int(getattr(ecodes, "KEY_A", 30)) in key_cap: + dev.close() + continue + + devices.append(dev) + except Exception: + dev.close() + + if not devices: + raise RuntimeError("No input device with side-button capability found") + _LOG.info( + "Mouse listener using evdev with %d candidate device(s)", len(devices) + ) + + try: + fd_map: dict[int, _EvdevDevice] = {dev.fd: dev for dev in devices} + next_rescan_at = time.monotonic() + self._rescan_interval_s + while not self._stop.is_set(): + self._release_stale_button_grab() + self._release_stale_gesture_capture() + if not fd_map: + return + now = time.monotonic() + if now >= next_rescan_at: + return + + timeout_candidates = [0.2, max(0.0, next_rescan_at - now)] + button_deadline = self._button_grab_deadline_monotonic + if button_deadline is not None: + timeout_candidates.append(max(0.0, button_deadline - now)) + + gesture_started_since: float | None = None + with self._gesture_lock: + if self._gesture_active: + gesture_started_since = self._gesture_started_since + if gesture_started_since is not None: + gesture_deadline = ( + gesture_started_since + self._gesture_grab_timeout_s + ) + timeout_candidates.append(max(0.0, gesture_deadline - now)) + + timeout_s = min(timeout_candidates) + try: + ready, _, _ = select.select(list(fd_map.keys()), [], [], timeout_s) + except (OSError, ValueError): + return + for fd in ready: + dev = fd_map[fd] + try: + events = dev.read() + except OSError: + return + for event in events: + if event.type == ecodes.EV_KEY: + button_label: str | None = None + if event.code in front_codes: + button_label = "front" + elif event.code in rear_codes: + button_label = "rear" + elif ( + trigger_code is not None and event.code == trigger_code + ): + button_label = "right" + + if button_label is None: + continue + + if ( + self._gestures_enabled + and self._is_gesture_trigger_button(button_label) + ): + if button_label == "right": + if event.value == 1: + self._begin_right_trigger_press( + source_device=dev, + ) + elif event.value == 0: + ( + should_replay_right_click, + gesture_direction, + ) = self._consume_right_trigger_release() + if gesture_direction is not None: + self._dispatch_gesture(gesture_direction) + elif should_replay_right_click: + self._dispatch_click_async(button_label) + else: + if event.value == 1: + self._start_gesture_capture( + source_device=dev, + button_label=button_label, + ) + elif event.value == 0: + self._finish_gesture_capture(button_label) + continue + + if event.value == 1: + if button_label == "right": + self._begin_button_suppress( + source_device=dev, + button_label=button_label, + ) + _LOG.debug( + "Mouse click detected: label=%s code=%s", + button_label, + event.code, + ) + self._dispatch_click_async(button_label) + elif event.value == 0 and button_label == "right": + self._end_button_suppress(button_label=button_label) + continue + + if self._gestures_enabled and event.type == ecodes.EV_REL: + if ( + self._gesture_trigger_button == "right" + and self._right_trigger_pressed + ): + if event.code == ecodes.REL_X: + self._right_trigger_pending_dx += event.value + elif event.code == ecodes.REL_Y: + self._right_trigger_pending_dy += event.value + if self._maybe_dispatch_passthrough_right_gesture(): + continue + continue + if not self._gesture_active: + continue + if event.code == ecodes.REL_X: + self._accumulate_gesture_delta(dx=event.value, dy=0) + elif event.code == ecodes.REL_Y: + self._accumulate_gesture_delta(dx=0, dy=event.value) + finally: + self._clear_right_trigger_state() + self._release_button_grab() + self._release_gesture_grab() + for dev in devices: + dev.close() + + def _run_pynput(self, *, timeout_s: float | None = None) -> None: + try: + mouse_module = importlib.import_module("pynput.mouse") + except Exception as error: + raise RuntimeError("pynput.mouse is not available") from error + + listener_ctor = cast(_MouseListenerCtor, getattr(mouse_module, "Listener")) + + button_map = { + "x1": {"x1", "x_button1", "button8"}, + "x2": {"x2", "x_button2", "button9"}, + } + + front_candidates = button_map[self._front_button] + rear_candidates = button_map[self._rear_button] + right_candidates = {"right", "button2"} + + def on_click(x: int, y: int, button: object, pressed: bool) -> None: + btn_name = str(button).lower().split(".")[-1] + button_label: str | None = None + if btn_name in front_candidates: + button_label = "front" + elif btn_name in rear_candidates: + button_label = "rear" + elif btn_name in right_candidates: + button_label = "right" + + if button_label is None: + return + + if self._gestures_enabled and self._is_gesture_trigger_button(button_label): + if button_label == "right": + if pressed: + self._begin_right_trigger_press(initial_position=(x, y)) + else: + ( + should_replay_right_click, + gesture_direction, + ) = self._consume_right_trigger_release() + if gesture_direction is not None: + self._dispatch_gesture(gesture_direction) + elif should_replay_right_click: + self._dispatch_click_async(button_label) + else: + if pressed: + self._start_gesture_capture( + initial_position=(x, y), + button_label=button_label, + ) + else: + self._finish_gesture_capture(button_label) + return + + if pressed: + self._dispatch_click_async(button_label) + + def on_move(x: int, y: int) -> None: + if not self._gestures_enabled: + return + if self._gesture_trigger_button == "right" and self._right_trigger_pressed: + origin = self._right_trigger_origin_position + if origin is None: + self._right_trigger_origin_position = (x, y) + return + self._right_trigger_pending_dx = x - origin[0] + self._right_trigger_pending_dy = y - origin[1] + if self._maybe_dispatch_passthrough_right_gesture(): + return + return + self._accumulate_gesture_position(x, y) + + listener = listener_ctor(on_click=on_click, on_move=on_move) + _LOG.info("Mouse listener using pynput fallback backend") + listener.start() + deadline: float | None = None + if timeout_s is not None: + deadline = time.monotonic() + max(0.2, timeout_s) + try: + while not self._stop.is_set(): + self._release_stale_gesture_capture() + if deadline is not None and time.monotonic() >= deadline: + return + time.sleep(0.2) + finally: + self._clear_right_trigger_state() + listener.stop() + + def _dispatch_click(self, button_label: str) -> None: + if button_label == "front": + self._dispatch_front_press() + return + if button_label == "rear": + self._dispatch_rear_press() + return + if button_label == "right": + self._dispatch_right_click() + return + + def _dispatch_click_async(self, button_label: str) -> None: + worker = threading.Thread( + target=self._invoke_click_callback, + args=(button_label,), + daemon=True, + ) + worker.start() + + def _invoke_click_callback(self, button_label: str) -> None: + try: + self._dispatch_click(button_label) + except Exception: + _LOG.exception("Mouse click callback failed: button=%s", button_label) + + def _is_gesture_trigger_button(self, button_label: str) -> bool: + return button_label == self._gesture_trigger_button + + def _clear_right_trigger_state(self) -> None: + self._right_trigger_pressed = False + self._right_trigger_pressed_since = None + self._right_trigger_pending_dx = 0 + self._right_trigger_pending_dy = 0 + self._right_trigger_origin_position = None + self._right_trigger_passthrough = False + + def _should_passthrough_right_trigger(self) -> bool: + try: + payload = self._system_integration.active_window() + except Exception: + return False + + if payload is None or not is_browser_window_payload(payload): + return False + + xwayland = payload.get("xwayland") + return xwayland is False + + def _begin_right_trigger_press( + self, + *, + source_device: _EvdevDevice | None = None, + initial_position: tuple[int, int] | None = None, + ) -> None: + self._clear_right_trigger_state() + self._right_trigger_pressed = True + self._right_trigger_pressed_since = time.monotonic() + self._right_trigger_origin_position = initial_position + if self._should_passthrough_right_trigger(): + self._right_trigger_passthrough = True + return + + if source_device is not None: + self._begin_button_suppress( + source_device=source_device, + button_label="right", + ) + + def _maybe_dispatch_passthrough_right_gesture(self) -> bool: + if not self._right_trigger_passthrough: + return False + + direction = self._classify_gesture( + self._right_trigger_pending_dx, + self._right_trigger_pending_dy, + self._gesture_threshold_px, + ) + if direction is None: + return False + + self._dispatch_gesture(direction) + self._clear_right_trigger_state() + return True + + def _consume_right_trigger_release(self) -> tuple[bool, str | None]: + if self._right_trigger_passthrough: + self._clear_right_trigger_state() + return False, None + + now = time.monotonic() + pressed_since = self._right_trigger_pressed_since + direction = self._classify_gesture( + self._right_trigger_pending_dx, + self._right_trigger_pending_dy, + self._gesture_threshold_px, + ) + movement_px = max( + abs(self._right_trigger_pending_dx), + abs(self._right_trigger_pending_dy), + ) + suppressed_native_click = self._button_grabbed_label == "right" + + if suppressed_native_click: + self._end_button_suppress(button_label="right") + + should_replay = ( + direction is None + and suppressed_native_click + and pressed_since is not None + and now - pressed_since <= self._right_tap_timeout_s + and movement_px <= self._right_click_slop_px + ) + self._clear_right_trigger_state() + return should_replay, direction + + def _dispatch_right_click(self) -> None: + mouse_module = importlib.import_module("pynput.mouse") + controller_ctor = cast( + _MouseControllerCtor, + getattr(cast(object, mouse_module), "Controller"), + ) + button_holder = cast( + _MouseButtonHolder, + getattr(cast(object, mouse_module), "Button"), + ) + controller = controller_ctor() + controller.press(button_holder.right) + time.sleep(0.012) + controller.release(button_holder.right) + + def _start_gesture_capture( + self, + *, + initial_position: tuple[int, int] | None = None, + source_device: _EvdevDevice | None = None, + button_label: str | None = None, + ) -> None: + should_grab = False + with self._gesture_lock: + self._gesture_active = True + self._gesture_dx = 0 + self._gesture_dy = 0 + self._gesture_last_position = initial_position + self._gesture_started_since = time.monotonic() + self._gesture_trigger_label = button_label + if self._gesture_restore_cursor and button_label != "right": + self._gesture_anchor_cursor = self._read_cursor_position() + else: + self._gesture_anchor_cursor = None + should_grab = ( + self._gesture_freeze_pointer + and source_device is not None + and button_label != "right" + ) + + if should_grab and source_device is not None: + self._try_grab_device(source_device) + + def _accumulate_gesture_delta(self, *, dx: int, dy: int) -> None: + with self._gesture_lock: + if not self._gesture_active: + return + self._gesture_dx += dx + self._gesture_dy += dy + + def _accumulate_gesture_position(self, x: int, y: int) -> None: + with self._gesture_lock: + if not self._gesture_active: + return + if self._gesture_last_position is None: + self._gesture_last_position = (x, y) + return + last_x, last_y = self._gesture_last_position + self._gesture_dx += x - last_x + self._gesture_dy += y - last_y + self._gesture_last_position = (x, y) + + def _finish_gesture_capture(self, button_label: str) -> None: + with self._gesture_lock: + if not self._gesture_active: + return + dx = self._gesture_dx + dy = self._gesture_dy + self._gesture_active = False + self._gesture_dx = 0 + self._gesture_dy = 0 + self._gesture_last_position = None + self._gesture_started_since = None + self._gesture_trigger_label = None + if button_label == "right": + self._clear_right_trigger_state() + anchor_cursor = self._gesture_anchor_cursor + self._gesture_anchor_cursor = None + + self._release_gesture_grab() + + direction = self._classify_gesture(dx, dy, self._gesture_threshold_px) + _LOG.debug( + "Gesture capture finished: button=%s dx=%s dy=%s direction=%s", + button_label, + dx, + dy, + direction, + ) + if direction is None: + if button_label == "right": + return + self._dispatch_click_async(button_label) + return + self._dispatch_gesture(direction) + if anchor_cursor is not None: + self._restore_cursor_position(anchor_cursor) + + def _dispatch_gesture(self, direction: str) -> None: + callback = self._on_gesture + if callback is None: + return + callback(direction) + + def _try_grab_device(self, device: _EvdevDevice) -> None: + try: + device.grab() + except Exception: + return + + with self._gesture_lock: + self._gesture_grabbed_device = device + + def _release_gesture_grab(self) -> None: + with self._gesture_lock: + grabbed = self._gesture_grabbed_device + + if grabbed is None: + return + + try: + grabbed.ungrab() + except Exception: + _LOG.warning( + "Failed to release gesture mouse device; will retry ungrab", + exc_info=True, + ) + return + + with self._gesture_lock: + if self._gesture_grabbed_device is grabbed: + self._gesture_grabbed_device = None + + def _begin_button_suppress( + self, + *, + source_device: _EvdevDevice, + button_label: str, + ) -> None: + if ( + self._button_grabbed_device is source_device + and self._button_grabbed_label == button_label + ): + return + + self._release_button_grab() + try: + source_device.grab() + except Exception: + return + + grabbed_since = time.monotonic() + timeout_s = self._button_grab_timeout_s + if button_label == "right": + timeout_s = max(timeout_s, self._right_hold_suppress_timeout_s) + self._button_grabbed_device = source_device + self._button_grabbed_label = button_label + self._button_grabbed_since = grabbed_since + self._button_grab_deadline_monotonic = grabbed_since + timeout_s + + def _end_button_suppress(self, *, button_label: str) -> None: + if self._button_grabbed_label != button_label: + return + self._release_button_grab() + + def _release_button_grab(self) -> None: + grabbed = self._button_grabbed_device + if grabbed is None: + self._button_grabbed_label = None + self._button_grabbed_since = None + self._button_grab_deadline_monotonic = None + return + try: + grabbed.ungrab() + except Exception: + _LOG.warning( + "Failed to release suppressed mouse device; will retry ungrab", + exc_info=True, + ) + return + + self._button_grabbed_device = None + self._button_grabbed_label = None + self._button_grabbed_since = None + self._button_grab_deadline_monotonic = None + + def _release_stale_button_grab(self) -> None: + deadline = self._button_grab_deadline_monotonic + if deadline is None: + return + + now = time.monotonic() + if self._button_grabbed_label == "right" and self._right_trigger_pressed: + pressed_since = self._right_trigger_pressed_since + if ( + pressed_since is not None + and now - pressed_since < self._right_hold_suppress_timeout_s + ): + return + _LOG.warning( + "Right-button hold timeout reached (%.2fs); force-releasing mouse grab", + self._right_hold_suppress_timeout_s, + ) + self._clear_right_trigger_state() + self._release_button_grab() + return + + if now < deadline: + return + + timeout_s = self._button_grab_timeout_s + grabbed_since = self._button_grabbed_since + if grabbed_since is not None: + timeout_s = max(0.0, deadline - grabbed_since) + _LOG.warning( + "Button suppress timeout reached (%.2fs); force-releasing mouse grab", + timeout_s, + ) + self._release_button_grab() + + def _release_stale_gesture_capture(self) -> None: + stale_label: str | None = None + elapsed_s: float | None = None + with self._gesture_lock: + if not self._gesture_active: + return + started_since = self._gesture_started_since + if started_since is None: + return + elapsed_s = time.monotonic() - started_since + if elapsed_s < self._gesture_grab_timeout_s: + return + self._gesture_active = False + self._gesture_dx = 0 + self._gesture_dy = 0 + self._gesture_last_position = None + self._gesture_anchor_cursor = None + self._gesture_started_since = None + stale_label = self._gesture_trigger_label + self._gesture_trigger_label = None + if stale_label == "right": + self._clear_right_trigger_state() + + _LOG.warning( + "Gesture capture timeout reached (%.2fs); force-releasing input grabs (trigger=%s)", + elapsed_s, + stale_label, + ) + self._release_gesture_grab() + if stale_label is not None: + self._end_button_suppress(button_label=stale_label) + + def _read_cursor_position(self) -> tuple[int, int] | None: + try: + system_integration = self._system_integration + except AttributeError: + system_integration = None + + if system_integration is not None: + try: + return system_integration.cursor_position() + except Exception: + return None + + if not self._hyprland_session: + return None + try: + proc = subprocess.run( + ["hyprctl", "-j", "cursorpos"], + capture_output=True, + text=True, + check=False, + timeout=0.8, + ) + except (OSError, subprocess.TimeoutExpired): + return None + + if proc.returncode != 0: + return None + + try: + payload = cast(dict[str, object], json.loads(proc.stdout)) + except json.JSONDecodeError: + return None + + x_raw = payload.get("x") + y_raw = payload.get("y") + if not isinstance(x_raw, int | float) or not isinstance(y_raw, int | float): + return None + return int(x_raw), int(y_raw) + + def _restore_cursor_position(self, position: tuple[int, int]) -> None: + try: + system_integration = self._system_integration + except AttributeError: + system_integration = None + + if system_integration is not None: + x, y = position + try: + _ = system_integration.move_cursor(x=x, y=y) + except Exception: + return + return + + if not self._hyprland_session: + return + + x, y = position + try: + _ = subprocess.run( + ["hyprctl", "dispatch", "movecursor", str(x), str(y)], + capture_output=True, + text=True, + check=False, + timeout=0.8, + ) + except (OSError, subprocess.TimeoutExpired): + return + + @staticmethod + def _classify_gesture(dx: int, dy: int, threshold_px: int) -> str | None: + if max(abs(dx), abs(dy)) < threshold_px: + return None + if abs(dx) >= abs(dy): + return "right" if dx > 0 else "left" + return "down" if dy > 0 else "up" + + def _dispatch_front_press(self) -> None: + if self._should_fire_front(): + self._on_front_press() + + def _dispatch_rear_press(self) -> None: + if self._should_fire_rear(): + self._on_rear_press() + + def _should_fire_front(self) -> bool: + now = time.monotonic() + with self._debounce_lock: + if now - self._last_front_press_monotonic < self._debounce_s: + return False + self._last_front_press_monotonic = now + return True + + def _should_fire_rear(self) -> bool: + now = time.monotonic() + with self._debounce_lock: + if now - self._last_rear_press_monotonic < self._debounce_s: + return False + self._last_rear_press_monotonic = now + return True + + +class _EvdevEvent(Protocol): + type: int + value: int + code: int + + +class _EvdevDevice(Protocol): + fd: int + + def read(self) -> list[_EvdevEvent]: ... + + def capabilities(self) -> dict[int, list[int]]: ... + + def grab(self) -> None: ... + + def ungrab(self) -> None: ... + + def close(self) -> None: ... + + +class _InputDeviceCtor(Protocol): + def __call__(self, path: str) -> _EvdevDevice: ... + + +class _ListDevicesFn(Protocol): + def __call__(self) -> list[str]: ... + + +class _Ecodes(Protocol): + BTN_SIDE: int + BTN_EXTRA: int + BTN_LEFT: int + BTN_RIGHT: int + EV_KEY: int + EV_REL: int + REL_X: int + REL_Y: int + + +class _MouseListener(Protocol): + def start(self) -> None: ... + + def stop(self) -> None: ... + + +class _MouseListenerCtor(Protocol): + def __call__( + self, + *, + on_click: Callable[[int, int, object, bool], None], + on_move: Callable[[int, int], None] | None = None, + ) -> _MouseListener: ... + + +class _MouseController(Protocol): + def press(self, button: object) -> None: ... + + def release(self, button: object) -> None: ... + + +class _MouseControllerCtor(Protocol): + def __call__(self) -> _MouseController: ... + + +class _MouseButtonHolder(Protocol): + right: object diff --git a/vibemouse/logging_setup.py b/vibemouse/logging_setup.py index caf7a96..22477b0 100644 --- a/vibemouse/logging_setup.py +++ b/vibemouse/logging_setup.py @@ -1,19 +1,4 @@ -from __future__ import annotations +from importlib import import_module as _import_module +import sys as _sys -import logging - -_LOG_FORMAT = "%(asctime)s %(levelname)s [%(name)s] %(message)s" - - -def configure_logging(level_name: str) -> None: - normalized = level_name.strip().upper() - level = getattr(logging, normalized, logging.INFO) - root = logging.getLogger() - if not root.handlers: - logging.basicConfig(level=level, format=_LOG_FORMAT) - return - root.setLevel(level) - - -def get_logger(name: str) -> logging.Logger: - return logging.getLogger(name) +_sys.modules[__name__] = _import_module("vibemouse.core.logging_setup") diff --git a/vibemouse/main.py b/vibemouse/main.py index a7a589e..ced9338 100644 --- a/vibemouse/main.py +++ b/vibemouse/main.py @@ -1,51 +1,9 @@ -from __future__ import annotations - -import argparse - -from vibemouse.app import VoiceMouseApp -from vibemouse.config import load_config -from vibemouse.deploy import configure_deploy_parser, run_deploy -from vibemouse.doctor import run_doctor -from vibemouse.logging_setup import configure_logging - - -def _build_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(prog="vibemouse") - subparsers = parser.add_subparsers(dest="command") - _ = subparsers.add_parser("run", help="run the voice-input daemon") - doctor_parser = subparsers.add_parser("doctor", help="run environment diagnostics") - _ = doctor_parser.add_argument( - "--fix", - action="store_true", - help="apply safe auto-remediations before running checks", - ) - deploy_parser = subparsers.add_parser( - "deploy", - help="generate service/env files and deploy as user service", - ) - configure_deploy_parser(deploy_parser) - return parser - - -def main(argv: list[str] | None = None) -> int: - parser = _build_parser() - args = parser.parse_args(argv) - - raw_command = getattr(args, "command", None) - command = raw_command if isinstance(raw_command, str) else "run" - if command == "doctor": - apply_fixes_raw = getattr(args, "fix", False) - apply_fixes = bool(apply_fixes_raw) - return run_doctor(apply_fixes=apply_fixes) - if command == "deploy": - return run_deploy(args) - - config = load_config() - configure_logging(config.log_level) - app = VoiceMouseApp(config) - app.run() - return 0 +from importlib import import_module as _import_module +import sys as _sys +_module = _import_module("vibemouse.cli.main") if __name__ == "__main__": - raise SystemExit(main()) + raise SystemExit(_module.main()) + +_sys.modules[__name__] = _module diff --git a/vibemouse/mouse_listener.py b/vibemouse/mouse_listener.py index 1b9d52f..99995dc 100644 --- a/vibemouse/mouse_listener.py +++ b/vibemouse/mouse_listener.py @@ -1,944 +1,4 @@ -from __future__ import annotations +from importlib import import_module as _import_module +import sys as _sys -import importlib -import json -import logging -import subprocess -import threading -import time -from collections.abc import Callable -from typing import Protocol, cast - -from vibemouse.system_integration import ( - SystemIntegration, - create_system_integration, - is_browser_window_payload, -) - - -ButtonCallback = Callable[[], None] -GestureCallback = Callable[[str], None] -_LOG = logging.getLogger(__name__) - - -class SideButtonListener: - def __init__( - self, - on_front_press: ButtonCallback, - on_rear_press: ButtonCallback, - front_button: str, - rear_button: str, - debounce_s: float = 0.15, - on_gesture: GestureCallback | None = None, - gestures_enabled: bool = False, - gesture_trigger_button: str = "rear", - gesture_threshold_px: int = 120, - gesture_freeze_pointer: bool = True, - gesture_restore_cursor: bool = True, - system_integration: SystemIntegration | None = None, - rescan_interval_s: float = 2.0, - ) -> None: - if gesture_trigger_button not in {"front", "rear", "right"}: - raise ValueError( - "gesture_trigger_button must be one of: front, rear, right" - ) - self._on_front_press: ButtonCallback = on_front_press - self._on_rear_press: ButtonCallback = on_rear_press - self._on_gesture: GestureCallback | None = on_gesture - self._front_button: str = front_button - self._rear_button: str = rear_button - self._debounce_s: float = max(0.0, debounce_s) - self._gestures_enabled: bool = gestures_enabled - self._gesture_trigger_button: str = gesture_trigger_button - self._gesture_threshold_px: int = max(1, gesture_threshold_px) - self._gesture_freeze_pointer: bool = gesture_freeze_pointer - self._gesture_restore_cursor: bool = gesture_restore_cursor - self._rescan_interval_s: float = max(0.2, rescan_interval_s) - self._system_integration: SystemIntegration = ( - system_integration - if system_integration is not None - else create_system_integration() - ) - self._hyprland_session: bool = self._system_integration.is_hyprland - self._last_front_press_monotonic: float = 0.0 - self._last_rear_press_monotonic: float = 0.0 - self._debounce_lock: threading.Lock = threading.Lock() - self._gesture_lock: threading.Lock = threading.Lock() - self._gesture_active: bool = False - self._gesture_dx: int = 0 - self._gesture_dy: int = 0 - self._gesture_last_position: tuple[int, int] | None = None - self._gesture_anchor_cursor: tuple[int, int] | None = None - self._gesture_started_since: float | None = None - self._gesture_trigger_label: str | None = None - self._gesture_grab_timeout_s: float = 1.2 - self._right_trigger_pressed: bool = False - self._gesture_grabbed_device: _EvdevDevice | None = None - self._button_grabbed_device: _EvdevDevice | None = None - self._button_grabbed_label: str | None = None - self._button_grabbed_since: float | None = None - self._button_grab_deadline_monotonic: float | None = None - self._button_grab_timeout_s: float = 0.22 - self._right_trigger_pressed_since: float | None = None - self._right_trigger_pending_dx: int = 0 - self._right_trigger_pending_dy: int = 0 - self._right_trigger_origin_position: tuple[int, int] | None = None - self._right_trigger_passthrough: bool = False - self._right_tap_timeout_s: float = 0.30 - self._right_click_slop_px: int = 8 - self._right_hold_suppress_timeout_s: float = 8.0 - self._stop: threading.Event = threading.Event() - self._thread: threading.Thread | None = None - - def start(self) -> None: - if self._thread is not None and self._thread.is_alive(): - return - self._stop.clear() - self._thread = threading.Thread(target=self._run, daemon=True) - self._thread.start() - - def stop(self) -> None: - self._stop.set() - self._clear_right_trigger_state() - self._release_button_grab() - self._release_gesture_grab() - if self._thread is not None: - self._thread.join(timeout=2) - - def _run(self) -> None: - last_error_summary: str | None = None - while not self._stop.is_set(): - evdev_error: Exception | None = None - try: - self._run_evdev() - continue - except Exception as error: - evdev_error = error - - try: - self._run_pynput(timeout_s=self._rescan_interval_s) - continue - except Exception as pynput_error: - summary = ( - "Mouse listener backends unavailable " - + f"(evdev: {evdev_error}; pynput: {pynput_error}). Retrying..." - ) - if summary != last_error_summary: - _LOG.warning(summary) - last_error_summary = summary - if self._stop.wait(1.0): - return - - def _run_evdev(self) -> None: - import select - - try: - evdev_module = importlib.import_module("evdev") - except Exception as error: - raise RuntimeError("evdev is not available") from error - - input_device_ctor = cast(_InputDeviceCtor, getattr(evdev_module, "InputDevice")) - ecodes = cast(_Ecodes, getattr(evdev_module, "ecodes")) - list_devices = cast(_ListDevicesFn, getattr(evdev_module, "list_devices")) - - side_code_candidates = { - "x1": { - ecodes.BTN_SIDE, - int(getattr(ecodes, "BTN_BACK", ecodes.BTN_SIDE)), - }, - "x2": { - ecodes.BTN_EXTRA, - int(getattr(ecodes, "BTN_FORWARD", ecodes.BTN_EXTRA)), - }, - } - front_codes = side_code_candidates[self._front_button] - rear_codes = side_code_candidates[self._rear_button] - trigger_code: int | None = None - if self._gestures_enabled and self._gesture_trigger_button == "right": - trigger_code = ecodes.BTN_RIGHT - - devices: list[_EvdevDevice] = [] - for path in list_devices(): - try: - dev = input_device_ctor(path) - except Exception: - continue - try: - caps = dev.capabilities() - key_cap = caps.get(ecodes.EV_KEY, []) - has_side_button = any(code in key_cap for code in {*front_codes, *rear_codes}) - if not has_side_button: - dev.close() - continue - - if trigger_code is not None and trigger_code not in key_cap: - dev.close() - continue - - btn_mouse = getattr(ecodes, "BTN_MOUSE", None) - has_pointer_button = ecodes.BTN_LEFT in key_cap or ( - isinstance(btn_mouse, int) and btn_mouse in key_cap - ) - if not has_pointer_button: - dev.close() - continue - - # Skip keyboard-like composite devices; they can emit pointer - # events but tend to make side-button suppression unstable. - if int(getattr(ecodes, "KEY_A", 30)) in key_cap: - dev.close() - continue - - devices.append(dev) - except Exception: - dev.close() - - if not devices: - raise RuntimeError("No input device with side-button capability found") - _LOG.info( - "Mouse listener using evdev with %d candidate device(s)", len(devices) - ) - - try: - fd_map: dict[int, _EvdevDevice] = {dev.fd: dev for dev in devices} - next_rescan_at = time.monotonic() + self._rescan_interval_s - while not self._stop.is_set(): - self._release_stale_button_grab() - self._release_stale_gesture_capture() - if not fd_map: - return - now = time.monotonic() - if now >= next_rescan_at: - return - - timeout_candidates = [0.2, max(0.0, next_rescan_at - now)] - button_deadline = self._button_grab_deadline_monotonic - if button_deadline is not None: - timeout_candidates.append(max(0.0, button_deadline - now)) - - gesture_started_since: float | None = None - with self._gesture_lock: - if self._gesture_active: - gesture_started_since = self._gesture_started_since - if gesture_started_since is not None: - gesture_deadline = ( - gesture_started_since + self._gesture_grab_timeout_s - ) - timeout_candidates.append(max(0.0, gesture_deadline - now)) - - timeout_s = min(timeout_candidates) - try: - ready, _, _ = select.select(list(fd_map.keys()), [], [], timeout_s) - except (OSError, ValueError): - return - for fd in ready: - dev = fd_map[fd] - try: - events = dev.read() - except OSError: - return - for event in events: - if event.type == ecodes.EV_KEY: - button_label: str | None = None - if event.code in front_codes: - button_label = "front" - elif event.code in rear_codes: - button_label = "rear" - elif ( - trigger_code is not None and event.code == trigger_code - ): - button_label = "right" - - if button_label is None: - continue - - if ( - self._gestures_enabled - and self._is_gesture_trigger_button(button_label) - ): - if button_label == "right": - if event.value == 1: - self._begin_right_trigger_press( - source_device=dev, - ) - elif event.value == 0: - ( - should_replay_right_click, - gesture_direction, - ) = self._consume_right_trigger_release() - if gesture_direction is not None: - self._dispatch_gesture(gesture_direction) - elif should_replay_right_click: - self._dispatch_click_async(button_label) - else: - if event.value == 1: - self._start_gesture_capture( - source_device=dev, - button_label=button_label, - ) - elif event.value == 0: - self._finish_gesture_capture(button_label) - continue - - if event.value == 1: - if button_label == "right": - self._begin_button_suppress( - source_device=dev, - button_label=button_label, - ) - _LOG.debug( - "Mouse click detected: label=%s code=%s", - button_label, - event.code, - ) - self._dispatch_click_async(button_label) - elif event.value == 0 and button_label == "right": - self._end_button_suppress(button_label=button_label) - continue - - if self._gestures_enabled and event.type == ecodes.EV_REL: - if ( - self._gesture_trigger_button == "right" - and self._right_trigger_pressed - ): - if event.code == ecodes.REL_X: - self._right_trigger_pending_dx += event.value - elif event.code == ecodes.REL_Y: - self._right_trigger_pending_dy += event.value - if self._maybe_dispatch_passthrough_right_gesture(): - continue - continue - if not self._gesture_active: - continue - if event.code == ecodes.REL_X: - self._accumulate_gesture_delta(dx=event.value, dy=0) - elif event.code == ecodes.REL_Y: - self._accumulate_gesture_delta(dx=0, dy=event.value) - finally: - self._clear_right_trigger_state() - self._release_button_grab() - self._release_gesture_grab() - for dev in devices: - dev.close() - - def _run_pynput(self, *, timeout_s: float | None = None) -> None: - try: - mouse_module = importlib.import_module("pynput.mouse") - except Exception as error: - raise RuntimeError("pynput.mouse is not available") from error - - listener_ctor = cast(_MouseListenerCtor, getattr(mouse_module, "Listener")) - - button_map = { - "x1": {"x1", "x_button1", "button8"}, - "x2": {"x2", "x_button2", "button9"}, - } - - front_candidates = button_map[self._front_button] - rear_candidates = button_map[self._rear_button] - right_candidates = {"right", "button2"} - - def on_click(x: int, y: int, button: object, pressed: bool) -> None: - btn_name = str(button).lower().split(".")[-1] - button_label: str | None = None - if btn_name in front_candidates: - button_label = "front" - elif btn_name in rear_candidates: - button_label = "rear" - elif btn_name in right_candidates: - button_label = "right" - - if button_label is None: - return - - if self._gestures_enabled and self._is_gesture_trigger_button(button_label): - if button_label == "right": - if pressed: - self._begin_right_trigger_press(initial_position=(x, y)) - else: - ( - should_replay_right_click, - gesture_direction, - ) = self._consume_right_trigger_release() - if gesture_direction is not None: - self._dispatch_gesture(gesture_direction) - elif should_replay_right_click: - self._dispatch_click_async(button_label) - else: - if pressed: - self._start_gesture_capture( - initial_position=(x, y), - button_label=button_label, - ) - else: - self._finish_gesture_capture(button_label) - return - - if pressed: - self._dispatch_click_async(button_label) - - def on_move(x: int, y: int) -> None: - if not self._gestures_enabled: - return - if self._gesture_trigger_button == "right" and self._right_trigger_pressed: - origin = self._right_trigger_origin_position - if origin is None: - self._right_trigger_origin_position = (x, y) - return - self._right_trigger_pending_dx = x - origin[0] - self._right_trigger_pending_dy = y - origin[1] - if self._maybe_dispatch_passthrough_right_gesture(): - return - return - self._accumulate_gesture_position(x, y) - - listener = listener_ctor(on_click=on_click, on_move=on_move) - _LOG.info("Mouse listener using pynput fallback backend") - listener.start() - deadline: float | None = None - if timeout_s is not None: - deadline = time.monotonic() + max(0.2, timeout_s) - try: - while not self._stop.is_set(): - self._release_stale_gesture_capture() - if deadline is not None and time.monotonic() >= deadline: - return - time.sleep(0.2) - finally: - self._clear_right_trigger_state() - listener.stop() - - def _dispatch_click(self, button_label: str) -> None: - if button_label == "front": - self._dispatch_front_press() - return - if button_label == "rear": - self._dispatch_rear_press() - return - if button_label == "right": - self._dispatch_right_click() - return - - def _dispatch_click_async(self, button_label: str) -> None: - worker = threading.Thread( - target=self._invoke_click_callback, - args=(button_label,), - daemon=True, - ) - worker.start() - - def _invoke_click_callback(self, button_label: str) -> None: - try: - self._dispatch_click(button_label) - except Exception: - _LOG.exception("Mouse click callback failed: button=%s", button_label) - - def _is_gesture_trigger_button(self, button_label: str) -> bool: - return button_label == self._gesture_trigger_button - - def _clear_right_trigger_state(self) -> None: - self._right_trigger_pressed = False - self._right_trigger_pressed_since = None - self._right_trigger_pending_dx = 0 - self._right_trigger_pending_dy = 0 - self._right_trigger_origin_position = None - self._right_trigger_passthrough = False - - def _should_passthrough_right_trigger(self) -> bool: - try: - payload = self._system_integration.active_window() - except Exception: - return False - - if payload is None or not is_browser_window_payload(payload): - return False - - xwayland = payload.get("xwayland") - return xwayland is False - - def _begin_right_trigger_press( - self, - *, - source_device: _EvdevDevice | None = None, - initial_position: tuple[int, int] | None = None, - ) -> None: - self._clear_right_trigger_state() - self._right_trigger_pressed = True - self._right_trigger_pressed_since = time.monotonic() - self._right_trigger_origin_position = initial_position - if self._should_passthrough_right_trigger(): - self._right_trigger_passthrough = True - return - - if source_device is not None: - self._begin_button_suppress( - source_device=source_device, - button_label="right", - ) - - def _maybe_dispatch_passthrough_right_gesture(self) -> bool: - if not self._right_trigger_passthrough: - return False - - direction = self._classify_gesture( - self._right_trigger_pending_dx, - self._right_trigger_pending_dy, - self._gesture_threshold_px, - ) - if direction is None: - return False - - self._dispatch_gesture(direction) - self._clear_right_trigger_state() - return True - - def _consume_right_trigger_release(self) -> tuple[bool, str | None]: - if self._right_trigger_passthrough: - self._clear_right_trigger_state() - return False, None - - now = time.monotonic() - pressed_since = self._right_trigger_pressed_since - direction = self._classify_gesture( - self._right_trigger_pending_dx, - self._right_trigger_pending_dy, - self._gesture_threshold_px, - ) - movement_px = max( - abs(self._right_trigger_pending_dx), - abs(self._right_trigger_pending_dy), - ) - suppressed_native_click = self._button_grabbed_label == "right" - - if suppressed_native_click: - self._end_button_suppress(button_label="right") - - should_replay = ( - direction is None - and suppressed_native_click - and pressed_since is not None - and now - pressed_since <= self._right_tap_timeout_s - and movement_px <= self._right_click_slop_px - ) - self._clear_right_trigger_state() - return should_replay, direction - - def _dispatch_right_click(self) -> None: - mouse_module = importlib.import_module("pynput.mouse") - controller_ctor = cast( - _MouseControllerCtor, - getattr(cast(object, mouse_module), "Controller"), - ) - button_holder = cast( - _MouseButtonHolder, - getattr(cast(object, mouse_module), "Button"), - ) - controller = controller_ctor() - controller.press(button_holder.right) - time.sleep(0.012) - controller.release(button_holder.right) - - def _start_gesture_capture( - self, - *, - initial_position: tuple[int, int] | None = None, - source_device: _EvdevDevice | None = None, - button_label: str | None = None, - ) -> None: - should_grab = False - with self._gesture_lock: - self._gesture_active = True - self._gesture_dx = 0 - self._gesture_dy = 0 - self._gesture_last_position = initial_position - self._gesture_started_since = time.monotonic() - self._gesture_trigger_label = button_label - if self._gesture_restore_cursor and button_label != "right": - self._gesture_anchor_cursor = self._read_cursor_position() - else: - self._gesture_anchor_cursor = None - should_grab = ( - self._gesture_freeze_pointer - and source_device is not None - and button_label != "right" - ) - - if should_grab and source_device is not None: - self._try_grab_device(source_device) - - def _accumulate_gesture_delta(self, *, dx: int, dy: int) -> None: - with self._gesture_lock: - if not self._gesture_active: - return - self._gesture_dx += dx - self._gesture_dy += dy - - def _accumulate_gesture_position(self, x: int, y: int) -> None: - with self._gesture_lock: - if not self._gesture_active: - return - if self._gesture_last_position is None: - self._gesture_last_position = (x, y) - return - last_x, last_y = self._gesture_last_position - self._gesture_dx += x - last_x - self._gesture_dy += y - last_y - self._gesture_last_position = (x, y) - - def _finish_gesture_capture(self, button_label: str) -> None: - with self._gesture_lock: - if not self._gesture_active: - return - dx = self._gesture_dx - dy = self._gesture_dy - self._gesture_active = False - self._gesture_dx = 0 - self._gesture_dy = 0 - self._gesture_last_position = None - self._gesture_started_since = None - self._gesture_trigger_label = None - if button_label == "right": - self._clear_right_trigger_state() - anchor_cursor = self._gesture_anchor_cursor - self._gesture_anchor_cursor = None - - self._release_gesture_grab() - - direction = self._classify_gesture(dx, dy, self._gesture_threshold_px) - _LOG.debug( - "Gesture capture finished: button=%s dx=%s dy=%s direction=%s", - button_label, - dx, - dy, - direction, - ) - if direction is None: - if button_label == "right": - return - self._dispatch_click_async(button_label) - return - self._dispatch_gesture(direction) - if anchor_cursor is not None: - self._restore_cursor_position(anchor_cursor) - - def _dispatch_gesture(self, direction: str) -> None: - callback = self._on_gesture - if callback is None: - return - callback(direction) - - def _try_grab_device(self, device: _EvdevDevice) -> None: - try: - device.grab() - except Exception: - return - - with self._gesture_lock: - self._gesture_grabbed_device = device - - def _release_gesture_grab(self) -> None: - with self._gesture_lock: - grabbed = self._gesture_grabbed_device - - if grabbed is None: - return - - try: - grabbed.ungrab() - except Exception: - _LOG.warning( - "Failed to release gesture mouse device; will retry ungrab", - exc_info=True, - ) - return - - with self._gesture_lock: - if self._gesture_grabbed_device is grabbed: - self._gesture_grabbed_device = None - - def _begin_button_suppress( - self, - *, - source_device: _EvdevDevice, - button_label: str, - ) -> None: - if ( - self._button_grabbed_device is source_device - and self._button_grabbed_label == button_label - ): - return - - self._release_button_grab() - try: - source_device.grab() - except Exception: - return - - grabbed_since = time.monotonic() - timeout_s = self._button_grab_timeout_s - if button_label == "right": - timeout_s = max(timeout_s, self._right_hold_suppress_timeout_s) - self._button_grabbed_device = source_device - self._button_grabbed_label = button_label - self._button_grabbed_since = grabbed_since - self._button_grab_deadline_monotonic = grabbed_since + timeout_s - - def _end_button_suppress(self, *, button_label: str) -> None: - if self._button_grabbed_label != button_label: - return - self._release_button_grab() - - def _release_button_grab(self) -> None: - grabbed = self._button_grabbed_device - if grabbed is None: - self._button_grabbed_label = None - self._button_grabbed_since = None - self._button_grab_deadline_monotonic = None - return - try: - grabbed.ungrab() - except Exception: - _LOG.warning( - "Failed to release suppressed mouse device; will retry ungrab", - exc_info=True, - ) - return - - self._button_grabbed_device = None - self._button_grabbed_label = None - self._button_grabbed_since = None - self._button_grab_deadline_monotonic = None - - def _release_stale_button_grab(self) -> None: - deadline = self._button_grab_deadline_monotonic - if deadline is None: - return - - now = time.monotonic() - if self._button_grabbed_label == "right" and self._right_trigger_pressed: - pressed_since = self._right_trigger_pressed_since - if ( - pressed_since is not None - and now - pressed_since < self._right_hold_suppress_timeout_s - ): - return - _LOG.warning( - "Right-button hold timeout reached (%.2fs); force-releasing mouse grab", - self._right_hold_suppress_timeout_s, - ) - self._clear_right_trigger_state() - self._release_button_grab() - return - - if now < deadline: - return - - timeout_s = self._button_grab_timeout_s - grabbed_since = self._button_grabbed_since - if grabbed_since is not None: - timeout_s = max(0.0, deadline - grabbed_since) - _LOG.warning( - "Button suppress timeout reached (%.2fs); force-releasing mouse grab", - timeout_s, - ) - self._release_button_grab() - - def _release_stale_gesture_capture(self) -> None: - stale_label: str | None = None - elapsed_s: float | None = None - with self._gesture_lock: - if not self._gesture_active: - return - started_since = self._gesture_started_since - if started_since is None: - return - elapsed_s = time.monotonic() - started_since - if elapsed_s < self._gesture_grab_timeout_s: - return - self._gesture_active = False - self._gesture_dx = 0 - self._gesture_dy = 0 - self._gesture_last_position = None - self._gesture_anchor_cursor = None - self._gesture_started_since = None - stale_label = self._gesture_trigger_label - self._gesture_trigger_label = None - if stale_label == "right": - self._clear_right_trigger_state() - - _LOG.warning( - "Gesture capture timeout reached (%.2fs); force-releasing input grabs (trigger=%s)", - elapsed_s, - stale_label, - ) - self._release_gesture_grab() - if stale_label is not None: - self._end_button_suppress(button_label=stale_label) - - def _read_cursor_position(self) -> tuple[int, int] | None: - try: - system_integration = self._system_integration - except AttributeError: - system_integration = None - - if system_integration is not None: - try: - return system_integration.cursor_position() - except Exception: - return None - - if not self._hyprland_session: - return None - try: - proc = subprocess.run( - ["hyprctl", "-j", "cursorpos"], - capture_output=True, - text=True, - check=False, - timeout=0.8, - ) - except (OSError, subprocess.TimeoutExpired): - return None - - if proc.returncode != 0: - return None - - try: - payload = cast(dict[str, object], json.loads(proc.stdout)) - except json.JSONDecodeError: - return None - - x_raw = payload.get("x") - y_raw = payload.get("y") - if not isinstance(x_raw, int | float) or not isinstance(y_raw, int | float): - return None - return int(x_raw), int(y_raw) - - def _restore_cursor_position(self, position: tuple[int, int]) -> None: - try: - system_integration = self._system_integration - except AttributeError: - system_integration = None - - if system_integration is not None: - x, y = position - try: - _ = system_integration.move_cursor(x=x, y=y) - except Exception: - return - return - - if not self._hyprland_session: - return - - x, y = position - try: - _ = subprocess.run( - ["hyprctl", "dispatch", "movecursor", str(x), str(y)], - capture_output=True, - text=True, - check=False, - timeout=0.8, - ) - except (OSError, subprocess.TimeoutExpired): - return - - @staticmethod - def _classify_gesture(dx: int, dy: int, threshold_px: int) -> str | None: - if max(abs(dx), abs(dy)) < threshold_px: - return None - if abs(dx) >= abs(dy): - return "right" if dx > 0 else "left" - return "down" if dy > 0 else "up" - - def _dispatch_front_press(self) -> None: - if self._should_fire_front(): - self._on_front_press() - - def _dispatch_rear_press(self) -> None: - if self._should_fire_rear(): - self._on_rear_press() - - def _should_fire_front(self) -> bool: - now = time.monotonic() - with self._debounce_lock: - if now - self._last_front_press_monotonic < self._debounce_s: - return False - self._last_front_press_monotonic = now - return True - - def _should_fire_rear(self) -> bool: - now = time.monotonic() - with self._debounce_lock: - if now - self._last_rear_press_monotonic < self._debounce_s: - return False - self._last_rear_press_monotonic = now - return True - - -class _EvdevEvent(Protocol): - type: int - value: int - code: int - - -class _EvdevDevice(Protocol): - fd: int - - def read(self) -> list[_EvdevEvent]: ... - - def capabilities(self) -> dict[int, list[int]]: ... - - def grab(self) -> None: ... - - def ungrab(self) -> None: ... - - def close(self) -> None: ... - - -class _InputDeviceCtor(Protocol): - def __call__(self, path: str) -> _EvdevDevice: ... - - -class _ListDevicesFn(Protocol): - def __call__(self) -> list[str]: ... - - -class _Ecodes(Protocol): - BTN_SIDE: int - BTN_EXTRA: int - BTN_LEFT: int - BTN_RIGHT: int - EV_KEY: int - EV_REL: int - REL_X: int - REL_Y: int - - -class _MouseListener(Protocol): - def start(self) -> None: ... - - def stop(self) -> None: ... - - -class _MouseListenerCtor(Protocol): - def __call__( - self, - *, - on_click: Callable[[int, int, object, bool], None], - on_move: Callable[[int, int], None] | None = None, - ) -> _MouseListener: ... - - -class _MouseController(Protocol): - def press(self, button: object) -> None: ... - - def release(self, button: object) -> None: ... - - -class _MouseControllerCtor(Protocol): - def __call__(self) -> _MouseController: ... - - -class _MouseButtonHolder(Protocol): - right: object +_sys.modules[__name__] = _import_module("vibemouse.listener.mouse_listener") diff --git a/vibemouse/ops/__init__.py b/vibemouse/ops/__init__.py new file mode 100644 index 0000000..a9a2c5b --- /dev/null +++ b/vibemouse/ops/__init__.py @@ -0,0 +1 @@ +__all__ = [] diff --git a/vibemouse/ops/deploy.py b/vibemouse/ops/deploy.py new file mode 100644 index 0000000..ebe2418 --- /dev/null +++ b/vibemouse/ops/deploy.py @@ -0,0 +1,268 @@ +from __future__ import annotations + +import argparse +import shlex +import shutil +import subprocess +import sys +from pathlib import Path +from typing import cast + +from vibemouse.ops.doctor import run_doctor + + +_PRESET_OVERRIDES: dict[str, dict[str, str]] = { + "stable": { + "VIBEMOUSE_AUTO_PASTE": "true", + "VIBEMOUSE_BUTTON_DEBOUNCE_MS": "220", + "VIBEMOUSE_PREWARM_ON_START": "true", + "VIBEMOUSE_OPENCLAW_RETRIES": "1", + }, + "fast": { + "VIBEMOUSE_AUTO_PASTE": "true", + "VIBEMOUSE_BUTTON_DEBOUNCE_MS": "120", + "VIBEMOUSE_PREWARM_ON_START": "true", + "VIBEMOUSE_OPENCLAW_RETRIES": "2", + }, + "low-resource": { + "VIBEMOUSE_AUTO_PASTE": "false", + "VIBEMOUSE_BUTTON_DEBOUNCE_MS": "250", + "VIBEMOUSE_PREWARM_ON_START": "false", + "VIBEMOUSE_OPENCLAW_RETRIES": "0", + }, +} + + +def configure_deploy_parser(parser: argparse.ArgumentParser) -> None: + _ = parser.add_argument( + "--preset", + choices=sorted(_PRESET_OVERRIDES.keys()), + default="stable", + help="deployment preset profile", + ) + _ = parser.add_argument( + "--env-file", + default=str(Path.home() / ".config" / "vibemouse" / "deploy.env"), + help="path to generated EnvironmentFile", + ) + _ = parser.add_argument( + "--service-file", + default=str(Path.home() / ".config" / "systemd" / "user" / "vibemouse.service"), + help="path to generated systemd user service file", + ) + _ = parser.add_argument( + "--log-file", + default=str(Path.home() / ".local" / "state" / "vibemouse" / "service.log"), + help="path to persistent service log file", + ) + _ = parser.add_argument( + "--openclaw-command", + default=shutil.which("openclaw") or "openclaw", + help="OpenClaw command prefix", + ) + _ = parser.add_argument( + "--openclaw-agent", + default="main", + help="OpenClaw agent id used for rear-button routing", + ) + _ = parser.add_argument( + "--openclaw-retries", + type=int, + default=None, + help="override retries for OpenClaw spawn failures", + ) + _ = parser.add_argument( + "--exec-start", + default=None, + help="override ExecStart command", + ) + _ = parser.add_argument( + "--skip-systemctl", + action="store_true", + help="skip systemctl enable/restart operations", + ) + _ = parser.add_argument( + "--dry-run", + action="store_true", + help="print plan without writing files", + ) + + +def run_deploy(args: argparse.Namespace) -> int: + preset = str(getattr(args, "preset", "stable")) + if preset not in _PRESET_OVERRIDES: + print(f"Unknown preset: {preset}") + return 1 + + openclaw_command = str(getattr(args, "openclaw_command", "openclaw")).strip() + if not openclaw_command: + print("--openclaw-command must not be empty") + return 1 + + openclaw_agent = str(getattr(args, "openclaw_agent", "main")).strip() or "main" + + retries_override = cast(int | None, getattr(args, "openclaw_retries", None)) + + if retries_override is not None and retries_override < 0: + print("--openclaw-retries must be non-negative") + return 1 + + env_path = Path(str(getattr(args, "env_file", ""))).expanduser() + service_path = Path(str(getattr(args, "service_file", ""))).expanduser() + log_path = Path(str(getattr(args, "log_file", ""))).expanduser() + exec_start = _resolve_exec_start(str(getattr(args, "exec_start", "") or "")) + + env_map = build_deploy_env( + preset=preset, + openclaw_command=openclaw_command, + openclaw_agent=openclaw_agent, + openclaw_retries=retries_override, + ) + env_content = render_env_file(env_map) + service_content = render_service_file( + env_file=env_path, + log_file=log_path, + exec_start=exec_start, + ) + + dry_run = bool(getattr(args, "dry_run", False)) + if dry_run: + print(f"[DRY-RUN] would write {env_path}") + print(f"[DRY-RUN] would write {service_path}") + print(f"[DRY-RUN] preset={preset}") + print(f"[DRY-RUN] exec_start={exec_start}") + return 0 + + _write_text(env_path, env_content) + _write_text(service_path, service_content) + print(f"Wrote {env_path}") + print(f"Wrote {service_path}") + + if not bool(getattr(args, "skip_systemctl", False)): + service_name = service_path.name + if not _run_systemctl(["daemon-reload"]): + return 1 + if not _run_systemctl(["enable", "--now", service_name]): + return 1 + if not _run_systemctl(["is-active", service_name]): + return 1 + + print("Running doctor checks...") + return run_doctor() + + +def build_deploy_env( + *, + preset: str, + openclaw_command: str, + openclaw_agent: str, + openclaw_retries: int | None, +) -> dict[str, str]: + base = { + "VIBEMOUSE_BACKEND": "funasr_onnx", + "VIBEMOUSE_DEVICE": "cpu", + "VIBEMOUSE_FALLBACK_CPU": "true", + "VIBEMOUSE_ENTER_MODE": "enter", + "VIBEMOUSE_OPENCLAW_COMMAND": openclaw_command, + "VIBEMOUSE_OPENCLAW_AGENT": openclaw_agent, + "VIBEMOUSE_OPENCLAW_TIMEOUT_S": "20.0", + "VIBEMOUSE_STATUS_FILE": "%t/vibemouse-status.json", + } + base.update(_PRESET_OVERRIDES[preset]) + if openclaw_retries is not None: + base["VIBEMOUSE_OPENCLAW_RETRIES"] = str(openclaw_retries) + return base + + +def render_env_file(env_map: dict[str, str]) -> str: + lines = [ + "# Generated by `vibemouse deploy`.", + "# Edit values if needed, then: systemctl --user restart vibemouse.service", + ] + for key in sorted(env_map.keys()): + lines.append(f"{key}={_quote_env_value(env_map[key])}") + lines.append("") + return "\n".join(lines) + + +def render_service_file(*, env_file: Path, log_file: Path, exec_start: str) -> str: + env_file_str = env_file.as_posix() + log_file_str = log_file.as_posix() + log_dir = log_file.parent.as_posix() + lines = [ + "[Unit]", + "Description=VibeMouse voice input service", + "After=graphical-session.target", + "PartOf=graphical-session.target", + "", + "[Service]", + "Type=simple", + f"EnvironmentFile={env_file_str}", + f"ExecStartPre=/usr/bin/mkdir -p {log_dir}", + f"ExecStart={exec_start}", + f"StandardOutput=append:{log_file_str}", + f"StandardError=append:{log_file_str}", + "Restart=on-failure", + "RestartSec=2", + "", + "[Install]", + "WantedBy=default.target", + "", + ] + return "\n".join(lines) + + +def _quote_env_value(value: str) -> str: + escaped = value.replace("\\", "\\\\").replace('"', '\\"') + return f'"{escaped}"' + + +def _resolve_exec_start(raw_exec_start: str) -> str: + cleaned = raw_exec_start.strip() + if cleaned: + return cleaned + + vibemouse_bin = shutil.which("vibemouse") + if vibemouse_bin: + return f"{vibemouse_bin} run" + + python_bin = sys.executable + return f"{python_bin} -m vibemouse.main run" + + +def _write_text(path: Path, content: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + _ = path.write_text(content, encoding="utf-8") + + +def _run_systemctl(args: list[str]) -> bool: + cmd = ["systemctl", "--user", *args] + try: + proc = subprocess.run( + cmd, + capture_output=True, + text=True, + check=False, + timeout=12.0, + ) + except (OSError, subprocess.TimeoutExpired) as error: + print(f"Failed to run {' '.join(cmd)}: {error}") + return False + + if proc.returncode == 0: + return True + + stderr = proc.stderr.strip() + if stderr: + print(f"systemctl {' '.join(args)} failed: {stderr}") + else: + print(f"systemctl {' '.join(args)} failed with code {proc.returncode}") + return False + + +def validate_openclaw_command(raw: str) -> bool: + try: + parts = shlex.split(raw) + except ValueError: + return False + return bool(parts) diff --git a/vibemouse/ops/doctor.py b/vibemouse/ops/doctor.py new file mode 100644 index 0000000..3cdbac1 --- /dev/null +++ b/vibemouse/ops/doctor.py @@ -0,0 +1,610 @@ +from __future__ import annotations + +import importlib +import json +import shlex +import shutil +import subprocess +import sys +from collections.abc import Iterable, Mapping +from dataclasses import dataclass +from pathlib import Path + +from vibemouse.config import AppConfig, load_config + + +@dataclass(frozen=True) +class DoctorCheck: + name: str + status: str + detail: str + + +def run_doctor(*, apply_fixes: bool = False) -> int: + if apply_fixes: + _apply_doctor_fixes() + + checks: list[DoctorCheck] = [] + + config_check, config = _check_config_load() + checks.append(config_check) + + if config is not None: + checks.extend(_check_openclaw(config)) + + checks.append(_check_audio_input(config)) + checks.append(_check_input_device_permissions(config)) + + checks.append(_check_hyprland_return_bind_conflict(config)) + checks.append(_check_user_service_state()) + + _print_checks(checks) + + fail_count = sum(1 for check in checks if check.status == "fail") + warn_count = sum(1 for check in checks if check.status == "warn") + print(f"Doctor summary: {len(checks)} checks, {fail_count} fail, {warn_count} warn") + return 1 if fail_count else 0 + + +def _apply_doctor_fixes() -> None: + _fix_hyprland_return_bind_conflict() + _ensure_user_service_active() + + +def _fix_hyprland_return_bind_conflict() -> None: + bind_path = Path.home() / ".config/hypr/UserConfigs/UserKeybinds.conf" + if not bind_path.exists(): + return + + try: + lines = bind_path.read_text(encoding="utf-8", errors="ignore").splitlines() + except OSError: + return + + changed = False + rewritten: list[str] = [] + for line in lines: + stripped = line.strip() + if ( + stripped.startswith("#") + or "sendshortcut" not in stripped + or "Return" not in stripped + ): + rewritten.append(line) + continue + + if "mouse:275" in stripped or "mouse:276" in stripped: + rewritten.append(f"# {line} # auto-disabled by vibemouse doctor --fix") + changed = True + continue + + rewritten.append(line) + + if not changed: + return + + try: + bind_path.write_text("\n".join(rewritten) + "\n", encoding="utf-8") + except OSError: + return + + _ = _run_subprocess( + ["hyprctl", "reload", "config-only"], + timeout=3.0, + ) + + +def _ensure_user_service_active() -> None: + probe = _run_subprocess( + ["systemctl", "--user", "is-active", "vibemouse.service"], + timeout=3.0, + ) + if probe is None: + return + if probe.returncode == 0 and probe.stdout.strip() == "active": + return + + _ = _run_subprocess( + ["systemctl", "--user", "restart", "vibemouse.service"], + timeout=8.0, + ) + + +def _check_config_load() -> tuple[DoctorCheck, AppConfig | None]: + try: + config = load_config() + except Exception as error: + return ( + DoctorCheck( + name="config", + status="fail", + detail=f"failed to load config: {error}", + ), + None, + ) + + return ( + DoctorCheck( + name="config", + status="ok", + detail=( + "loaded " + + f"front={config.front_button}, rear={config.rear_button}, " + + f"openclaw_agent={config.openclaw_agent or 'none'}" + ), + ), + config, + ) + + +def _check_openclaw(config: AppConfig) -> list[DoctorCheck]: + checks: list[DoctorCheck] = [] + + command_parts = _parse_openclaw_command(config.openclaw_command) + if command_parts is None: + checks.append( + DoctorCheck( + name="openclaw-command", + status="fail", + detail="invalid VIBEMOUSE_OPENCLAW_COMMAND shell syntax", + ) + ) + return checks + + executable = command_parts[0] + resolved = shutil.which(executable) + if resolved is None: + checks.append( + DoctorCheck( + name="openclaw-command", + status="fail", + detail=f"executable not found in PATH: {executable}", + ) + ) + return checks + + checks.append( + DoctorCheck( + name="openclaw-command", + status="ok", + detail=f"resolved executable: {resolved}", + ) + ) + + configured_agent = config.openclaw_agent + if not configured_agent: + checks.append( + DoctorCheck( + name="openclaw-agent", + status="warn", + detail="no agent configured; set VIBEMOUSE_OPENCLAW_AGENT", + ) + ) + return checks + + probe_cmd = [*command_parts, "agents", "list", "--json"] + try: + probe = subprocess.run( + probe_cmd, + capture_output=True, + text=True, + check=False, + timeout=8.0, + ) + except subprocess.TimeoutExpired: + checks.append( + DoctorCheck( + name="openclaw-agent", + status="warn", + detail="timed out while probing available agents", + ) + ) + return checks + except OSError as error: + checks.append( + DoctorCheck( + name="openclaw-agent", + status="warn", + detail=f"failed to run agent probe: {error}", + ) + ) + return checks + + if probe.returncode != 0: + stderr = probe.stderr.strip() + checks.append( + DoctorCheck( + name="openclaw-agent", + status="warn", + detail=( + "agent probe failed" + if not stderr + else f"agent probe failed: {stderr}" + ), + ) + ) + return checks + + try: + payload = json.loads(probe.stdout) + except json.JSONDecodeError: + checks.append( + DoctorCheck( + name="openclaw-agent", + status="warn", + detail="agent probe returned invalid JSON", + ) + ) + return checks + + if not isinstance(payload, list): + checks.append( + DoctorCheck( + name="openclaw-agent", + status="warn", + detail="agent probe returned unexpected payload shape", + ) + ) + return checks + + available_agents = { + str(entry.get("id", "")).strip() for entry in payload if isinstance(entry, dict) + } + if configured_agent in available_agents: + checks.append( + DoctorCheck( + name="openclaw-agent", + status="ok", + detail=f"configured agent exists: {configured_agent}", + ) + ) + else: + sample = ", ".join(sorted(agent for agent in available_agents if agent)[:5]) + checks.append( + DoctorCheck( + name="openclaw-agent", + status="warn", + detail=( + f"configured agent not found: {configured_agent}; " + + (f"available: {sample}" if sample else "no agents listed") + ), + ) + ) + + return checks + + +def _check_audio_input(config: AppConfig | None) -> DoctorCheck: + try: + sounddevice = importlib.import_module("sounddevice") + except Exception as error: + return DoctorCheck( + name="audio-input", + status="fail", + detail=f"cannot import sounddevice: {error}", + ) + + query_devices = getattr(sounddevice, "query_devices", None) + if not callable(query_devices): + return DoctorCheck( + name="audio-input", + status="fail", + detail="sounddevice.query_devices is unavailable", + ) + + try: + devices_obj = query_devices() + except Exception as error: + return DoctorCheck( + name="audio-input", + status="fail", + detail=f"failed to query audio devices: {error}", + ) + + device_entries = _coerce_device_entries(devices_obj) + if device_entries is None: + return DoctorCheck( + name="audio-input", + status="warn", + detail="unexpected audio device payload shape", + ) + + input_devices: list[Mapping[str, object]] = [] + for item in device_entries: + max_inputs = _to_float(item.get("max_input_channels", 0.0)) + if max_inputs > 0: + input_devices.append(item) + if not input_devices: + return DoctorCheck( + name="audio-input", + status="fail", + detail="no input-capable microphone device detected", + ) + + default_index = _read_default_input_device_index(sounddevice) + check_input_settings = getattr(sounddevice, "check_input_settings", None) + if default_index is not None and callable(check_input_settings): + sample_rate = float(config.sample_rate) if config is not None else 16000.0 + channels = config.channels if config is not None else 1 + try: + _ = check_input_settings( + device=default_index, + channels=max(1, int(channels)), + samplerate=sample_rate, + ) + except Exception as error: + return DoctorCheck( + name="audio-input", + status="warn", + detail=f"default input exists but validation failed: {error}", + ) + + return DoctorCheck( + name="audio-input", + status="ok", + detail=f"detected {len(input_devices)} input-capable device(s)", + ) + + +def _check_input_device_permissions(config: AppConfig | None) -> DoctorCheck: + if not sys.platform.startswith("linux"): + return DoctorCheck( + name="input-device-permissions", + status="warn", + detail="raw input permission check is only available on Linux", + ) + + try: + evdev_module = importlib.import_module("evdev") + except Exception as error: + return DoctorCheck( + name="input-device-permissions", + status="warn", + detail=f"cannot import evdev for raw input check: {error}", + ) + + list_devices = getattr(evdev_module, "list_devices", None) + input_device_ctor = getattr(evdev_module, "InputDevice", None) + ecodes = getattr(evdev_module, "ecodes", None) + if not callable(list_devices) or input_device_ctor is None or ecodes is None: + return DoctorCheck( + name="input-device-permissions", + status="warn", + detail="evdev module is missing required APIs", + ) + + try: + device_paths_obj = list_devices() + except Exception as error: + return DoctorCheck( + name="input-device-permissions", + status="warn", + detail=f"failed to list /dev/input devices: {error}", + ) + + if not isinstance(device_paths_obj, list): + return DoctorCheck( + name="input-device-permissions", + status="warn", + detail="unexpected device-path payload from evdev", + ) + + device_paths = [str(path) for path in device_paths_obj] + if not device_paths: + return DoctorCheck( + name="input-device-permissions", + status="warn", + detail="no /dev/input/event* devices were found", + ) + + ev_key = int(getattr(ecodes, "EV_KEY", 1)) + btn_side = int(getattr(ecodes, "BTN_SIDE", 0x116)) + btn_extra = int(getattr(ecodes, "BTN_EXTRA", 0x117)) + side_button_codes = {btn_side, btn_extra} + + accessible = 0 + side_capable = 0 + permission_denied = 0 + + for path in device_paths: + try: + device = input_device_ctor(path) + except PermissionError: + permission_denied += 1 + continue + except Exception: + continue + + try: + capabilities_obj = device.capabilities() + accessible += 1 + if isinstance(capabilities_obj, dict): + keys_obj = capabilities_obj.get(ev_key, []) + keys = {int(code) for code in keys_obj if isinstance(code, int)} + if side_button_codes & keys: + side_capable += 1 + finally: + try: + device.close() + except Exception: + pass + + if accessible == 0 and permission_denied > 0: + return DoctorCheck( + name="input-device-permissions", + status="fail", + detail=( + "cannot access /dev/input event devices (permission denied); " + + "add user to input group or configure udev rules" + ), + ) + + if accessible == 0: + return DoctorCheck( + name="input-device-permissions", + status="warn", + detail="no readable /dev/input event devices were found", + ) + + rear_button = config.rear_button if config is not None else "x2" + if side_capable == 0: + return DoctorCheck( + name="input-device-permissions", + status="warn", + detail=( + f"{accessible} input device(s) readable but none expose side-button codes " + + f"for rear={rear_button}" + ), + ) + + return DoctorCheck( + name="input-device-permissions", + status="ok", + detail=( + f"{accessible} readable input device(s), " + + f"{side_capable} with side-button capability" + ), + ) + + +def _read_default_input_device_index(sounddevice: object) -> int | None: + default_obj = getattr(sounddevice, "default", None) + if default_obj is None: + return None + + device_attr = getattr(default_obj, "device", None) + if not isinstance(device_attr, tuple | list) or len(device_attr) < 1: + return None + + raw_input_index = device_attr[0] + if not isinstance(raw_input_index, int): + return None + if raw_input_index < 0: + return None + return raw_input_index + + +def _coerce_device_entries(devices_obj: object) -> list[Mapping[str, object]] | None: + if isinstance(devices_obj, list): + return [entry for entry in devices_obj if isinstance(entry, Mapping)] + + if isinstance(devices_obj, Iterable): + entries: list[Mapping[str, object]] = [] + for entry in devices_obj: + if isinstance(entry, Mapping): + entries.append(entry) + return entries + + return None + + +def _to_float(value: object) -> float: + if isinstance(value, int | float): + return float(value) + if isinstance(value, str): + try: + return float(value.strip()) + except ValueError: + return 0.0 + return 0.0 + + +def _check_hyprland_return_bind_conflict(config: AppConfig | None) -> DoctorCheck: + bind_path = Path.home() / ".config/hypr/UserConfigs/UserKeybinds.conf" + if not bind_path.exists(): + return DoctorCheck( + name="hyprland-bind-conflict", + status="warn", + detail=f"file not found: {bind_path}", + ) + + rear_button = config.rear_button if config is not None else "x2" + rear_mouse_code = "mouse:275" if rear_button == "x1" else "mouse:276" + + lines = bind_path.read_text(encoding="utf-8", errors="ignore").splitlines() + for idx, raw_line in enumerate(lines, start=1): + line = raw_line.strip() + if not line or line.startswith("#"): + continue + if rear_mouse_code in line and "sendshortcut" in line and "Return" in line: + return DoctorCheck( + name="hyprland-bind-conflict", + status="fail", + detail=( + f"conflicting return bind found at {bind_path}:{idx}; " + + "disable it to let VibeMouse control rear-button behavior" + ), + ) + + return DoctorCheck( + name="hyprland-bind-conflict", + status="ok", + detail=f"no conflicting {rear_mouse_code} return bind found", + ) + + +def _check_user_service_state() -> DoctorCheck: + probe = _run_subprocess( + ["systemctl", "--user", "is-active", "vibemouse.service"], + timeout=3.0, + ) + if probe is None: + return DoctorCheck( + name="user-service", + status="warn", + detail="could not query service state", + ) + + state = probe.stdout.strip() or "unknown" + if state == "active": + return DoctorCheck( + name="user-service", + status="ok", + detail="vibemouse.service is active", + ) + + return DoctorCheck( + name="user-service", + status="warn", + detail=f"vibemouse.service state is {state}", + ) + + +def _run_subprocess( + cmd: list[str], + *, + timeout: float, +) -> subprocess.CompletedProcess[str] | None: + try: + return subprocess.run( + cmd, + capture_output=True, + text=True, + check=False, + timeout=timeout, + ) + except (OSError, subprocess.TimeoutExpired): + return None + + +def _parse_openclaw_command(raw: str) -> list[str] | None: + cleaned = raw.strip() + if not cleaned: + return None + try: + parts = shlex.split(cleaned) + except ValueError: + return None + if not parts: + return None + return parts + + +def _print_checks(checks: list[DoctorCheck]) -> None: + for check in checks: + badge = { + "ok": "[OK]", + "warn": "[WARN]", + "fail": "[FAIL]", + }.get(check.status, "[INFO]") + print(f"{badge} {check.name}: {check.detail}") diff --git a/vibemouse/output.py b/vibemouse/output.py index e6c1cda..b542ef2 100644 --- a/vibemouse/output.py +++ b/vibemouse/output.py @@ -1,456 +1,4 @@ -from __future__ import annotations +from importlib import import_module as _import_module +import sys as _sys -import importlib -import json -import shlex -import subprocess -import time -from dataclasses import dataclass -from typing import Protocol, cast - -import pyperclip - -from vibemouse.system_integration import ( - SystemIntegration, - create_system_integration, - is_terminal_window_payload, - load_atspi_module, - probe_text_input_focus_via_atspi, - probe_send_enter_via_atspi, -) - - -class TextOutput: - def __init__( - self, - *, - system_integration: SystemIntegration | None = None, - openclaw_command: str = "openclaw", - openclaw_agent: str | None = None, - openclaw_timeout_s: float = 20.0, - openclaw_retries: int = 0, - ) -> None: - try: - keyboard_module = importlib.import_module("pynput.keyboard") - except Exception as error: - raise RuntimeError( - f"Failed to load keyboard control dependencies: {error}" - ) from error - - controller_ctor = cast( - _ControllerCtor, - getattr(cast(object, keyboard_module), "Controller"), - ) - key_holder = cast( - _KeyHolder, - getattr(cast(object, keyboard_module), "Key"), - ) - self._kb: _KeyboardController = controller_ctor() - self._enter_key: object = key_holder.enter - self._ctrl_key: object = key_holder.ctrl - self._shift_key: object = key_holder.shift - self._insert_key: object = key_holder.insert - self._atspi: object | None = load_atspi_module() - self._system_integration: SystemIntegration = ( - system_integration - if system_integration is not None - else create_system_integration() - ) - self._hyprland_session: bool = self._system_integration.is_hyprland - self._openclaw_command: str = openclaw_command - self._openclaw_agent: str | None = openclaw_agent - self._openclaw_timeout_s: float = max(0.5, openclaw_timeout_s) - self._openclaw_retries: int = max(0, int(openclaw_retries)) - - def send_enter(self, *, mode: str = "enter") -> None: - normalized = mode.strip().lower() - if normalized == "none": - return - if normalized == "enter": - if self._send_hyprland_shortcut(mod="", key="Return"): - return - if self._send_enter_via_atspi(): - return - self._tap_key(self._enter_key) - return - if normalized == "ctrl_enter": - self._tap_modified_key(self._ctrl_key, self._enter_key) - return - if normalized == "shift_enter": - self._tap_modified_key(self._shift_key, self._enter_key) - return - raise ValueError(f"Unsupported enter mode: {mode!r}") - - def inject_or_clipboard(self, text: str, *, auto_paste: bool = False) -> str: - normalized = text.strip() - if not normalized: - return "empty" - - if self._is_text_input_focused(): - self._kb.type(normalized) - return "typed" - - pyperclip.copy(normalized) - if auto_paste: - try: - self._paste_clipboard() - return "pasted" - except Exception: - return "clipboard" - return "clipboard" - - def send_to_openclaw(self, text: str) -> str: - return self.send_to_openclaw_result(text).route - - def send_to_openclaw_result(self, text: str) -> "OpenClawDispatchResult": - normalized = text.strip() - if not normalized: - return OpenClawDispatchResult(route="empty", reason="empty_text") - - command = self._build_openclaw_command(normalized) - if command is None: - pyperclip.copy(normalized) - return OpenClawDispatchResult(route="clipboard", reason="invalid_command") - - attempts = max(1, int(getattr(self, "_openclaw_retries", 0)) + 1) - last_reason = "spawn_error" - for attempt in range(attempts): - try: - _ = subprocess.Popen( - command, - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - start_new_session=True, - ) - if attempt == 0: - return OpenClawDispatchResult( - route="openclaw", - reason="dispatched", - ) - return OpenClawDispatchResult( - route="openclaw", - reason=f"dispatched_after_retry_{attempt}", - ) - except OSError as error: - last_reason = f"spawn_error:{error.__class__.__name__}" - - pyperclip.copy(normalized) - return OpenClawDispatchResult(route="clipboard", reason=last_reason) - - def _build_openclaw_command(self, message: str) -> list[str] | None: - raw_command = str(getattr(self, "_openclaw_command", "openclaw")).strip() - if not raw_command: - return None - - try: - parts = shlex.split(raw_command) - except ValueError: - return None - - if not parts: - return None - - command = [*parts, "agent", "--message", message, "--json"] - agent = getattr(self, "_openclaw_agent", None) - if isinstance(agent, str): - normalized_agent = agent.strip() - if normalized_agent: - command.extend(["--agent", normalized_agent]) - return command - - def _paste_clipboard(self) -> None: - terminal_active = self._is_hyprland_terminal_active() - for mod, key in self._paste_shortcuts(terminal_active=terminal_active): - if self._send_platform_shortcut(mod=mod, key=key): - return - - if ( - self._hyprland_session - and terminal_active - and self._send_ctrl_shift_v_via_keyboard() - ): - return - - if ( - self._hyprland_session - and terminal_active - and self._send_shift_insert_via_keyboard() - ): - return - - self._send_ctrl_v_via_keyboard() - - def _send_ctrl_v_via_keyboard(self) -> None: - pressed_ctrl = False - pressed_v = False - try: - self._kb.press(self._ctrl_key) - pressed_ctrl = True - self._kb.press("v") - pressed_v = True - finally: - if pressed_v: - try: - self._kb.release("v") - except Exception: - pass - if pressed_ctrl: - try: - self._kb.release(self._ctrl_key) - except Exception: - pass - - def _send_ctrl_shift_v_via_keyboard(self) -> bool: - pressed_ctrl = False - pressed_shift = False - pressed_v = False - try: - self._kb.press(self._ctrl_key) - pressed_ctrl = True - self._kb.press(self._shift_key) - pressed_shift = True - self._kb.press("v") - pressed_v = True - return True - except Exception: - return False - finally: - if pressed_v: - try: - self._kb.release("v") - except Exception: - pass - if pressed_shift: - try: - self._kb.release(self._shift_key) - except Exception: - pass - if pressed_ctrl: - try: - self._kb.release(self._ctrl_key) - except Exception: - pass - - def _send_shift_insert_via_keyboard(self) -> bool: - pressed_shift = False - pressed_insert = False - try: - self._kb.press(self._shift_key) - pressed_shift = True - self._kb.press(self._insert_key) - pressed_insert = True - return True - except Exception: - return False - finally: - if pressed_insert: - try: - self._kb.release(self._insert_key) - except Exception: - pass - if pressed_shift: - try: - self._kb.release(self._shift_key) - except Exception: - pass - - def _tap_key(self, key: object) -> None: - self._kb.press(key) - time.sleep(0.012) - self._kb.release(key) - - def _tap_modified_key(self, modifier: object, key: object) -> None: - pressed_modifier = False - pressed_key = False - try: - self._kb.press(modifier) - pressed_modifier = True - self._kb.press(key) - pressed_key = True - time.sleep(0.012) - finally: - if pressed_key: - try: - self._kb.release(key) - except Exception: - pass - if pressed_modifier: - try: - self._kb.release(modifier) - except Exception: - pass - - def _send_enter_via_atspi(self) -> bool: - try: - system_integration = self._system_integration - except AttributeError: - system_integration = None - - if system_integration is not None: - try: - handled = system_integration.send_enter_via_accessibility() - except Exception: - handled = None - if handled is True: - return True - - atspi_module = getattr(self, "_atspi", None) - return probe_send_enter_via_atspi( - atspi_module=atspi_module, - lazy_load=False, - ) - - def _paste_shortcuts(self, *, terminal_active: bool) -> tuple[tuple[str, str], ...]: - try: - system_integration = self._system_integration - except AttributeError: - system_integration = None - - if system_integration is not None: - try: - shortcuts = system_integration.paste_shortcuts( - terminal_active=terminal_active - ) - except Exception: - shortcuts = () - if shortcuts: - return shortcuts - - if terminal_active: - return ( - ("CTRL SHIFT", "V"), - ("SHIFT", "Insert"), - ("CTRL", "V"), - ) - return (("CTRL", "V"),) - - def _send_platform_shortcut(self, *, mod: str, key: str) -> bool: - try: - system_integration = self._system_integration - except AttributeError: - system_integration = None - - if system_integration is not None: - try: - if bool(system_integration.send_shortcut(mod=mod, key=key)): - return True - if not self._hyprland_session: - return False - except Exception: - if not self._hyprland_session: - return False - - if not self._hyprland_session: - return False - - mod_part = mod.strip().upper() - if mod_part: - arg = f"{mod_part}, {key}, activewindow" - else: - arg = f", {key}, activewindow" - - try: - proc = subprocess.run( - ["hyprctl", "dispatch", "sendshortcut", arg], - capture_output=True, - text=True, - check=False, - timeout=1.0, - ) - except (OSError, subprocess.TimeoutExpired): - return False - - return proc.returncode == 0 and proc.stdout.strip() == "ok" - - def _send_hyprland_shortcut(self, *, mod: str, key: str) -> bool: - return self._send_platform_shortcut(mod=mod, key=key) - - def _is_terminal_window_active(self) -> bool: - payload_map: dict[str, object] | None = None - try: - system_integration = self._system_integration - except AttributeError: - system_integration = None - - if system_integration is not None: - try: - terminal_active = system_integration.is_terminal_window_active() - except Exception: - terminal_active = None - if isinstance(terminal_active, bool): - return terminal_active - - if not self._hyprland_session: - return False - - if payload_map is None: - try: - proc = subprocess.run( - ["hyprctl", "-j", "activewindow"], - capture_output=True, - text=True, - check=False, - timeout=1.0, - ) - except (OSError, subprocess.TimeoutExpired): - return False - - if proc.returncode != 0: - return False - - try: - payload_obj = cast(object, json.loads(proc.stdout)) - except json.JSONDecodeError: - return False - - if not isinstance(payload_obj, dict): - return False - - payload_map = cast(dict[str, object], payload_obj) - - return is_terminal_window_payload(payload_map) - - def _is_hyprland_terminal_active(self) -> bool: - return self._is_terminal_window_active() - - def _is_text_input_focused(self) -> bool: - try: - system_integration = self._system_integration - except AttributeError: - system_integration = None - - if system_integration is not None: - try: - focused = system_integration.is_text_input_focused() - except Exception: - focused = None - if isinstance(focused, bool): - return focused - - return probe_text_input_focus_via_atspi() - - -class _KeyboardController(Protocol): - def press(self, key: object) -> None: ... - - def release(self, key: object) -> None: ... - - def type(self, text: str) -> None: ... - - -class _ControllerCtor(Protocol): - def __call__(self) -> _KeyboardController: ... - - -class _KeyHolder(Protocol): - enter: object - ctrl: object - shift: object - insert: object - - -@dataclass(frozen=True) -class OpenClawDispatchResult: - route: str - reason: str +_sys.modules[__name__] = _import_module("vibemouse.core.output") diff --git a/vibemouse/platform/__init__.py b/vibemouse/platform/__init__.py new file mode 100644 index 0000000..a9a2c5b --- /dev/null +++ b/vibemouse/platform/__init__.py @@ -0,0 +1 @@ +__all__ = [] diff --git a/vibemouse/platform/system_integration.py b/vibemouse/platform/system_integration.py new file mode 100644 index 0000000..9d9f43c --- /dev/null +++ b/vibemouse/platform/system_integration.py @@ -0,0 +1,347 @@ +from __future__ import annotations + +import importlib +import json +import os +import subprocess +import sys +from collections.abc import Mapping +from typing import Protocol, cast + + +_TERMINAL_CLASS_HINTS: set[str] = { + "foot", + "kitty", + "alacritty", + "wezterm", + "ghostty", + "gnome-terminal", + "gnome-terminal-server", + "konsole", + "tilix", + "xterm", + "terminator", + "xfce4-terminal", + "urxvt", + "st", + "tabby", + "hyper", + "warp", + "windowsterminal", + "wt", +} + +_TERMINAL_TITLE_HINTS: set[str] = { + "terminal", + "tmux", + "bash", + "zsh", + "fish", + "powershell", + "cmd.exe", +} + +_BROWSER_CLASS_HINTS: set[str] = { + "firefox", + "zen", + "librewolf", + "waterfox", + "floorp", + "chromium", + "google-chrome", + "chrome", + "brave-browser", + "microsoft-edge", + "vivaldi", + "opera", + "thorium", + "browser", +} + + +def is_terminal_window_payload(payload: Mapping[str, object]) -> bool: + window_class = str(payload.get("class", "")).lower() + initial_class = str(payload.get("initialClass", "")).lower() + title = str(payload.get("title", "")).lower() + + if any( + hint in window_class or hint in initial_class for hint in _TERMINAL_CLASS_HINTS + ): + return True + + return any(hint in title for hint in _TERMINAL_TITLE_HINTS) + + +def is_browser_window_payload(payload: Mapping[str, object]) -> bool: + window_class = str(payload.get("class", "")).lower() + initial_class = str(payload.get("initialClass", "")).lower() + title = str(payload.get("title", "")).lower() + + if any( + hint in window_class or hint in initial_class for hint in _BROWSER_CLASS_HINTS + ): + return True + + return "browser" in title or "chrome" in title or "firefox" in title + + +class SystemIntegration(Protocol): + @property + def is_hyprland(self) -> bool: ... + + def send_shortcut(self, *, mod: str, key: str) -> bool: ... + + def active_window(self) -> dict[str, object] | None: ... + + def cursor_position(self) -> tuple[int, int] | None: ... + + def move_cursor(self, *, x: int, y: int) -> bool: ... + + def switch_workspace(self, direction: str) -> bool: ... + + def is_text_input_focused(self) -> bool | None: ... + + def send_enter_via_accessibility(self) -> bool | None: ... + + def is_terminal_window_active(self) -> bool | None: ... + + def paste_shortcuts( + self, *, terminal_active: bool + ) -> tuple[tuple[str, str], ...]: ... + + +class NoopSystemIntegration: + @property + def is_hyprland(self) -> bool: + return False + + def send_shortcut(self, *, mod: str, key: str) -> bool: + del mod + del key + return False + + def active_window(self) -> dict[str, object] | None: + return None + + def cursor_position(self) -> tuple[int, int] | None: + return None + + def move_cursor(self, *, x: int, y: int) -> bool: + del x + del y + return False + + def switch_workspace(self, direction: str) -> bool: + del direction + return False + + def is_text_input_focused(self) -> bool | None: + return None + + def send_enter_via_accessibility(self) -> bool | None: + return None + + def is_terminal_window_active(self) -> bool | None: + return None + + def paste_shortcuts(self, *, terminal_active: bool) -> tuple[tuple[str, str], ...]: + del terminal_active + return () + + +class HyprlandSystemIntegration: + @property + def is_hyprland(self) -> bool: + return True + + def send_shortcut(self, *, mod: str, key: str) -> bool: + mod_part = mod.strip().upper() + if mod_part: + arg = f"{mod_part}, {key}, activewindow" + else: + arg = f", {key}, activewindow" + return self._dispatch(["sendshortcut", arg], timeout=1.0) + + def active_window(self) -> dict[str, object] | None: + return self._query_json(["activewindow"], timeout=1.0) + + def cursor_position(self) -> tuple[int, int] | None: + payload = self._query_json(["cursorpos"], timeout=0.8) + if payload is None: + return None + + x_raw = payload.get("x") + y_raw = payload.get("y") + if not isinstance(x_raw, int | float) or not isinstance(y_raw, int | float): + return None + + return int(x_raw), int(y_raw) + + def move_cursor(self, *, x: int, y: int) -> bool: + return self._dispatch(["movecursor", str(x), str(y)], timeout=0.8) + + def switch_workspace(self, direction: str) -> bool: + workspace_arg = "e-1" if direction == "left" else "e+1" + return self._dispatch(["workspace", workspace_arg], timeout=1.0) + + def is_text_input_focused(self) -> bool | None: + return probe_text_input_focus_via_atspi() + + def send_enter_via_accessibility(self) -> bool | None: + return probe_send_enter_via_atspi() + + def is_terminal_window_active(self) -> bool | None: + payload = self.active_window() + if payload is None: + return False + return is_terminal_window_payload(payload) + + def paste_shortcuts(self, *, terminal_active: bool) -> tuple[tuple[str, str], ...]: + if terminal_active: + return ( + ("CTRL SHIFT", "V"), + ("SHIFT", "Insert"), + ("CTRL", "V"), + ) + return (("CTRL", "V"),) + + @staticmethod + def _dispatch(args: list[str], *, timeout: float) -> bool: + try: + proc = subprocess.run( + ["hyprctl", "dispatch", *args], + capture_output=True, + text=True, + check=False, + timeout=timeout, + ) + except (OSError, subprocess.TimeoutExpired): + return False + + return proc.returncode == 0 and proc.stdout.strip() == "ok" + + @staticmethod + def _query_json(args: list[str], *, timeout: float) -> dict[str, object] | None: + try: + proc = subprocess.run( + ["hyprctl", "-j", *args], + capture_output=True, + text=True, + check=False, + timeout=timeout, + ) + except (OSError, subprocess.TimeoutExpired): + return None + + if proc.returncode != 0: + return None + + try: + payload_obj = cast(object, json.loads(proc.stdout)) + except json.JSONDecodeError: + return None + + if not isinstance(payload_obj, dict): + return None + + return cast(dict[str, object], payload_obj) + + +def detect_hyprland_session(*, env: Mapping[str, str] | None = None) -> bool: + source = env if env is not None else os.environ + desktop = source.get("XDG_CURRENT_DESKTOP", "") + if "hyprland" in desktop.lower(): + return True + return bool(source.get("HYPRLAND_INSTANCE_SIGNATURE")) + + +def create_system_integration( + *, + env: Mapping[str, str] | None = None, + platform_name: str | None = None, +) -> SystemIntegration: + if detect_hyprland_session(env=env): + return HyprlandSystemIntegration() + + _ = platform_name if platform_name is not None else sys.platform + + return NoopSystemIntegration() + + +def probe_text_input_focus_via_atspi(*, timeout_s: float = 1.5) -> bool: + script = ( + "import gi\n" + "gi.require_version('Atspi', '2.0')\n" + "from gi.repository import Atspi\n" + "obj = Atspi.get_desktop(0).get_focus()\n" + "editable = False\n" + "role = ''\n" + "if obj is not None:\n" + " role = obj.get_role_name().lower()\n" + " attrs = obj.get_attributes() or []\n" + " for it in attrs:\n" + " s = str(it).lower()\n" + " if s == 'editable:true' or s.endswith(':editable:true'):\n" + " editable = True\n" + " break\n" + "roles = {'text', 'entry', 'password text', 'terminal', 'paragraph', 'document text', 'document web'}\n" + "print('1' if editable or role in roles else '0')\n" + ) + + try: + proc = subprocess.run( + ["python3", "-c", script], + capture_output=True, + text=True, + check=False, + timeout=timeout_s, + ) + except (OSError, subprocess.TimeoutExpired): + return False + + return proc.returncode == 0 and proc.stdout.strip() == "1" + + +def load_atspi_module() -> object | None: + try: + gi = importlib.import_module("gi") + require_version = cast(_RequireVersionFn, getattr(gi, "require_version")) + require_version("Atspi", "2.0") + atspi_repo = cast(object, importlib.import_module("gi.repository")) + return cast(object, getattr(atspi_repo, "Atspi")) + except Exception: + return None + + +def probe_send_enter_via_atspi( + *, atspi_module: object | None = None, lazy_load: bool = True +) -> bool: + module = atspi_module + if module is None and lazy_load: + module = load_atspi_module() + if module is None: + return False + + try: + key_synth = cast(object, getattr(module, "KeySynthType")) + press_release = cast(object, getattr(key_synth, "PRESSRELEASE")) + generate_keyboard_event = cast( + _GenerateKeyboardEventFn, + getattr(module, "generate_keyboard_event"), + ) + return bool(generate_keyboard_event(65293, None, press_release)) + except Exception: + return False + + +class _GenerateKeyboardEventFn(Protocol): + def __call__( + self, + keyval: int, + keystring: str | None, + synth_type: object, + ) -> bool: ... + + +class _RequireVersionFn(Protocol): + def __call__(self, namespace: str, version: str) -> None: ... diff --git a/vibemouse/system_integration.py b/vibemouse/system_integration.py index 9d9f43c..b7462f4 100644 --- a/vibemouse/system_integration.py +++ b/vibemouse/system_integration.py @@ -1,347 +1,4 @@ -from __future__ import annotations +from importlib import import_module as _import_module +import sys as _sys -import importlib -import json -import os -import subprocess -import sys -from collections.abc import Mapping -from typing import Protocol, cast - - -_TERMINAL_CLASS_HINTS: set[str] = { - "foot", - "kitty", - "alacritty", - "wezterm", - "ghostty", - "gnome-terminal", - "gnome-terminal-server", - "konsole", - "tilix", - "xterm", - "terminator", - "xfce4-terminal", - "urxvt", - "st", - "tabby", - "hyper", - "warp", - "windowsterminal", - "wt", -} - -_TERMINAL_TITLE_HINTS: set[str] = { - "terminal", - "tmux", - "bash", - "zsh", - "fish", - "powershell", - "cmd.exe", -} - -_BROWSER_CLASS_HINTS: set[str] = { - "firefox", - "zen", - "librewolf", - "waterfox", - "floorp", - "chromium", - "google-chrome", - "chrome", - "brave-browser", - "microsoft-edge", - "vivaldi", - "opera", - "thorium", - "browser", -} - - -def is_terminal_window_payload(payload: Mapping[str, object]) -> bool: - window_class = str(payload.get("class", "")).lower() - initial_class = str(payload.get("initialClass", "")).lower() - title = str(payload.get("title", "")).lower() - - if any( - hint in window_class or hint in initial_class for hint in _TERMINAL_CLASS_HINTS - ): - return True - - return any(hint in title for hint in _TERMINAL_TITLE_HINTS) - - -def is_browser_window_payload(payload: Mapping[str, object]) -> bool: - window_class = str(payload.get("class", "")).lower() - initial_class = str(payload.get("initialClass", "")).lower() - title = str(payload.get("title", "")).lower() - - if any( - hint in window_class or hint in initial_class for hint in _BROWSER_CLASS_HINTS - ): - return True - - return "browser" in title or "chrome" in title or "firefox" in title - - -class SystemIntegration(Protocol): - @property - def is_hyprland(self) -> bool: ... - - def send_shortcut(self, *, mod: str, key: str) -> bool: ... - - def active_window(self) -> dict[str, object] | None: ... - - def cursor_position(self) -> tuple[int, int] | None: ... - - def move_cursor(self, *, x: int, y: int) -> bool: ... - - def switch_workspace(self, direction: str) -> bool: ... - - def is_text_input_focused(self) -> bool | None: ... - - def send_enter_via_accessibility(self) -> bool | None: ... - - def is_terminal_window_active(self) -> bool | None: ... - - def paste_shortcuts( - self, *, terminal_active: bool - ) -> tuple[tuple[str, str], ...]: ... - - -class NoopSystemIntegration: - @property - def is_hyprland(self) -> bool: - return False - - def send_shortcut(self, *, mod: str, key: str) -> bool: - del mod - del key - return False - - def active_window(self) -> dict[str, object] | None: - return None - - def cursor_position(self) -> tuple[int, int] | None: - return None - - def move_cursor(self, *, x: int, y: int) -> bool: - del x - del y - return False - - def switch_workspace(self, direction: str) -> bool: - del direction - return False - - def is_text_input_focused(self) -> bool | None: - return None - - def send_enter_via_accessibility(self) -> bool | None: - return None - - def is_terminal_window_active(self) -> bool | None: - return None - - def paste_shortcuts(self, *, terminal_active: bool) -> tuple[tuple[str, str], ...]: - del terminal_active - return () - - -class HyprlandSystemIntegration: - @property - def is_hyprland(self) -> bool: - return True - - def send_shortcut(self, *, mod: str, key: str) -> bool: - mod_part = mod.strip().upper() - if mod_part: - arg = f"{mod_part}, {key}, activewindow" - else: - arg = f", {key}, activewindow" - return self._dispatch(["sendshortcut", arg], timeout=1.0) - - def active_window(self) -> dict[str, object] | None: - return self._query_json(["activewindow"], timeout=1.0) - - def cursor_position(self) -> tuple[int, int] | None: - payload = self._query_json(["cursorpos"], timeout=0.8) - if payload is None: - return None - - x_raw = payload.get("x") - y_raw = payload.get("y") - if not isinstance(x_raw, int | float) or not isinstance(y_raw, int | float): - return None - - return int(x_raw), int(y_raw) - - def move_cursor(self, *, x: int, y: int) -> bool: - return self._dispatch(["movecursor", str(x), str(y)], timeout=0.8) - - def switch_workspace(self, direction: str) -> bool: - workspace_arg = "e-1" if direction == "left" else "e+1" - return self._dispatch(["workspace", workspace_arg], timeout=1.0) - - def is_text_input_focused(self) -> bool | None: - return probe_text_input_focus_via_atspi() - - def send_enter_via_accessibility(self) -> bool | None: - return probe_send_enter_via_atspi() - - def is_terminal_window_active(self) -> bool | None: - payload = self.active_window() - if payload is None: - return False - return is_terminal_window_payload(payload) - - def paste_shortcuts(self, *, terminal_active: bool) -> tuple[tuple[str, str], ...]: - if terminal_active: - return ( - ("CTRL SHIFT", "V"), - ("SHIFT", "Insert"), - ("CTRL", "V"), - ) - return (("CTRL", "V"),) - - @staticmethod - def _dispatch(args: list[str], *, timeout: float) -> bool: - try: - proc = subprocess.run( - ["hyprctl", "dispatch", *args], - capture_output=True, - text=True, - check=False, - timeout=timeout, - ) - except (OSError, subprocess.TimeoutExpired): - return False - - return proc.returncode == 0 and proc.stdout.strip() == "ok" - - @staticmethod - def _query_json(args: list[str], *, timeout: float) -> dict[str, object] | None: - try: - proc = subprocess.run( - ["hyprctl", "-j", *args], - capture_output=True, - text=True, - check=False, - timeout=timeout, - ) - except (OSError, subprocess.TimeoutExpired): - return None - - if proc.returncode != 0: - return None - - try: - payload_obj = cast(object, json.loads(proc.stdout)) - except json.JSONDecodeError: - return None - - if not isinstance(payload_obj, dict): - return None - - return cast(dict[str, object], payload_obj) - - -def detect_hyprland_session(*, env: Mapping[str, str] | None = None) -> bool: - source = env if env is not None else os.environ - desktop = source.get("XDG_CURRENT_DESKTOP", "") - if "hyprland" in desktop.lower(): - return True - return bool(source.get("HYPRLAND_INSTANCE_SIGNATURE")) - - -def create_system_integration( - *, - env: Mapping[str, str] | None = None, - platform_name: str | None = None, -) -> SystemIntegration: - if detect_hyprland_session(env=env): - return HyprlandSystemIntegration() - - _ = platform_name if platform_name is not None else sys.platform - - return NoopSystemIntegration() - - -def probe_text_input_focus_via_atspi(*, timeout_s: float = 1.5) -> bool: - script = ( - "import gi\n" - "gi.require_version('Atspi', '2.0')\n" - "from gi.repository import Atspi\n" - "obj = Atspi.get_desktop(0).get_focus()\n" - "editable = False\n" - "role = ''\n" - "if obj is not None:\n" - " role = obj.get_role_name().lower()\n" - " attrs = obj.get_attributes() or []\n" - " for it in attrs:\n" - " s = str(it).lower()\n" - " if s == 'editable:true' or s.endswith(':editable:true'):\n" - " editable = True\n" - " break\n" - "roles = {'text', 'entry', 'password text', 'terminal', 'paragraph', 'document text', 'document web'}\n" - "print('1' if editable or role in roles else '0')\n" - ) - - try: - proc = subprocess.run( - ["python3", "-c", script], - capture_output=True, - text=True, - check=False, - timeout=timeout_s, - ) - except (OSError, subprocess.TimeoutExpired): - return False - - return proc.returncode == 0 and proc.stdout.strip() == "1" - - -def load_atspi_module() -> object | None: - try: - gi = importlib.import_module("gi") - require_version = cast(_RequireVersionFn, getattr(gi, "require_version")) - require_version("Atspi", "2.0") - atspi_repo = cast(object, importlib.import_module("gi.repository")) - return cast(object, getattr(atspi_repo, "Atspi")) - except Exception: - return None - - -def probe_send_enter_via_atspi( - *, atspi_module: object | None = None, lazy_load: bool = True -) -> bool: - module = atspi_module - if module is None and lazy_load: - module = load_atspi_module() - if module is None: - return False - - try: - key_synth = cast(object, getattr(module, "KeySynthType")) - press_release = cast(object, getattr(key_synth, "PRESSRELEASE")) - generate_keyboard_event = cast( - _GenerateKeyboardEventFn, - getattr(module, "generate_keyboard_event"), - ) - return bool(generate_keyboard_event(65293, None, press_release)) - except Exception: - return False - - -class _GenerateKeyboardEventFn(Protocol): - def __call__( - self, - keyval: int, - keystring: str | None, - synth_type: object, - ) -> bool: ... - - -class _RequireVersionFn(Protocol): - def __call__(self, namespace: str, version: str) -> None: ... +_sys.modules[__name__] = _import_module("vibemouse.platform.system_integration") diff --git a/vibemouse/transcriber.py b/vibemouse/transcriber.py index 4e90436..b2b8bc7 100644 --- a/vibemouse/transcriber.py +++ b/vibemouse/transcriber.py @@ -1,300 +1,4 @@ -from __future__ import annotations +from importlib import import_module as _import_module +import sys as _sys -import importlib -import logging -import re -from pathlib import Path -from threading import Lock -from typing import Protocol, cast - -from vibemouse.config import AppConfig - -_LOG = logging.getLogger(__name__) - - -class SenseVoiceTranscriber: - def __init__(self, config: AppConfig) -> None: - self._config: AppConfig = config - self._transcriber: _TranscriberProtocol | None = None - self._transcriber_lock: Lock = Lock() - self.device_in_use: str = config.device - self.backend_in_use: str = "unknown" - - def transcribe(self, audio_path: Path) -> str: - self._ensure_transcriber_loaded() - if self._transcriber is None: - raise RuntimeError("SenseVoice transcriber is not initialized") - return self._transcriber.transcribe(audio_path) - - def prewarm(self) -> None: - self._ensure_transcriber_loaded() - - def _ensure_transcriber_loaded(self) -> None: - if self._transcriber is not None: - return - - with self._transcriber_lock: - if self._transcriber is not None: - return - - backend = self._config.transcriber_backend - if backend in {"auto", "funasr"}: - _LOG.warning( - "Backend %r is deprecated; using 'funasr_onnx' instead", backend - ) - backend = "funasr_onnx" - - if backend != "funasr_onnx": - raise RuntimeError(f"Unsupported backend {backend!r}. Use funasr_onnx.") - - self._build_funasr_onnx_backend() - return - - def _build_funasr_onnx_backend(self) -> None: - backend = _FunASRONNXBackend(self._config) - self._transcriber = backend - self.device_in_use = backend.device_in_use - self.backend_in_use = "funasr_onnx" - - -class _FunASRONNXBackend: - def __init__(self, config: AppConfig) -> None: - self._config: AppConfig = config - self._model: _ONNXSenseVoiceModel | None = None - self._postprocess: _PostprocessFn | None = None - self._load_lock: Lock = Lock() - self.device_in_use: str = "cpu" - self._ensure_model_loaded() - - def transcribe(self, audio_path: Path) -> str: - if self._model is None: - raise RuntimeError("funasr_onnx SenseVoice model is not initialized") - if self._postprocess is None: - raise RuntimeError("funasr postprocess function is not initialized") - - textnorm = "withitn" if self._config.use_itn else "woitn" - result = self._model( - str(audio_path), - language=self._config.language, - textnorm=textnorm, - ) - if not result: - return "" - - raw_text = result[0] - return self._postprocess(raw_text).strip() - - def _ensure_model_loaded(self) -> None: - if self._model is not None: - return - - with self._load_lock: - if self._model is not None: - return - try: - SenseVoiceSmall = self._load_onnx_class() - postprocess = self._load_postprocess() - except Exception as error: - raise RuntimeError( - "funasr_onnx backend requires funasr-onnx package" - ) from error - - requested_path = self._resolve_onnx_model_dir() - self._ensure_tokenizer_file(requested_path) - device_id = self._resolve_onnx_device_id(self._config.device) - - try: - model = SenseVoiceSmall( - model_dir=str(requested_path), - batch_size=1, - device_id=device_id, - quantize=True, - cache_dir=None, - ) - self._model = model - self._postprocess = postprocess - self.device_in_use = self._resolve_device_label(self._config.device) - _LOG.info( - "Loaded funasr_onnx model: device_in_use=%s model=%s", - self.device_in_use, - requested_path, - ) - return - except Exception as primary_error: - if not self._config.fallback_to_cpu: - raise RuntimeError( - f"Failed to load funasr_onnx backend on {self._config.device}: {primary_error}" - ) from primary_error - - try: - model = SenseVoiceSmall( - model_dir=str(requested_path), - batch_size=1, - device_id="-1", - quantize=True, - cache_dir=None, - ) - except Exception as cpu_error: - raise RuntimeError( - f"Failed to load funasr_onnx backend on {self._config.device} and cpu fallback: {cpu_error}" - ) from cpu_error - - self._model = model - self._postprocess = postprocess - self.device_in_use = "cpu" - _LOG.warning( - "Loaded funasr_onnx model with CPU fallback after device load failure" - ) - - def _resolve_onnx_model_dir(self) -> Path: - raw_model = self._config.model_name - canonical_model = raw_model - if raw_model == "iic/SenseVoiceSmall": - canonical_model = "iic/SenseVoiceSmall-onnx" - - if canonical_model.startswith("iic/"): - return self._download_modelscope_snapshot(canonical_model) - - path_candidate = Path(canonical_model) - if not path_candidate.exists(): - return path_candidate - - if self._contains_onnx_model(path_candidate): - return path_candidate - - raise RuntimeError( - f"ONNX model directory {path_candidate} exists but model_quant.onnx/model.onnx is missing" - ) - - @staticmethod - def _contains_onnx_model(model_dir: Path) -> bool: - return (model_dir / "model_quant.onnx").exists() or ( - model_dir / "model.onnx" - ).exists() - - @staticmethod - def _download_modelscope_snapshot(model_id: str) -> Path: - try: - snapshot_mod = importlib.import_module("modelscope.hub.snapshot_download") - except Exception as error: - raise RuntimeError( - "modelscope is required to download ONNX model snapshots" - ) from error - - snapshot_download = cast( - _SnapshotDownloadFn, - getattr(snapshot_mod, "snapshot_download"), - ) - snapshot_path = snapshot_download(model_id) - model_dir = Path(snapshot_path) - if not model_dir.exists(): - raise RuntimeError(f"Downloaded model path does not exist: {snapshot_path}") - if not _FunASRONNXBackend._contains_onnx_model(model_dir): - raise RuntimeError( - f"Downloaded model {model_id} missing model_quant.onnx/model.onnx" - ) - return model_dir - - @staticmethod - def _resolve_onnx_device_id(device: str) -> str: - normalized = device.strip().lower() - if normalized == "cpu": - return "-1" - if normalized.startswith("cuda"): - parts = normalized.split(":", 1) - return parts[1] if len(parts) > 1 and parts[1] else "0" - return "-1" - - @staticmethod - def _resolve_device_label(device: str) -> str: - normalized = device.strip().lower() - if normalized.startswith("cuda"): - return normalized - return "cpu" - - def _ensure_tokenizer_file(self, model_dir: Path) -> None: - target = model_dir / "chn_jpn_yue_eng_ko_spectok.bpe.model" - if target.exists(): - return - - fallback = ( - Path.home() - / ".cache/modelscope/hub/models/iic/SenseVoiceSmall/chn_jpn_yue_eng_ko_spectok.bpe.model" - ) - if fallback.exists(): - model_dir.mkdir(parents=True, exist_ok=True) - _ = target.write_bytes(fallback.read_bytes()) - return - - raise RuntimeError( - "Tokenizer file chn_jpn_yue_eng_ko_spectok.bpe.model is missing and no fallback was found" - ) - - @staticmethod - def _load_onnx_class() -> _ONNXSenseVoiceCtor: - module = importlib.import_module("funasr_onnx") - return cast(_ONNXSenseVoiceCtor, getattr(module, "SenseVoiceSmall")) - - @staticmethod - def _load_postprocess() -> _PostprocessFn: - try: - post_module = importlib.import_module("funasr.utils.postprocess_utils") - return cast( - _PostprocessFn, - getattr(post_module, "rich_transcription_postprocess"), - ) - except Exception: - try: - post_module = importlib.import_module( - "funasr_onnx.utils.postprocess_utils" - ) - return cast( - _PostprocessFn, - getattr(post_module, "rich_transcription_postprocess"), - ) - except Exception: - return _strip_sensevoice_control_tokens - - -_SENSEVOICE_CONTROL_TOKEN_RE = re.compile(r"<\|[^|>]+\|>") - - -def _strip_sensevoice_control_tokens(text: str) -> str: - cleaned = _SENSEVOICE_CONTROL_TOKEN_RE.sub("", text) - return " ".join(cleaned.split()).strip() - - -class _TranscriberProtocol(Protocol): - device_in_use: str - - def transcribe(self, audio_path: Path) -> str: ... - - -class _PostprocessFn(Protocol): - def __call__(self, text: str) -> str: ... - - -class _ONNXSenseVoiceModel(Protocol): - def __call__( - self, - wav_content: str, - *, - language: str, - textnorm: str, - ) -> list[str]: ... - - -class _ONNXSenseVoiceCtor(Protocol): - def __call__( - self, - *, - model_dir: str, - batch_size: int, - device_id: str, - quantize: bool, - cache_dir: str | None, - ) -> _ONNXSenseVoiceModel: ... - - -class _SnapshotDownloadFn(Protocol): - def __call__(self, model_id: str) -> str: ... +_sys.modules[__name__] = _import_module("vibemouse.core.transcriber")